| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
- #!/usr/bin/env python3
- """
- Round 4 cleanup: 合并 howard_dedup 里因 alias miss 产生的重复 capability.
- 所有合并都是 member → canonical:junction 全部改向 canonical,member 删除。
- """
- import argparse
- import sys
- from pathlib import Path
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
- from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
- # canonical -> [members]. canonical 必须存在且是"更好"的版本(含原始 id 或更详细 desc)
- CLEANUP = {
- # === 视频/动画 ===
- "CAP-d92ffc99": ["CAP-bcdfba3c"], # AI 虚拟换装
- "CAP-017": ["CAP-1f7f4d72"], # 全向参考
- "CAP-3b8df701": ["CAP-974c9781"], # AI VFX 合成
- "CAP-98490894": ["CAP-e5934ab7", "CAP-c020af1d"], # 唇形同步
- "CAP-7b9d2baf": ["CAP-8c7b1114"], # 首尾帧衔接
- "CAP-49175b92": ["CAP-22768ee3"], # 电影镜头运动
- # === 图像/构图 ===
- "CAP-c9426dcc": ["CAP-8b3d4b8d"], # 产品爆炸分解图
- "CAP-7c8532dc": ["CAP-059c8d22"], # 宠物服装虚拟上身
- "CAP-fddd3349": ["CAP-7abc6773"], # 网格/全景大图切割
- "CAP-1649b549": ["CAP-69f5a034"], # 戏剧性明暗对比
- "CAP-d043d289": ["CAP-14378e6f"], # 手持物体精准细节
- "CAP-5a1ac59d": ["CAP-5f4aa6b7", "CAP-d1234756", "CAP-82851c39"], # 多情绪表情
- "CAP-d93a0ac2": ["CAP-50f66faa"], # 场景光影一致性校正
- "CAP-3c49ff0a": ["CAP-f5ea6d52"], # 空间透视
- "CAP-d1f429ff": ["CAP-7d07e293"], # 科技感元素
- "CAP-8467736a": ["CAP-d1757626"], # 粒子光效
- "CAP-5b000814": ["CAP-92c7cc81"], # 结构化 Prompt
- "CAP-a35e7966": ["CAP-0545b3ca"], # 霓虹发光
- "CAP-2de278d6": ["CAP-6485105e"], # 高饱和配色
- "CAP-562d91c1": ["CAP-2ba237a5", "CAP-85ed8dc9", "CAP-a5f8e368"], # 海报多元素排版
- "CAP-792fd807": ["CAP-fb3906c8", "CAP-d9f9692f"], # 景深虚化
- "CAP-8d69865f": ["CAP-421b1002"], # 照片转绘插画
- # === 文字 ===
- "CAP-16c5174b": ["CAP-db0cb47e"], # 文字描边阴影
- "CAP-00c474e2": ["CAP-e714656e", "CAP-920091f9"], # 视频动效字幕/文字动画
- "CAP-ce113c41" if False else "CAP-7423a8b2": [], # (placeholder)
- # === 拼贴 ===
- "CAP-bc67d346" if False else "CAP-6f73062a": [], # (skip, 6f73062a is current)
- # === 信息层级 ===
- "CAP-067edd94": ["CAP-64f5a261"], # 层级排版 + 标题字号
- # === 数据模板 ===
- "CAP-a08749c3": ["CAP-89bf9fd7"], # 模板数据批量套版
- # === 文字特效 ===
- "CAP-a4b638a6": [], # keep alone (文字特效与动画渲染)
- # === 毛绒 ===
- "CAP-96182b8f": ["CAP-9692aa0a"], # 毛绒材质渲染
- # === 复古大字报 ===
- # CAP-c6dfb2b0 特定风格,保留独立
- }
- # Remove placeholder keys with empty member list
- CLEANUP = {k: v for k, v in CLEANUP.items() if v}
- JUNCTIONS = [
- ("requirement_capability", "capability_id", "requirement_id"),
- ("capability_tool", "capability_id", "tool_id"),
- ("capability_knowledge", "capability_id", "knowledge_id"),
- ("capability_resource", "capability_id", "resource_id"),
- ("strategy_capability", "capability_id", "strategy_id"),
- ]
- def merge_one(cur, canonical, members):
- stats = {'junction_upd': 0, 'junction_dup_del': 0, 'cap_del': 0, 'skipped_missing': []}
- for member in members:
- cur.execute('SELECT 1 FROM capability WHERE id = %s', (member,))
- if not cur.fetchone():
- stats['skipped_missing'].append(member)
- continue
- for table, col, other_col in JUNCTIONS:
- # delete dups (member has (other_col) that canonical already has)
- cur.execute(
- f"DELETE FROM {table} WHERE {col} = %s AND {other_col} IN ("
- f" SELECT {other_col} FROM {table} WHERE {col} = %s)",
- (member, canonical))
- stats['junction_dup_del'] += cur.rowcount or 0
- # update remaining member refs to canonical
- cur.execute(f"UPDATE {table} SET {col} = %s WHERE {col} = %s",
- (canonical, member))
- stats['junction_upd'] += cur.rowcount or 0
- cur.execute('DELETE FROM capability WHERE id = %s', (member,))
- stats['cap_del'] += cur.rowcount or 0
- return stats
- def main():
- ap = argparse.ArgumentParser()
- g = ap.add_mutually_exclusive_group(required=True)
- g.add_argument('--dry-run', action='store_true')
- g.add_argument('--execute', action='store_true')
- args = ap.parse_args()
- s = PostgreSQLCapabilityStore()
- cur = s._get_cursor()
- try:
- cur.execute("SELECT COUNT(*) c FROM capability")
- before = cur.fetchone()['c']
- print(f'before: {before} caps', flush=True)
- total = {'upd': 0, 'dup': 0, 'del': 0, 'skip': []}
- for canonical, members in CLEANUP.items():
- cur.execute('SELECT name FROM capability WHERE id = %s', (canonical,))
- r = cur.fetchone()
- if not r:
- print(f'❌ canonical {canonical} missing, SKIP cluster', flush=True)
- continue
- if args.dry_run:
- # just check members exist
- exists = [m for m in members
- if (cur.execute('SELECT 1 FROM capability WHERE id=%s',(m,)),
- cur.fetchone())[1]]
- print(f'[DRY] {canonical} ({r["name"]}) ← {len(exists)}/{len(members)} members to merge',
- flush=True)
- else:
- stats = merge_one(cur, canonical, members)
- total['upd'] += stats['junction_upd']
- total['dup'] += stats['junction_dup_del']
- total['del'] += stats['cap_del']
- total['skip'].extend(stats['skipped_missing'])
- print(f'✓ {canonical} ({r["name"][:40]}) ← {stats["cap_del"]} merged '
- f'(upd={stats["junction_upd"]}, dup={stats["junction_dup_del"]})',
- flush=True)
- cur.execute("SELECT COUNT(*) c FROM capability")
- after = cur.fetchone()['c']
- print(f'\n{"="*50}')
- print(f'{"DRY-RUN" if args.dry_run else "EXECUTE"}: {before} → {after} (-{before-after})', flush=True)
- if not args.dry_run:
- print(f'junction UPDATE={total["upd"]}, dup DELETE={total["dup"]}, cap DELETE={total["del"]}',
- flush=True)
- if total['skip']:
- print(f'skipped (missing): {total["skip"]}', flush=True)
- finally:
- cur.close()
- s.close()
- if __name__ == '__main__':
- main()
|