#!/usr/bin/env python3 """ Round 4 cleanup: 合并 howard_dedup 里因 alias miss 产生的重复 capability. 所有合并都是 member → canonical:junction 全部改向 canonical,member 删除。 """ import argparse import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent.parent)) from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore # canonical -> [members]. canonical 必须存在且是"更好"的版本(含原始 id 或更详细 desc) CLEANUP = { # === 视频/动画 === "CAP-d92ffc99": ["CAP-bcdfba3c"], # AI 虚拟换装 "CAP-017": ["CAP-1f7f4d72"], # 全向参考 "CAP-3b8df701": ["CAP-974c9781"], # AI VFX 合成 "CAP-98490894": ["CAP-e5934ab7", "CAP-c020af1d"], # 唇形同步 "CAP-7b9d2baf": ["CAP-8c7b1114"], # 首尾帧衔接 "CAP-49175b92": ["CAP-22768ee3"], # 电影镜头运动 # === 图像/构图 === "CAP-c9426dcc": ["CAP-8b3d4b8d"], # 产品爆炸分解图 "CAP-7c8532dc": ["CAP-059c8d22"], # 宠物服装虚拟上身 "CAP-fddd3349": ["CAP-7abc6773"], # 网格/全景大图切割 "CAP-1649b549": ["CAP-69f5a034"], # 戏剧性明暗对比 "CAP-d043d289": ["CAP-14378e6f"], # 手持物体精准细节 "CAP-5a1ac59d": ["CAP-5f4aa6b7", "CAP-d1234756", "CAP-82851c39"], # 多情绪表情 "CAP-d93a0ac2": ["CAP-50f66faa"], # 场景光影一致性校正 "CAP-3c49ff0a": ["CAP-f5ea6d52"], # 空间透视 "CAP-d1f429ff": ["CAP-7d07e293"], # 科技感元素 "CAP-8467736a": ["CAP-d1757626"], # 粒子光效 "CAP-5b000814": ["CAP-92c7cc81"], # 结构化 Prompt "CAP-a35e7966": ["CAP-0545b3ca"], # 霓虹发光 "CAP-2de278d6": ["CAP-6485105e"], # 高饱和配色 "CAP-562d91c1": ["CAP-2ba237a5", "CAP-85ed8dc9", "CAP-a5f8e368"], # 海报多元素排版 "CAP-792fd807": ["CAP-fb3906c8", "CAP-d9f9692f"], # 景深虚化 "CAP-8d69865f": ["CAP-421b1002"], # 照片转绘插画 # === 文字 === "CAP-16c5174b": ["CAP-db0cb47e"], # 文字描边阴影 "CAP-00c474e2": ["CAP-e714656e", "CAP-920091f9"], # 视频动效字幕/文字动画 "CAP-ce113c41" if False else "CAP-7423a8b2": [], # (placeholder) # === 拼贴 === "CAP-bc67d346" if False else "CAP-6f73062a": [], # (skip, 6f73062a is current) # === 信息层级 === "CAP-067edd94": ["CAP-64f5a261"], # 层级排版 + 标题字号 # === 数据模板 === "CAP-a08749c3": ["CAP-89bf9fd7"], # 模板数据批量套版 # === 文字特效 === "CAP-a4b638a6": [], # keep alone (文字特效与动画渲染) # === 毛绒 === "CAP-96182b8f": ["CAP-9692aa0a"], # 毛绒材质渲染 # === 复古大字报 === # CAP-c6dfb2b0 特定风格,保留独立 } # Remove placeholder keys with empty member list CLEANUP = {k: v for k, v in CLEANUP.items() if v} JUNCTIONS = [ ("requirement_capability", "capability_id", "requirement_id"), ("capability_tool", "capability_id", "tool_id"), ("capability_knowledge", "capability_id", "knowledge_id"), ("capability_resource", "capability_id", "resource_id"), ("strategy_capability", "capability_id", "strategy_id"), ] def merge_one(cur, canonical, members): stats = {'junction_upd': 0, 'junction_dup_del': 0, 'cap_del': 0, 'skipped_missing': []} for member in members: cur.execute('SELECT 1 FROM capability WHERE id = %s', (member,)) if not cur.fetchone(): stats['skipped_missing'].append(member) continue for table, col, other_col in JUNCTIONS: # delete dups (member has (other_col) that canonical already has) cur.execute( f"DELETE FROM {table} WHERE {col} = %s AND {other_col} IN (" f" SELECT {other_col} FROM {table} WHERE {col} = %s)", (member, canonical)) stats['junction_dup_del'] += cur.rowcount or 0 # update remaining member refs to canonical cur.execute(f"UPDATE {table} SET {col} = %s WHERE {col} = %s", (canonical, member)) stats['junction_upd'] += cur.rowcount or 0 cur.execute('DELETE FROM capability WHERE id = %s', (member,)) stats['cap_del'] += cur.rowcount or 0 return stats def main(): ap = argparse.ArgumentParser() g = ap.add_mutually_exclusive_group(required=True) g.add_argument('--dry-run', action='store_true') g.add_argument('--execute', action='store_true') args = ap.parse_args() s = PostgreSQLCapabilityStore() cur = s._get_cursor() try: cur.execute("SELECT COUNT(*) c FROM capability") before = cur.fetchone()['c'] print(f'before: {before} caps', flush=True) total = {'upd': 0, 'dup': 0, 'del': 0, 'skip': []} for canonical, members in CLEANUP.items(): cur.execute('SELECT name FROM capability WHERE id = %s', (canonical,)) r = cur.fetchone() if not r: print(f'❌ canonical {canonical} missing, SKIP cluster', flush=True) continue if args.dry_run: # just check members exist exists = [m for m in members if (cur.execute('SELECT 1 FROM capability WHERE id=%s',(m,)), cur.fetchone())[1]] print(f'[DRY] {canonical} ({r["name"]}) ← {len(exists)}/{len(members)} members to merge', flush=True) else: stats = merge_one(cur, canonical, members) total['upd'] += stats['junction_upd'] total['dup'] += stats['junction_dup_del'] total['del'] += stats['cap_del'] total['skip'].extend(stats['skipped_missing']) print(f'✓ {canonical} ({r["name"][:40]}) ← {stats["cap_del"]} merged ' f'(upd={stats["junction_upd"]}, dup={stats["junction_dup_del"]})', flush=True) cur.execute("SELECT COUNT(*) c FROM capability") after = cur.fetchone()['c'] print(f'\n{"="*50}') print(f'{"DRY-RUN" if args.dry_run else "EXECUTE"}: {before} → {after} (-{before-after})', flush=True) if not args.dry_run: print(f'junction UPDATE={total["upd"]}, dup DELETE={total["dup"]}, cap DELETE={total["del"]}', flush=True) if total['skip']: print(f'skipped (missing): {total["skip"]}', flush=True) finally: cur.close() s.close() if __name__ == '__main__': main()