| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353 |
- #!/usr/bin/env python3
- """
- 修复 5 个占位 strategy(REQ_004 / 031 / 053 / 066 / 070):
- 把各自的非标准 body schema 正规化成标准 workflow_outline 结构,
- 并写 strategy_capability junction(之前 0 条)。
- 不改 strategy.id / strategy.name / strategy.description(保留原调研的 rich 描述)。
- body 里原 schema 字段也保留(作为 salvage_original),只是增加 workflow_outline。
- 各 folder 的 schema 变体:
- REQ_004: body.strategy.phases + body.capability_mapping (cap by name, phase_id 映射)
- REQ_031: body.strategy.phases[*].capabilities_used (含 'CAP-XXX 名字' 组合字符串)
- REQ_053: body.phases[*].capabilities (dict with id/name) + body.core_workflow
- REQ_066: body.execution_phases[*].capabilities_used (混合 id 和 name)
- REQ_070: body.selected_blueprint.phases[*].capabilities_used
- Cap 引用处理:
- 1. 若是 'CAP-XXX' 形式且 XXX 存在于 DB → 直接用
- 2. 若是 'CAP-tao_dev_1-XX-YY' 老 ID → 通过 MERGE_CLUSTERS 传递闭包解析
- 3. 若是纯名字 → 通过 alias map(含 LLM_RENAMES)解析
- 4. 解析不到 → 记录到 unresolved,不写 junction
- """
- import hashlib
- import json
- import re
- import sys
- from pathlib import Path
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
- from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
- from knowhub.scripts.merge_capabilities import MERGE_CLUSTERS
- from knowhub.scripts.rename_merged_capabilities import RENAMES
- from knowhub.scripts.llm_renames import LLM_RENAMES
# Requirement IDs whose placeholder strategies this script normalizes.
TARGET_REQS = ['REQ_004', 'REQ_031', 'REQ_053', 'REQ_066', 'REQ_070']

# References that only occur inside the placeholder strategies (legacy IDs and
# ambiguous names), mapped to canonical capability IDs. Per the original
# author's note, neither MERGE_CLUSTERS nor LLM_RENAMES covers these.

# Legacy tao_dev IDs.
_LEGACY_TAO_DEV_IDS = {
    'CAP-tao_dev_1-02-03': 'CAP-792fd807',  # depth-of-field blur optical simulation
    'CAP-tao_dev_1-03-01': 'CAP-1649b549',  # dramatic light/dark contrast
    'CAP-tao_dev_1-00-02': 'CAP-008ee6c9',  # realism prompt injection
}

# Name variants used by REQ_004.
_LEGACY_REQ_004_NAMES = {
    '结构化提示词工程(PROMPT MASTER)': 'CAP-5b000814',
    '参考图融合控制(Omni-Reference)': 'CAP-017',
    '手持道具细节强化': 'CAP-d043d289',
}

# REQ_031 "is_new" capability (a canonical one exists now) and the REQ_053
# variant of the depth-of-field capability name.
_LEGACY_OTHER_NAMES = {
    '跨物种形态融合生成(is_new)': 'CAP-24dd762b',
    '跨物种形态融合生成': 'CAP-24dd762b',
    '景深虚化光学模拟(前景虚化)': 'CAP-792fd807',
}

# Merged in the same order as the groups above; the keys are all distinct.
LEGACY_REFS = {
    **_LEGACY_TAO_DEV_IDS,
    **_LEGACY_REQ_004_NAMES,
    **_LEGACY_OTHER_NAMES,
}
def norm(s):
    """Return *s* stripped of surrounding whitespace and lower-cased.

    Any falsy input (``None``, ``''``) yields the empty string.
    """
    if not s:
        return ''
    trimmed = s.strip()
    return trimmed.lower()
def build_alias_and_member(cur):
    """Build the lookup tables used to resolve capability references.

    Args:
        cur: DB cursor. It is used to run ``SELECT id, name FROM capability``
            and its rows are read by key (``row['id']``, ``row['name']``).

    Returns:
        A 3-tuple ``(alias, m2c, db_caps)``:

        * ``alias``: normalized capability name -> capability ID. Seeded from
          the DB names, then overlaid with RENAMES and LLM_RENAMES (later
          sources win on a name clash).
        * ``m2c``: merged member ID -> canonical ID, with chains of merges
          followed to their end.
        * ``db_caps``: capability ID -> name, exactly as read from the DB.
    """
    # Direct member -> canonical edges from the merge clusters.
    m2c = {
        member: canonical
        for canonical, members in MERGE_CLUSTERS.items()
        for member in members
    }

    def final(cid, limit=10):
        # Follow member -> canonical edges until a non-member is reached.
        # The walk stops early on a revisit (cycle) or after `limit` hops.
        visited = set()
        hops_left = limit
        while hops_left > 0 and cid not in visited and cid in m2c:
            visited.add(cid)
            cid = m2c[cid]
            hops_left -= 1
        return cid

    # Collapse every chain in place, one member at a time.
    for member in list(m2c):
        m2c[member] = final(member)

    cur.execute('SELECT id, name FROM capability')
    db_caps = {row['id']: row['name'] for row in cur.fetchall()}

    alias = {norm(name): cid for cid, name in db_caps.items()}
    for cid, (new_name, _) in RENAMES.items():
        alias[norm(new_name)] = final(cid)
    alias.update(
        (norm(llm_name), final(canonical))
        for llm_name, canonical in LLM_RENAMES.items()
    )
    return alias, m2c, db_caps
def resolve_cap(cap_ref, alias, m2c, db_caps, unresolved):
    """Resolve a capability reference to an ID that is present in ``db_caps``.

    ``cap_ref`` may be a bare ID (``'CAP-XXX'``), a bare name, or an
    ``'CAP-XXX name'`` combination. Resolution order:

    0. exact match of the whole reference in ``LEGACY_REFS``;
    1. the leading ``CAP-...`` ID itself;
    2. the leading ID looked up in ``LEGACY_REFS``;
    3. the leading ID looked up in ``m2c`` (merged member -> canonical);
    4. the name part (text after the leading ID) looked up in ``alias``;
    5. the whole reference looked up in ``alias``.

    A candidate is accepted only if it is a key of ``db_caps``, so the caller
    never receives an ID that is missing from the capability table.

    Args:
        cap_ref: the raw reference; non-strings are converted with ``str()``.
        alias: normalized name -> capability ID.
        m2c: merged member ID -> canonical ID.
        db_caps: capability ID -> name for the capabilities known to the DB.
        unresolved: list that receives the stripped reference when nothing
            matches (mutated in place).

    Returns:
        The resolved capability ID, or ``None`` for an empty, blank or
        unresolvable reference. Empty and blank references are not recorded
        in ``unresolved``.
    """
    if not cap_ref:
        return None
    cap_ref = str(cap_ref).strip()
    if not cap_ref:
        # Whitespace-only input carries no reference worth reporting.
        return None

    # 0. Salvage-specific legacy IDs / ambiguous names take priority.
    legacy_hit = LEGACY_REFS.get(cap_ref)
    if legacy_hit is not None and legacy_hit in db_caps:
        return legacy_hit

    # Split an optional leading 'CAP-...' ID from the trailing name.
    # re.ASCII keeps \w from matching CJK characters, so an ID glued directly
    # to its name ('CAP-003<name>') is still split at the right place.
    id_match = re.match(r'^(CAP-[\w\-]+)', cap_ref, re.ASCII)
    if id_match:
        candidate_id = id_match.group(1)
        name_part = cap_ref[id_match.end():].strip()
    else:
        candidate_id = None
        name_part = cap_ref

    # 1-3. ID-based candidates: direct, legacy mapping, merge-cluster closure.
    if candidate_id:
        id_candidates = (
            candidate_id,
            LEGACY_REFS.get(candidate_id),
            m2c.get(candidate_id),
        )
        for cand in id_candidates:
            if cand is not None and cand in db_caps:
                return cand

    # 4-5. Name-based candidates: the name part first, then the whole ref.
    for key in (name_part, cap_ref):
        if not key:
            continue
        cand = alias.get(norm(key))
        if cand and cand in db_caps:
            return cand

    unresolved.append(cap_ref)
    return None
- # ═══════════════════════════════════════════════════════════
- # Each folder's schema normalizer
def salvage_req_004(body, alias, m2c, db_caps, unresolved):
    """Build a workflow outline from the REQ_004 body layout.

    Phases come from ``body['strategy']['phases']``. Capabilities come from
    ``body['capability_mapping']``, whose entries name a ``capability`` and
    the phases it is ``used_in_phases``. Phase references such as
    ``'P2(...)'`` are reduced to their leading ``P<digits>`` prefix before
    being matched against each phase's ``phase_id``.

    Returns:
        A list with one ``{'phase', 'description', 'capabilities'}`` dict per
        dict-typed phase. ``capabilities`` holds de-duplicated
        ``{'id', 'name'}`` entries in first-seen order. References that cannot
        be resolved are reported through ``unresolved`` by ``resolve_cap``.
    """
    strategy = body.get('strategy', {})
    phases = strategy.get('phases', []) if isinstance(strategy, dict) else []

    # phase prefix ('P1', 'P2', ...) -> capability names used in that phase
    caps_by_phase = {}
    for mapping in body.get('capability_mapping', []):
        if not isinstance(mapping, dict):
            continue
        cap_name = mapping.get('capability')
        for raw_pid in mapping.get('used_in_phases', []):
            prefix = re.match(r'(P\d+)', str(raw_pid))
            if prefix:
                caps_by_phase.setdefault(prefix.group(1), []).append(cap_name)

    outline = []
    for phase in phases:
        if not isinstance(phase, dict):
            continue
        # Insertion-ordered dict keyed by ID gives order-preserving dedup.
        picked = {}
        for cap_name in caps_by_phase.get(phase.get('phase_id', ''), []):
            cap_id = resolve_cap(cap_name, alias, m2c, db_caps, unresolved)
            if cap_id and cap_id not in picked:
                picked[cap_id] = {
                    'id': cap_id,
                    'name': db_caps.get(cap_id, cap_name),
                }
        outline.append({
            'phase': phase.get('phase', ''),
            'description': phase.get('description', ''),
            'capabilities': list(picked.values()),
        })
    return outline
def salvage_req_031(body, alias, m2c, db_caps, unresolved):
    """Build a workflow outline from the REQ_031 body layout.

    Phases come from ``body['strategy']['phases']``. Each phase lists its
    capabilities in ``capabilities_used`` as strings (for example an ID
    followed by a name), which are resolved through ``resolve_cap``.

    Returns:
        A list with one ``{'phase', 'description', 'capabilities'}`` dict per
        dict-typed phase. ``capabilities`` holds de-duplicated
        ``{'id', 'name'}`` entries in first-seen order.
    """
    strategy = body.get('strategy', {})
    if isinstance(strategy, dict):
        phases = strategy.get('phases', [])
    else:
        phases = []

    outline = []
    for phase in phases:
        if not isinstance(phase, dict):
            continue
        # Insertion-ordered dict keyed by ID gives order-preserving dedup.
        picked = {}
        for ref in phase.get('capabilities_used', []):
            cap_id = resolve_cap(ref, alias, m2c, db_caps, unresolved)
            if cap_id and cap_id not in picked:
                picked[cap_id] = {'id': cap_id, 'name': db_caps.get(cap_id, ref)}
        outline.append({
            'phase': phase.get('phase', ''),
            'description': phase.get('description', ''),
            'capabilities': list(picked.values()),
        })
    return outline
def salvage_req_053(body, alias, m2c, db_caps, unresolved):
    """Build a workflow outline from the REQ_053 body layout.

    Phases come from the top-level ``body['phases']``. Each phase carries a
    ``capabilities`` list of dicts with ``id`` and/or ``name``. The ID is
    tried first and the name is used as a fallback.

    A capability is reported in ``unresolved`` only when every one of its
    references fails, and each failed reference is reported once. A failed ID
    whose name then resolves is not reported.

    Returns:
        A list with one ``{'phase', 'description', 'capabilities'}`` dict per
        dict-typed phase. ``capabilities`` holds de-duplicated
        ``{'id', 'name'}`` entries in first-seen order.
    """
    outline = []
    for phase in body.get('phases', []):
        if not isinstance(phase, dict):
            continue
        resolved = []
        seen = set()
        for cap in phase.get('capabilities', []):
            if not isinstance(cap, dict):
                continue
            cap_id_ref = cap.get('id')
            name = cap.get('name', '')

            # Collect misses in a scratch list so that a successful fallback
            # does not leave a false entry in the caller's `unresolved`.
            missed = []
            cap_id = None
            for ref in (cap_id_ref, name):
                if not ref:
                    continue
                cap_id = resolve_cap(ref, alias, m2c, db_caps, missed)
                if cap_id:
                    break
            if not cap_id:
                # dict.fromkeys drops repeats while keeping first-seen order.
                unresolved.extend(dict.fromkeys(missed))
                continue

            if cap_id not in seen:
                seen.add(cap_id)
                resolved.append({'id': cap_id, 'name': db_caps.get(cap_id, name)})
        outline.append({
            'phase': phase.get('phase', ''),
            'description': phase.get('description', ''),
            'capabilities': resolved,
        })
    return outline
def salvage_req_066(body, alias, m2c, db_caps, unresolved):
    """Build a workflow outline from the REQ_066 body layout.

    Phases come from ``body['execution_phases']``. Each phase lists its
    capabilities in ``capabilities_used`` as strings that may be IDs or names;
    they are resolved through ``resolve_cap``.

    Returns:
        A list with one ``{'phase', 'description', 'capabilities'}`` dict per
        dict-typed phase. ``capabilities`` holds de-duplicated
        ``{'id', 'name'}`` entries in first-seen order.
    """
    outline = []
    for phase in body.get('execution_phases', []):
        if not isinstance(phase, dict):
            continue
        caps = []
        known_ids = set()
        for ref in phase.get('capabilities_used', []):
            cap_id = resolve_cap(ref, alias, m2c, db_caps, unresolved)
            # Skip unresolved references and IDs already listed for the phase.
            if not cap_id or cap_id in known_ids:
                continue
            known_ids.add(cap_id)
            caps.append({'id': cap_id, 'name': db_caps.get(cap_id, ref)})
        entry = {
            'phase': phase.get('phase', ''),
            'description': phase.get('description', ''),
            'capabilities': caps,
        }
        outline.append(entry)
    return outline
def salvage_req_070(body, alias, m2c, db_caps, unresolved):
    """Build a workflow outline from the REQ_070 body layout.

    Phases come from ``body['selected_blueprint']['phases']``. The blueprint
    may be stored as a dict or as a JSON-encoded string. A string that fails
    to parse, or that decodes to something other than a dict, is treated as
    having no phases.

    Returns:
        A list with one ``{'phase', 'description', 'capabilities'}`` dict per
        dict-typed phase. ``capabilities`` holds de-duplicated
        ``{'id', 'name'}`` entries in first-seen order.
    """
    blueprint = body.get('selected_blueprint', {})
    if isinstance(blueprint, str):
        try:
            blueprint = json.loads(blueprint)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError. Parse failures remain
            # best-effort, but KeyboardInterrupt/SystemExit are no longer
            # swallowed as they were by the bare `except:`.
            blueprint = {}
    phases = blueprint.get('phases', []) if isinstance(blueprint, dict) else []

    outline = []
    for phase in phases:
        if not isinstance(phase, dict):
            continue
        resolved = []
        seen = set()
        for ref in phase.get('capabilities_used', []):
            cap_id = resolve_cap(ref, alias, m2c, db_caps, unresolved)
            if cap_id and cap_id not in seen:
                seen.add(cap_id)
                resolved.append({'id': cap_id, 'name': db_caps.get(cap_id, ref)})
        outline.append({
            'phase': phase.get('phase', ''),
            'description': phase.get('description', ''),
            'capabilities': resolved,
        })
    return outline
# Requirement ID -> normalizer for that requirement's body layout.
_SALVAGER_TABLE = (
    ('REQ_004', salvage_req_004),
    ('REQ_031', salvage_req_031),
    ('REQ_053', salvage_req_053),
    ('REQ_066', salvage_req_066),
    ('REQ_070', salvage_req_070),
)
SALVAGERS = dict(_SALVAGER_TABLE)
- # ═══════════════════════════════════════════════════════════
def _salvage_requirement(cur, req_id, alias, m2c, db_caps):
    """Normalize the strategy linked to *req_id* and rewrite its capability links.

    Only the first row returned for the requirement is processed. The
    strategy's body gains ``workflow_outline`` plus two ``_salvage*`` marker
    fields (all other body fields are kept), and its ``strategy_capability``
    rows are replaced by one 'compose' row per resolved capability.
    """
    print(f'\n=== {req_id} ===', flush=True)
    cur.execute(
        "SELECT s.id, s.name, s.body FROM strategy s\n"
        "JOIN requirement_strategy rs ON rs.strategy_id=s.id\n"
        "WHERE rs.requirement_id=%s",
        (req_id,),
    )
    row = cur.fetchone()
    if not row:
        print(f' ⚠️ no strategy found for {req_id}', flush=True)
        return

    strat_id, strat_name = row['id'], row['name']
    # The body may arrive already decoded (dict) or as JSON text.
    raw_body = row['body']
    body = raw_body if isinstance(raw_body, dict) else json.loads(raw_body or '{}')

    unresolved = []
    wo = SALVAGERS[req_id](body, alias, m2c, db_caps, unresolved)
    cap_total = sum(len(ph['capabilities']) for ph in wo)
    unique_caps = {c['id'] for ph in wo for c in ph['capabilities']}

    print(f' strategy: {strat_id} ({strat_name})', flush=True)
    print(f' produced: {len(wo)} phases, {cap_total} cap slots ({len(unique_caps)} unique)', flush=True)
    if unresolved:
        print(f' unresolved refs: {len(unresolved)}', flush=True)
        # Only the first few are echoed to keep the log short.
        for ref in unresolved[:5]:
            print(f' - {ref!r}', flush=True)

    # Add workflow_outline to the body; every original field is preserved.
    body['workflow_outline'] = wo
    body['_salvaged_at'] = '2026-04-22'
    body['_salvage_source'] = 'salvage_placeholder_strategies.py'
    cur.execute(
        'UPDATE strategy SET body = %s WHERE id = %s',
        (json.dumps(body, ensure_ascii=False), strat_id),
    )

    # Rewrite the junction rows: clear whatever exists, then insert one row
    # per unique resolved capability.
    cur.execute('DELETE FROM strategy_capability WHERE strategy_id = %s', (strat_id,))
    for cap_id in unique_caps:
        cur.execute(
            "INSERT INTO strategy_capability (strategy_id, capability_id, relation_type)\n"
            "VALUES (%s, %s, 'compose') ON CONFLICT DO NOTHING",
            (strat_id, cap_id),
        )
    print(f' wrote strategy_capability: {len(unique_caps)} rows', flush=True)

    # Read the count back as a sanity check.
    cur.execute('SELECT COUNT(*) c FROM strategy_capability WHERE strategy_id=%s', (strat_id,))
    print(f' strategy_capability after: {cur.fetchone()["c"]}', flush=True)


def _print_summary(cur):
    """Print the strategy_capability row count for each target requirement."""
    print(f'\n{"="*60}', flush=True)
    print('All 5 placeholder strategies after salvage:', flush=True)
    for req_id in TARGET_REQS:
        cur.execute(
            "SELECT s.id, s.name,\n"
            "(SELECT COUNT(*) FROM strategy_capability sc WHERE sc.strategy_id=s.id) cap_n\n"
            "FROM strategy s\n"
            "JOIN requirement_strategy rs ON rs.strategy_id=s.id\n"
            "WHERE rs.requirement_id=%s",
            (req_id,),
        )
        r = cur.fetchone()
        if r:
            print(f' [{req_id}] {r["id"]} ({r["name"]}): strat_cap={r["cap_n"]}', flush=True)


def main():
    """Salvage every requirement in TARGET_REQS, then print a summary.

    The store is closed even if acquiring the cursor or closing it fails.
    """
    # NOTE(review): nothing here commits explicitly; this presumably relies on
    # the store/cursor running in autocommit mode -- confirm before relying on
    # the writes being persisted.
    s = PostgreSQLCapabilityStore()
    try:
        cur = s._get_cursor()
        try:
            alias, m2c, db_caps = build_alias_and_member(cur)
            print(f'alias entries: {len(alias)}, members: {len(m2c)}, db caps: {len(db_caps)}', flush=True)
            for req_id in TARGET_REQS:
                _salvage_requirement(cur, req_id, alias, m2c, db_caps)
            _print_summary(cur)
        finally:
            cur.close()
    finally:
        s.close()


if __name__ == '__main__':
    main()
|