phase5_add_knowledge_capability.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. #!/usr/bin/env python3
  2. """
  3. Phase 5 补丁:新建 knowledge_capability junction,保留"req-specific 执行用了哪些 cap"的粒度。
  4. 背景:Phase 4/5 迁移后,strategy_capability 是抽象 pattern 的 cap 联合集,
  5. 不再与某 req 的 research 强一致。原 strat_cap ⊆ req_cap 约束失效,
  6. 需要在 knowledge 层重建同等约束:knowledge_cap ⊆ req_cap(via requirement_knowledge)。
  7. 数据源:bk_20260422_strategy_capability(具体 strategy 迁移前的快照)+
  8. knowledge.source.original_strategy_id(指向原具体 strategy)
  9. 验证:所有新建的 knowledge_capability 必须满足
  10. (knowledge.req_id, cap_id) ∈ requirement_capability
  11. """
  12. import json
  13. import sys
  14. from pathlib import Path
  15. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  16. from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
  17. def main():
  18. s = PostgreSQLCapabilityStore()
  19. cur = s._get_cursor()
  20. try:
  21. # 1. 检查 knowledge_capability 是否存在(可能已是空表)
  22. cur.execute("""SELECT EXISTS (SELECT 1 FROM information_schema.tables
  23. WHERE table_name='knowledge_capability')""")
  24. if not cur.fetchone()['exists']:
  25. print('创建 knowledge_capability 表...', flush=True)
  26. cur.execute("""
  27. CREATE TABLE knowledge_capability (
  28. knowledge_id VARCHAR NOT NULL,
  29. capability_id VARCHAR NOT NULL,
  30. PRIMARY KEY (knowledge_id, capability_id)
  31. ) DISTRIBUTED BY (knowledge_id)
  32. """)
  33. print(' ✓ 已建表', flush=True)
  34. else:
  35. cur.execute('SELECT COUNT(*) c FROM knowledge_capability')
  36. print(f'knowledge_capability 已存在 ({cur.fetchone()["c"]} rows)', flush=True)
  37. # 2. 从备份表 + knowledge.source 重建 knowledge ↔ cap
  38. print('\n=== 从备份还原 knowledge-cap 关系 ===', flush=True)
  39. cur.execute("""SELECT id, source FROM knowledge
  40. WHERE version='howard_strategy_instance'""")
  41. knowledge_list = cur.fetchall()
  42. print(f' new knowledge: {len(knowledge_list)}', flush=True)
  43. total_inserted = 0
  44. for k in knowledge_list:
  45. src = k['source'] if isinstance(k['source'], dict) else json.loads(k['source'] or '{}')
  46. orig_sid = src.get('original_strategy_id')
  47. if not orig_sid: continue
  48. # 查原具体 strategy 的 cap(从备份)
  49. cur.execute("""SELECT capability_id FROM bk_20260422_strategy_capability
  50. WHERE strategy_id = %s""", (orig_sid,))
  51. caps = [r['capability_id'] for r in cur.fetchall()]
  52. for cap_id in caps:
  53. cur.execute("""INSERT INTO knowledge_capability
  54. (knowledge_id, capability_id) VALUES (%s, %s)
  55. ON CONFLICT DO NOTHING""", (k['id'], cap_id))
  56. total_inserted += 1
  57. print(f' inserted knowledge_capability: {total_inserted}', flush=True)
  58. # 3. 验证约束:knowledge_cap ⊆ req_cap(via requirement_knowledge)
  59. print('\n=== 验证 knowledge_cap ⊆ req_cap ===', flush=True)
  60. cur.execute("""SELECT COUNT(*) c FROM knowledge_capability kc
  61. JOIN requirement_knowledge rk ON rk.knowledge_id = kc.knowledge_id
  62. LEFT JOIN requirement_capability rc
  63. ON rc.requirement_id = rk.requirement_id
  64. AND rc.capability_id = kc.capability_id
  65. WHERE rc.capability_id IS NULL""")
  66. violations = cur.fetchone()['c']
  67. print(f' 违规行数: {violations} {"❌" if violations>0 else "✓"}', flush=True)
  68. # 4. 展示:每个 knowledge 的 cap 数量分布
  69. cur.execute("""SELECT n_caps, COUNT(*) knowledge_n FROM
  70. (SELECT knowledge_id, COUNT(*) n_caps FROM knowledge_capability
  71. WHERE knowledge_id IN (SELECT id FROM knowledge WHERE version='howard_strategy_instance')
  72. GROUP BY knowledge_id) t
  73. GROUP BY n_caps ORDER BY n_caps""")
  74. print('\n 每 knowledge cap 数量分布:', flush=True)
  75. for r in cur.fetchall():
  76. print(f' {r["n_caps"]} caps: {r["knowledge_n"]} knowledge', flush=True)
  77. # 5. 汇总最终 DB 状态
  78. print('\n=== 最终汇总 ===', flush=True)
  79. cur.execute("""SELECT
  80. (SELECT COUNT(*) FROM strategy) s_total,
  81. (SELECT COUNT(*) FROM knowledge WHERE version='howard_strategy_instance') new_k,
  82. (SELECT COUNT(*) FROM requirement_strategy) rs,
  83. (SELECT COUNT(*) FROM requirement_knowledge
  84. WHERE knowledge_id IN (SELECT id FROM knowledge WHERE version='howard_strategy_instance')) new_rk,
  85. (SELECT COUNT(*) FROM strategy_knowledge
  86. WHERE knowledge_id IN (SELECT id FROM knowledge WHERE version='howard_strategy_instance')) new_sk,
  87. (SELECT COUNT(*) FROM knowledge_resource
  88. WHERE knowledge_id IN (SELECT id FROM knowledge WHERE version='howard_strategy_instance')) new_kr,
  89. (SELECT COUNT(*) FROM knowledge_capability
  90. WHERE knowledge_id IN (SELECT id FROM knowledge WHERE version='howard_strategy_instance')) new_kc,
  91. (SELECT COUNT(*) FROM strategy_capability) sc,
  92. (SELECT COUNT(*) FROM strategy_resource) sr,
  93. (SELECT COUNT(*) FROM requirement_capability) req_cap""")
  94. for k, v in dict(cur.fetchone()).items(): print(f' {k}: {v}', flush=True)
  95. finally:
  96. cur.close()
  97. s.close()
  98. if __name__ == '__main__':
  99. main()