taodev_backfill_missing_tools.py 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. #!/usr/bin/env python3
  2. """
  3. 补齐 tool 表:为 capability_tool 引用但 tool 表里没有的 tool_id 创建存根行。
  4. 只看 tao_dev 视图引用的 tool_id(dev_abstract 已经齐备)。
  5. 新建 tool 行打 version='tao_dev' 标签(与 `capability.version='tao_dev'` 呼应)。
  6. 字段策略:
  7. id = 原 tool_id(路径或名字,原样保留)
  8. name = 同 id(因为 tao_dev 的 tool_id 大多已经是人类可读名)
  9. version = 'tao_dev'
  10. status = '未接入'(默认)
  11. 其他 = 空串 / 空 JSON
  12. """
  13. import json, sys, time
  14. from pathlib import Path
  15. import psycopg2.extras
  16. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  17. from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
  18. def main():
  19. s = PostgreSQLCapabilityStore()
  20. cur = s._get_cursor()
  21. try:
  22. cur.execute("SET statement_timeout = '120s'")
  23. # 所有 cap_tool 引用 vs tool 表现有
  24. cur.execute("""SELECT DISTINCT ct.tool_id FROM capability_tool ct
  25. JOIN capability c ON c.id=ct.capability_id""")
  26. refs = {r['tool_id'] for r in cur.fetchall()}
  27. cur.execute('SELECT id FROM tool')
  28. existing = {r['id'] for r in cur.fetchall()}
  29. missing = sorted(refs - existing)
  30. print(f'cap_tool 引用独特 tool_id: {len(refs)}', flush=True)
  31. print(f'tool 表已有: {len(existing)}', flush=True)
  32. print(f'缺失待建: {len(missing)}', flush=True)
  33. now_ts = int(time.time())
  34. inserted = 0
  35. for tid in missing:
  36. cur.execute("""INSERT INTO tool (id, name, version, introduction, tutorial,
  37. input, output, updated_time, status)
  38. VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
  39. ON CONFLICT (id) DO NOTHING""",
  40. (tid, tid, 'tao_dev', '', '',
  41. json.dumps(''), json.dumps(''), now_ts, '未接入'))
  42. inserted += cur.rowcount or 0
  43. print(f'\ninserted: {inserted}', flush=True)
  44. # 验证
  45. cur.execute("SELECT COUNT(*) c FROM tool WHERE version='tao_dev'")
  46. print(f'tool 表 version=tao_dev 的行: {cur.fetchone()["c"]}', flush=True)
  47. cur.execute('SELECT COUNT(*) c FROM tool')
  48. print(f'tool 表总行数: {cur.fetchone()["c"]}', flush=True)
  49. # 残留缺失
  50. cur.execute("""SELECT DISTINCT ct.tool_id FROM capability_tool ct
  51. WHERE ct.tool_id NOT IN (SELECT id FROM tool)""")
  52. still_miss = [r['tool_id'] for r in cur.fetchall()]
  53. print(f'仍缺失: {len(still_miss)}', flush=True)
  54. finally:
  55. cur.close(); s.close()
  56. if __name__ == '__main__':
  57. main()