| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- #!/usr/bin/env python3
- """
- 补齐 tool 表:为 capability_tool 引用但 tool 表里没有的 tool_id 创建存根行。
- 只看 tao_dev 视图引用的 tool_id(dev_abstract 已经齐备)。
- 新建 tool 行打 version='tao_dev' 标签(与 `capability.version='tao_dev'` 呼应)。
- 字段策略:
- id = 原 tool_id(路径或名字,原样保留)
- name = 同 id(因为 tao_dev 的 tool_id 大多已经是人类可读名)
- version = 'tao_dev'
- status = '未接入'(默认)
- 其他 = 空串 / 空 JSON
- """
- import json, sys, time
- from pathlib import Path
- import psycopg2.extras
- sys.path.insert(0, str(Path(__file__).parent.parent.parent))
- from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
- def main():
- s = PostgreSQLCapabilityStore()
- cur = s._get_cursor()
- try:
- cur.execute("SET statement_timeout = '120s'")
- # 所有 cap_tool 引用 vs tool 表现有
- cur.execute("""SELECT DISTINCT ct.tool_id FROM capability_tool ct
- JOIN capability c ON c.id=ct.capability_id""")
- refs = {r['tool_id'] for r in cur.fetchall()}
- cur.execute('SELECT id FROM tool')
- existing = {r['id'] for r in cur.fetchall()}
- missing = sorted(refs - existing)
- print(f'cap_tool 引用独特 tool_id: {len(refs)}', flush=True)
- print(f'tool 表已有: {len(existing)}', flush=True)
- print(f'缺失待建: {len(missing)}', flush=True)
- now_ts = int(time.time())
- inserted = 0
- for tid in missing:
- cur.execute("""INSERT INTO tool (id, name, version, introduction, tutorial,
- input, output, updated_time, status)
- VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
- ON CONFLICT (id) DO NOTHING""",
- (tid, tid, 'tao_dev', '', '',
- json.dumps(''), json.dumps(''), now_ts, '未接入'))
- inserted += cur.rowcount or 0
- print(f'\ninserted: {inserted}', flush=True)
- # 验证
- cur.execute("SELECT COUNT(*) c FROM tool WHERE version='tao_dev'")
- print(f'tool 表 version=tao_dev 的行: {cur.fetchone()["c"]}', flush=True)
- cur.execute('SELECT COUNT(*) c FROM tool')
- print(f'tool 表总行数: {cur.fetchone()["c"]}', flush=True)
- # 残留缺失
- cur.execute("""SELECT DISTINCT ct.tool_id FROM capability_tool ct
- WHERE ct.tool_id NOT IN (SELECT id FROM tool)""")
- still_miss = [r['tool_id'] for r in cur.fetchall()]
- print(f'仍缺失: {len(still_miss)}', flush=True)
- finally:
- cur.close(); s.close()
- if __name__ == '__main__':
- main()
|