#!/usr/bin/env python3 """ 补齐 tool 表:为 capability_tool 引用但 tool 表里没有的 tool_id 创建存根行。 只看 tao_dev 视图引用的 tool_id(dev_abstract 已经齐备)。 新建 tool 行打 version='tao_dev' 标签(与 `capability.version='tao_dev'` 呼应)。 字段策略: id = 原 tool_id(路径或名字,原样保留) name = 同 id(因为 tao_dev 的 tool_id 大多已经是人类可读名) version = 'tao_dev' status = '未接入'(默认) 其他 = 空串 / 空 JSON """ import json, sys, time from pathlib import Path import psycopg2.extras sys.path.insert(0, str(Path(__file__).parent.parent.parent)) from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore def main(): s = PostgreSQLCapabilityStore() cur = s._get_cursor() try: cur.execute("SET statement_timeout = '120s'") # 所有 cap_tool 引用 vs tool 表现有 cur.execute("""SELECT DISTINCT ct.tool_id FROM capability_tool ct JOIN capability c ON c.id=ct.capability_id""") refs = {r['tool_id'] for r in cur.fetchall()} cur.execute('SELECT id FROM tool') existing = {r['id'] for r in cur.fetchall()} missing = sorted(refs - existing) print(f'cap_tool 引用独特 tool_id: {len(refs)}', flush=True) print(f'tool 表已有: {len(existing)}', flush=True) print(f'缺失待建: {len(missing)}', flush=True) now_ts = int(time.time()) inserted = 0 for tid in missing: cur.execute("""INSERT INTO tool (id, name, version, introduction, tutorial, input, output, updated_time, status) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING""", (tid, tid, 'tao_dev', '', '', json.dumps(''), json.dumps(''), now_ts, '未接入')) inserted += cur.rowcount or 0 print(f'\ninserted: {inserted}', flush=True) # 验证 cur.execute("SELECT COUNT(*) c FROM tool WHERE version='tao_dev'") print(f'tool 表 version=tao_dev 的行: {cur.fetchone()["c"]}', flush=True) cur.execute('SELECT COUNT(*) c FROM tool') print(f'tool 表总行数: {cur.fetchone()["c"]}', flush=True) # 残留缺失 cur.execute("""SELECT DISTINCT ct.tool_id FROM capability_tool ct WHERE ct.tool_id NOT IN (SELECT id FROM tool)""") still_miss = [r['tool_id'] for r in cur.fetchall()] print(f'仍缺失: {len(still_miss)}', flush=True) finally: cur.close(); s.close() if __name__ == '__main__': main()