#!/usr/bin/env python3 """ 清理 tool_table 中无效的 knowledge 引用 """ import os import json import psycopg2 from psycopg2.extras import RealDictCursor from dotenv import load_dotenv load_dotenv() def clean_invalid_refs(): conn = psycopg2.connect( host=os.getenv('KNOWHUB_DB'), port=int(os.getenv('KNOWHUB_PORT', 5432)), user=os.getenv('KNOWHUB_USER'), password=os.getenv('KNOWHUB_PASSWORD'), database=os.getenv('KNOWHUB_DB_NAME') ) conn.autocommit = False cur = conn.cursor(cursor_factory=RealDictCursor) # 查所有工具 cur.execute("SELECT id, name, knowledge FROM tool_table WHERE knowledge IS NOT NULL AND knowledge::text != '[]'") tools = cur.fetchall() print(f"检查 {len(tools)} 个工具...") cleaned_count = 0 total_removed = 0 for tool in tools: knowledge_ids = tool['knowledge'] if isinstance(tool['knowledge'], list) else json.loads(tool['knowledge'] or '[]') if not knowledge_ids: continue # 检查哪些存在 cur.execute('SELECT id FROM knowledge WHERE id = ANY(%s)', (knowledge_ids,)) existing = {r['id'] for r in cur.fetchall()} # 过滤出有效的 valid_ids = [kid for kid in knowledge_ids if kid in existing] if len(valid_ids) < len(knowledge_ids): removed = len(knowledge_ids) - len(valid_ids) print(f"✅ {tool['name']}: 移除 {removed} 个无效引用,保留 {len(valid_ids)} 个") cur.execute( "UPDATE tool_table SET knowledge = %s WHERE id = %s", (json.dumps(valid_ids), tool['id']) ) cleaned_count += 1 total_removed += removed conn.commit() print(f"\n完成:清理了 {cleaned_count} 个工具,共移除 {total_removed} 个无效引用") conn.close() if __name__ == "__main__": clean_invalid_refs()