| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- #!/usr/bin/env python3
- """
- 清理 tool_table 中无效的 knowledge 引用
- """
- import os
- import json
- import psycopg2
- from psycopg2.extras import RealDictCursor
- from dotenv import load_dotenv
- load_dotenv()
- def clean_invalid_refs():
- conn = psycopg2.connect(
- host=os.getenv('KNOWHUB_DB'),
- port=int(os.getenv('KNOWHUB_PORT', 5432)),
- user=os.getenv('KNOWHUB_USER'),
- password=os.getenv('KNOWHUB_PASSWORD'),
- database=os.getenv('KNOWHUB_DB_NAME')
- )
- conn.autocommit = False
- cur = conn.cursor(cursor_factory=RealDictCursor)
- # 查所有工具
- cur.execute("SELECT id, name, knowledge FROM tool_table WHERE knowledge IS NOT NULL AND knowledge::text != '[]'")
- tools = cur.fetchall()
- print(f"检查 {len(tools)} 个工具...")
- cleaned_count = 0
- total_removed = 0
- for tool in tools:
- knowledge_ids = tool['knowledge'] if isinstance(tool['knowledge'], list) else json.loads(tool['knowledge'] or '[]')
- if not knowledge_ids:
- continue
- # 检查哪些存在
- cur.execute('SELECT id FROM knowledge WHERE id = ANY(%s)', (knowledge_ids,))
- existing = {r['id'] for r in cur.fetchall()}
- # 过滤出有效的
- valid_ids = [kid for kid in knowledge_ids if kid in existing]
- if len(valid_ids) < len(knowledge_ids):
- removed = len(knowledge_ids) - len(valid_ids)
- print(f"✅ {tool['name']}: 移除 {removed} 个无效引用,保留 {len(valid_ids)} 个")
- cur.execute(
- "UPDATE tool_table SET knowledge = %s WHERE id = %s",
- (json.dumps(valid_ids), tool['id'])
- )
- cleaned_count += 1
- total_removed += removed
- conn.commit()
- print(f"\n完成:清理了 {cleaned_count} 个工具,共移除 {total_removed} 个无效引用")
- conn.close()
- if __name__ == "__main__":
- clean_invalid_refs()
|