#!/usr/bin/env python3
"""
Apply the remaining schema changes to the ``knowledge`` table:
1. Add the ``tools`` column.
2. Add the ``task_embedding`` and ``content_embedding`` columns.
3. Copy the data from ``embedding`` into ``content_embedding``.
4. Drop the old ``embedding`` column.

Note: step 3 is an UPDATE over 387 rows x 1536-dimensional vectors and may
take tens of seconds. If it appears stuck, do not kill the process; let it
finish, otherwise it will deadlock.

Every step first checks the current table layout, so the script can be
re-run after a partial run without repeating work that is already done.
"""
import os

import psycopg2
from dotenv import load_dotenv

# Load connection settings from the .env file two directories above this script.
_dir = os.path.dirname(os.path.abspath(__file__))
_root = os.path.normpath(os.path.join(_dir, '..', '..'))
load_dotenv(os.path.join(_root, '.env'))

conn = psycopg2.connect(
    host=os.getenv('KNOWHUB_DB'),
    port=int(os.getenv('KNOWHUB_PORT', 5432)),
    user=os.getenv('KNOWHUB_USER'),
    password=os.getenv('KNOWHUB_PASSWORD'),
    database=os.getenv('KNOWHUB_DB_NAME'),
    connect_timeout=10
)
# Autocommit: every statement below is committed on its own as soon as it
# completes, so finished steps survive a failure in a later step.
conn.autocommit = True
cur = conn.cursor()
print("Connected.\n")


def col_exists(table: str, column: str) -> bool:
    """Return True if ``column`` currently exists on ``public.<table>``.

    Queries the system catalogs directly, counting only user columns
    (``attnum > 0``) that have not been dropped.

    Args:
        table: Unqualified table name, looked up in the ``public`` schema.
        column: Column name to look for.

    Returns:
        True when a matching live column is found, False otherwise.
    """
    cur.execute("""
        SELECT 1 FROM pg_catalog.pg_attribute a
        JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
        JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
        WHERE c.relname = %s AND n.nspname = 'public'
          AND a.attname = %s AND a.attnum > 0 AND NOT a.attisdropped
    """, (table, column))
    return cur.fetchone() is not None


# The try/finally guarantees the cursor and connection are released even when
# one of the statements below raises.
try:
    # Step 1: add the tools column
    print("[1] Add tools column...")
    if not col_exists('knowledge', 'tools'):
        cur.execute("ALTER TABLE knowledge ADD COLUMN tools JSONB DEFAULT '[]'")
        print(" + Added tools")
    else:
        print(" . tools already exists")

    # Step 2: add task_embedding and content_embedding
    print("[2] Add task_embedding, content_embedding columns...")
    if not col_exists('knowledge', 'task_embedding'):
        cur.execute("ALTER TABLE knowledge ADD COLUMN task_embedding real[]")
        print(" + Added task_embedding")
    else:
        print(" . task_embedding already exists")

    if not col_exists('knowledge', 'content_embedding'):
        cur.execute("ALTER TABLE knowledge ADD COLUMN content_embedding real[]")
        print(" + Added content_embedding")
    else:
        print(" . content_embedding already exists")

    # Step 3: migrate embedding -> content_embedding
    if col_exists('knowledge', 'embedding'):
        print("[3] Migrating embedding -> content_embedding (this may take a while)...")
        # Only fill rows that have no content_embedding yet, so a re-run never
        # overwrites values that were already migrated or written since.
        cur.execute("""
            UPDATE knowledge
            SET content_embedding = embedding
            WHERE content_embedding IS NULL AND embedding IS NOT NULL
        """)
        print(" Migrated.")

        # Step 4: drop the old embedding column
        print("[4] Dropping old embedding column...")
        cur.execute("ALTER TABLE knowledge DROP COLUMN embedding")
        print(" Dropped.")
    else:
        print("[3] embedding column already removed, skip migration.")
        print("[4] skip.")

    print("\nDone. Run check_table_structure.py to verify.")
finally:
    cur.close()
    conn.close()