| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- #!/usr/bin/env python3
- """
- 补全 knowledge 表的剩余变更:
- 1. 添加 tools 列
- 2. 添加 task_embedding, content_embedding 列
- 3. 将 embedding 数据迁移到 content_embedding
- 4. 删除旧 embedding 列
- 注意:步骤3涉及 387 条 x 1536维向量的 UPDATE,可能需要几十秒。
- 如果中途卡住不要 kill 进程,等它跑完,否则会死锁。
- """
- import os, psycopg2
- from dotenv import load_dotenv
# Load environment variables from the .env file that sits two directory
# levels above this script.
_dir = os.path.dirname(os.path.abspath(__file__))
_root = os.path.normpath(os.path.join(_dir, '..', '..'))
load_dotenv(os.path.join(_root, '.env'))

# Connection settings come from KNOWHUB_* environment variables; only the
# port has a fallback value (5432).
_db_settings = {
    'host': os.getenv('KNOWHUB_DB'),
    'port': int(os.getenv('KNOWHUB_PORT', 5432)),
    'user': os.getenv('KNOWHUB_USER'),
    'password': os.getenv('KNOWHUB_PASSWORD'),
    'database': os.getenv('KNOWHUB_DB_NAME'),
    'connect_timeout': 10,
}
conn = psycopg2.connect(**_db_settings)

# With autocommit on, each statement issued through `cur` is committed as
# soon as it completes; no explicit conn.commit() is needed.
conn.autocommit = True
cur = conn.cursor()
print("Connected.\n")
def col_exists(table, column):
    """Return True if `column` currently exists on `table` in schema public.

    The lookup runs against the pg_catalog tables through the module-level
    cursor ``cur``. System columns (attnum <= 0) and columns that have been
    dropped are not counted.
    """
    lookup_sql = """
        SELECT 1 FROM pg_catalog.pg_attribute a
        JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
        JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
        WHERE c.relname = %s AND n.nspname = 'public'
        AND a.attname = %s AND a.attnum > 0 AND NOT a.attisdropped
    """
    cur.execute(lookup_sql, (table, column))
    match = cur.fetchone()
    return match is not None
def _add_column_if_missing(column, ddl):
    """Execute `ddl` to add `column` to the knowledge table unless it exists.

    Args:
        column: Name of the column to look for on the knowledge table.
        ddl: Complete ALTER TABLE statement that adds the column.
    """
    if col_exists('knowledge', column):
        print(f" . {column} already exists")
        return
    cur.execute(ddl)
    print(f" + Added {column}")


try:
    # Step 1: add the tools column.
    print("[1] Add tools column...")
    _add_column_if_missing(
        'tools',
        "ALTER TABLE knowledge ADD COLUMN tools JSONB DEFAULT '[]'",
    )

    # Step 2: add the task_embedding and content_embedding columns.
    print("[2] Add task_embedding, content_embedding columns...")
    _add_column_if_missing(
        'task_embedding',
        "ALTER TABLE knowledge ADD COLUMN task_embedding real[]",
    )
    _add_column_if_missing(
        'content_embedding',
        "ALTER TABLE knowledge ADD COLUMN content_embedding real[]",
    )

    # Step 3: copy embedding -> content_embedding. The presence of the old
    # column is what decides whether steps 3 and 4 still have to run, so the
    # script can be re-run safely after a completed migration.
    if col_exists('knowledge', 'embedding'):
        print("[3] Migrating embedding -> content_embedding (this may take a while)...")
        # Only rows whose content_embedding is still NULL are filled, so
        # values already present in content_embedding are never overwritten.
        # Per the module docstring this UPDATE can take tens of seconds;
        # let it finish instead of killing the process.
        cur.execute("""
            UPDATE knowledge
            SET content_embedding = embedding
            WHERE content_embedding IS NULL AND embedding IS NOT NULL
        """)
        print(" Migrated.")

        # Step 4: drop the old embedding column. This is only reached when
        # the UPDATE above did not raise.
        print("[4] Dropping old embedding column...")
        cur.execute("ALTER TABLE knowledge DROP COLUMN embedding")
        print(" Dropped.")
    else:
        print("[3] embedding column already removed, skip migration.")
        print("[4] skip.")

    print("\nDone. Run check_table_structure.py to verify.")
finally:
    # Release the cursor and the connection even when a statement above
    # raises, instead of only on the success path.
    cur.close()
    conn.close()
|