#!/usr/bin/env python3
"""Fix the embedding migration error in the ``knowledge`` table.

The old embeddings were generated from ``task``, so they belong in
``task_embedding`` rather than ``content_embedding``.

Steps:
    1. Copy ``content_embedding`` into ``task_embedding`` (only for rows
       whose ``task_embedding`` is NULL).
    2. Clear ``content_embedding`` so that fill_embeddings.py can
       regenerate it from the content.

Both steps run inside a single transaction: if anything fails part-way
through, the whole change is rolled back instead of leaving the table
half-migrated.

WARNING: step 2 clears *every* non-NULL ``content_embedding``. Re-running
this script after fill_embeddings.py has regenerated the column will wipe
the regenerated vectors again.

Connection settings are read from the environment (loaded from the
``.env`` file two directories above this script): KNOWHUB_DB (host),
KNOWHUB_PORT (defaults to 5432), KNOWHUB_USER, KNOWHUB_PASSWORD and
KNOWHUB_DB_NAME.
"""
import os

import psycopg2
from dotenv import load_dotenv

_dir = os.path.dirname(os.path.abspath(__file__))
_root = os.path.normpath(os.path.join(_dir, '..', '..'))
load_dotenv(os.path.join(_root, '.env'))

conn = psycopg2.connect(
    host=os.getenv('KNOWHUB_DB'),
    port=int(os.getenv('KNOWHUB_PORT', 5432)),
    user=os.getenv('KNOWHUB_USER'),
    password=os.getenv('KNOWHUB_PASSWORD'),
    database=os.getenv('KNOWHUB_DB_NAME'),
    connect_timeout=10
)

try:
    # `with conn` commits when the block succeeds and rolls back when it
    # raises, which makes the two UPDATEs atomic. `with conn.cursor()`
    # closes the cursor on exit. Neither closes the connection itself, so
    # that is done in the `finally` clause below.
    with conn, conn.cursor() as cur:
        print("Connected.\n")

        # Step 1: move content_embedding into task_embedding
        # (only where task_embedding is still empty).
        print("[1] Moving content_embedding -> task_embedding ...")
        cur.execute("""
            UPDATE knowledge
            SET task_embedding = content_embedding
            WHERE task_embedding IS NULL AND content_embedding IS NOT NULL
        """)
        print(f" Moved {cur.rowcount} rows.")

        # Step 2: clear content_embedding so fill_embeddings.py can
        # regenerate it based on the content.
        print("[2] Clearing content_embedding ...")
        cur.execute("UPDATE knowledge SET content_embedding = NULL WHERE content_embedding IS NOT NULL")
        print(f" Cleared {cur.rowcount} rows.")

        # Verify (runs in the same transaction, so it sees the updates above).
        print("\n[Verify]")
        cur.execute("SELECT COUNT(*) FROM knowledge WHERE task_embedding IS NOT NULL")
        print(f" task_embedding: {cur.fetchone()[0]} rows have data")
        cur.execute("SELECT COUNT(*) FROM knowledge WHERE content_embedding IS NOT NULL")
        print(f" content_embedding: {cur.fetchone()[0]} rows have data (should be 0)")
finally:
    # Always release the connection, including when a statement failed.
    conn.close()

print("\nDone. Now run fill_embeddings.py to generate content_embedding.")