#!/usr/bin/env python3
"""
修正 knowledge 表的 embedding 迁移错误：
旧 embedding 是基于 task 生成的，应该迁移到 task_embedding 而非 content_embedding。
1. 将 content_embedding 数据移到 task_embedding（如果 task_embedding 为空）
2. 清空 content_embedding（等待 fill_embeddings.py 重新生成）
"""
import os, psycopg2
from dotenv import load_dotenv

# Resolve the project root (two directories above this script) and load its
# .env file so the KNOWHUB_* connection settings are visible to os.getenv.
_dir = os.path.dirname(os.path.abspath(__file__))
_root = os.path.normpath(os.path.join(_dir, '..', '..'))
load_dotenv(os.path.join(_root, '.env'))

conn = psycopg2.connect(
    host=os.getenv('KNOWHUB_DB'),
    port=int(os.getenv('KNOWHUB_PORT', 5432)),
    user=os.getenv('KNOWHUB_USER'),
    password=os.getenv('KNOWHUB_PASSWORD'),
    database=os.getenv('KNOWHUB_DB_NAME'),
    connect_timeout=10
)
# Autocommit is deliberately left off: both UPDATEs below belong to one
# migration, so they run in a single transaction that is committed once at the
# end. If anything fails, nothing is committed and the table is left as it was.
cur = conn.cursor()

try:
    print("Connected.\n")

    # Step 1: copy content_embedding into task_embedding, but only for rows
    # whose task_embedding is still empty, so existing values are never
    # overwritten.
    print("[1] Moving content_embedding -> task_embedding ...")
    cur.execute("""
    UPDATE knowledge
    SET task_embedding = content_embedding
    WHERE task_embedding IS NULL AND content_embedding IS NOT NULL
""")
    print(f"  Moved {cur.rowcount} rows.")

    # Step 2: clear every content_embedding so fill_embeddings.py can
    # regenerate it from the content column.
    print("[2] Clearing content_embedding ...")
    cur.execute("UPDATE knowledge SET content_embedding = NULL WHERE content_embedding IS NOT NULL")
    print(f"  Cleared {cur.rowcount} rows.")

    # Verify: these counts are read inside the same transaction, so they
    # already reflect the two UPDATEs above.
    print("\n[Verify]")
    cur.execute("SELECT COUNT(*) FROM knowledge WHERE task_embedding IS NOT NULL")
    print(f"  task_embedding: {cur.fetchone()[0]} rows have data")
    cur.execute("SELECT COUNT(*) FROM knowledge WHERE content_embedding IS NOT NULL")
    print(f"  content_embedding: {cur.fetchone()[0]} rows have data (should be 0)")

    # Make both steps durable together.
    conn.commit()
finally:
    # Always release the cursor and connection. Closing a connection with an
    # uncommitted transaction discards the pending changes, so a failure
    # above leaves the table unmodified.
    cur.close()
    conn.close()

# Only reached when the migration committed successfully.
print("\nDone. Now run fill_embeddings.py to generate content_embedding.")