migrate_resources.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. #!/usr/bin/env python3
  2. """
  3. 迁移 SQLite resources 表中的 text 数据到 PostgreSQL
  4. 删除 code 类型数据
  5. """
  6. import os
  7. import json
  8. import sqlite3
  9. import psycopg2
  10. from psycopg2.extras import RealDictCursor
  11. from dotenv import load_dotenv
  12. from datetime import datetime
  13. load_dotenv()
  14. CREATE_RESOURCES_TABLE = """
  15. CREATE TABLE IF NOT EXISTS resources (
  16. id TEXT PRIMARY KEY,
  17. title TEXT,
  18. body TEXT,
  19. secure_body TEXT,
  20. content_type TEXT,
  21. metadata JSONB,
  22. sort_order INTEGER DEFAULT 0,
  23. submitted_by TEXT,
  24. created_at BIGINT,
  25. updated_at BIGINT
  26. ) WITH (appendoptimized=false);
  27. """
  28. def migrate_resources():
  29. # 连接 SQLite
  30. sqlite_conn = sqlite3.connect('/root/Agent/knowhub/knowhub.db')
  31. sqlite_conn.row_factory = sqlite3.Row
  32. sqlite_cur = sqlite_conn.cursor()
  33. # 连接 PostgreSQL
  34. pg_conn = psycopg2.connect(
  35. host=os.getenv('KNOWHUB_DB'),
  36. port=int(os.getenv('KNOWHUB_PORT', 5432)),
  37. user=os.getenv('KNOWHUB_USER'),
  38. password=os.getenv('KNOWHUB_PASSWORD'),
  39. database=os.getenv('KNOWHUB_DB_NAME')
  40. )
  41. pg_conn.autocommit = True
  42. pg_cur = pg_conn.cursor(cursor_factory=RealDictCursor)
  43. # 创建表
  44. pg_cur.execute(CREATE_RESOURCES_TABLE)
  45. print("✅ resources 表已创建")
  46. # 读取 text 类型数据
  47. sqlite_cur.execute("SELECT * FROM resources WHERE content_type='text'")
  48. texts = sqlite_cur.fetchall()
  49. print(f"\n找到 {len(texts)} 条 text 数据")
  50. success = 0
  51. for text in texts:
  52. try:
  53. # 转换时间戳
  54. created_at = updated_at = None
  55. if text['created_at']:
  56. dt = datetime.fromisoformat(text['created_at'].replace('+00:00', ''))
  57. created_at = int(dt.timestamp())
  58. if text['updated_at']:
  59. dt = datetime.fromisoformat(text['updated_at'].replace('+00:00', ''))
  60. updated_at = int(dt.timestamp())
  61. pg_cur.execute("""
  62. INSERT INTO resources (id, title, body, secure_body, content_type,
  63. metadata, sort_order, submitted_by, created_at, updated_at)
  64. VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
  65. ON CONFLICT (id) DO UPDATE SET
  66. title = EXCLUDED.title,
  67. body = EXCLUDED.body,
  68. secure_body = EXCLUDED.secure_body,
  69. metadata = EXCLUDED.metadata,
  70. sort_order = EXCLUDED.sort_order,
  71. updated_at = EXCLUDED.updated_at
  72. """, (
  73. text['id'],
  74. text['title'],
  75. text['body'],
  76. text['secure_body'],
  77. text['content_type'],
  78. text['metadata'],
  79. text['sort_order'] or 0,
  80. text['submitted_by'],
  81. created_at,
  82. updated_at
  83. ))
  84. success += 1
  85. except Exception as e:
  86. print(f"❌ 迁移失败 {text['id']}: {e}")
  87. print(f"✅ text 数据迁移完成: {success}/{len(texts)}")
  88. # 删除 code 类型数据
  89. sqlite_cur.execute("SELECT COUNT(*) FROM resources WHERE content_type='code'")
  90. code_count = sqlite_cur.fetchone()[0]
  91. sqlite_cur.execute("DELETE FROM resources WHERE content_type='code'")
  92. sqlite_conn.commit()
  93. print(f"✅ 已删除 {code_count} 条 code 数据")
  94. sqlite_conn.close()
  95. pg_conn.close()
  96. if __name__ == "__main__":
  97. migrate_resources()