direct_insert.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. #!/usr/bin/env python3
  2. """
  3. 直接插入知识到数据库的脚本
  4. """
  5. import sys
  6. import os
  7. import uuid
  8. import time
  9. import asyncio
  10. from datetime import datetime, timezone
  11. sys.path.insert(0, '/root/Agent')
  12. # 设置环境变量
  13. os.environ['KNOWHUB_DB'] = 'gp-t4n72471pkmt4b9q7o-master.gpdbmaster.singapore.rds.aliyuncs.com'
  14. os.environ['KNOWHUB_PORT'] = '5432'
  15. os.environ['KNOWHUB_USER'] = 'aiddit_aigc'
  16. os.environ['KNOWHUB_PASSWORD'] = '%a&&yqNxg^V1$toJ*WOa^-b^X=QJ'
  17. os.environ['KNOWHUB_DB_NAME'] = 'knowhub'
  18. from knowhub.knowhub_db.pg_store import PostgreSQLStore
  19. from knowhub.embeddings import get_embedding
  20. # 测试知识数据
  21. knowledge_data = {
  22. 'task': '接口测试验证条目',
  23. 'content': '这是一条用于验证 upload 接口是否可用的测试知识,可以删除。',
  24. 'types': ['experience'],
  25. 'tags': {'source': 'api_test'},
  26. 'score': 1
  27. }
  28. async def main():
  29. print('正在连接数据库...')
  30. store = PostgreSQLStore()
  31. # 生成 ID
  32. now = datetime.now(timezone.utc)
  33. knowledge_id = f'knowledge-{now.strftime("%Y%m%d")}-{uuid.uuid4().hex[:8]}'
  34. message_id = f'msg-{uuid.uuid4().hex[:12]}'
  35. # 生成 embedding
  36. print('正在生成 embedding...')
  37. task_embedding = await get_embedding(knowledge_data['task'])
  38. content_embedding = await get_embedding(knowledge_data['content'])
  39. # 构建知识记录
  40. knowledge = {
  41. 'id': knowledge_id,
  42. 'message_id': message_id,
  43. 'task': knowledge_data['task'],
  44. 'content': knowledge_data['content'],
  45. 'types': knowledge_data['types'],
  46. 'tags': knowledge_data.get('tags', {}),
  47. 'tag_keys': list(knowledge_data.get('tags', {}).keys()),
  48. 'scopes': ['org:cybertogether'],
  49. 'owner': 'system',
  50. 'source': {'category': 'execution'},
  51. 'eval': {'score': knowledge_data.get('score', 3)},
  52. 'task_embedding': task_embedding,
  53. 'content_embedding': content_embedding,
  54. 'created_at': int(time.time()),
  55. 'updated_at': int(time.time()),
  56. 'status': 'approved',
  57. }
  58. print(f'正在插入知识条目...')
  59. store.insert(knowledge)
  60. store.close()
  61. print(f'✅ 成功插入知识条目!')
  62. print(f' ID: {knowledge_id}')
  63. print(f' Task: {knowledge_data["task"]}')
  64. print(f' Types: {knowledge_data["types"]}')
  65. return knowledge_id
  66. if __name__ == '__main__':
  67. knowledge_id = asyncio.run(main())
  68. print(f'\n最终知识 ID: {knowledge_id}')