test_vector_search.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. """
  2. 测试 Milvus Lite 向量检索实现
  3. 运行前确保:
  4. 1. pip install -r knowhub/requirements.txt
  5. 2. 设置环境变量 OPENROUTER_API_KEY
  6. """
  7. import asyncio
  8. import sys
  9. from pathlib import Path
  10. # 添加项目路径
  11. sys.path.insert(0, str(Path(__file__).parent))
  12. from knowhub.vector_store import MilvusStore
  13. from knowhub.embeddings import get_embedding, get_embeddings_batch
  14. async def test_basic():
  15. """测试基本功能"""
  16. print("=" * 60)
  17. print("测试 1: 初始化 Milvus Lite")
  18. print("=" * 60)
  19. store = MilvusStore(data_dir="./test_milvus_data")
  20. print(f"✓ Milvus Lite 初始化成功")
  21. print(f" 当前知识数量: {store.count()}")
  22. print("\n" + "=" * 60)
  23. print("测试 2: 生成 Embedding")
  24. print("=" * 60)
  25. text = "如何使用 Python 读取 PDF 文件"
  26. embedding = await get_embedding(text)
  27. print(f"✓ 单条 embedding 生成成功")
  28. print(f" 文本: {text}")
  29. print(f" 向量维度: {len(embedding)}")
  30. texts = ["测试文本1", "测试文本2", "测试文本3"]
  31. embeddings = await get_embeddings_batch(texts)
  32. print(f"✓ 批量 embedding 生成成功")
  33. print(f" 文本数量: {len(texts)}")
  34. print(f" 向量数量: {len(embeddings)}")
  35. print("\n" + "=" * 60)
  36. print("测试 3: 插入知识")
  37. print("=" * 60)
  38. import time
  39. knowledge = {
  40. "id": "test-001",
  41. "embedding": embedding,
  42. "message_id": "",
  43. "task": "读取 PDF 文件",
  44. "content": "使用 pymupdf 库可以高效读取 PDF 文件内容",
  45. "types": ["tool"],
  46. "tags": {"category": "file_processing"},
  47. "scopes": ["org:test"],
  48. "owner": "test_user",
  49. "resource_ids": [],
  50. "source": {"name": "test"},
  51. "eval": {"score": 4, "helpful": 0, "harmful": 0},
  52. "created_at": int(time.time()),
  53. "updated_at": int(time.time()),
  54. }
  55. store.insert(knowledge)
  56. print(f"✓ 知识插入成功")
  57. print(f" ID: {knowledge['id']}")
  58. print(f" 当前知识数量: {store.count()}")
  59. print("\n" + "=" * 60)
  60. print("测试 4: 查询知识")
  61. print("=" * 60)
  62. result = store.get_by_id("test-001")
  63. print(f"✓ 按 ID 查询成功")
  64. print(f" Task: {result['task']}")
  65. print(f" Content: {result['content']}")
  66. print("\n" + "=" * 60)
  67. print("测试 5: 向量检索")
  68. print("=" * 60)
  69. query_text = "怎么处理 PDF"
  70. query_embedding = await get_embedding(query_text)
  71. results = store.search(query_embedding, limit=5)
  72. print(f"✓ 向量检索成功")
  73. print(f" 查询: {query_text}")
  74. print(f" 结果数量: {len(results)}")
  75. if results:
  76. print(f" Top 1: {results[0]['task']}")
  77. print("\n" + "=" * 60)
  78. print("测试 6: 更新知识")
  79. print("=" * 60)
  80. store.update("test-001", {"content": "使用 pymupdf 库(推荐)或 PyPDF2 库读取 PDF"})
  81. updated = store.get_by_id("test-001")
  82. print(f"✓ 知识更新成功")
  83. print(f" 新内容: {updated['content']}")
  84. print("\n" + "=" * 60)
  85. print("测试 7: 删除知识")
  86. print("=" * 60)
  87. store.delete("test-001")
  88. deleted = store.get_by_id("test-001")
  89. print(f"✓ 知识删除成功")
  90. print(f" 删除后查询结果: {deleted}")
  91. print(f" 当前知识数量: {store.count()}")
  92. print("\n" + "=" * 60)
  93. print("所有测试通过!")
  94. print("=" * 60)
  95. if __name__ == "__main__":
  96. asyncio.run(test_basic())