|
|
@@ -1,120 +0,0 @@
|
|
|
-"""
|
|
|
-测试 Milvus Lite 向量检索实现
|
|
|
-
|
|
|
-运行前确保:
|
|
|
-1. pip install -r knowhub/requirements.txt
|
|
|
-2. 设置环境变量 OPENROUTER_API_KEY
|
|
|
-"""
|
|
|
-
|
|
|
-import asyncio
|
|
|
-import sys
|
|
|
-from pathlib import Path
|
|
|
-
|
|
|
-# 添加项目路径
|
|
|
-sys.path.insert(0, str(Path(__file__).parent))
|
|
|
-
|
|
|
-from knowhub.vector_store import MilvusStore
|
|
|
-from knowhub.embeddings import get_embedding, get_embeddings_batch
|
|
|
-
|
|
|
-
|
|
|
def _print_section(title, *, leading_blank=True):
    """Print a section banner: a 60-character ``=`` rule, the title, a second rule.

    Args:
        title: Heading text printed between the two rules.
        leading_blank: When true, a newline is emitted before the first rule
            so that consecutive sections are visually separated.
    """
    rule = "=" * 60
    print("\n" + rule if leading_blank else rule)
    print(title)
    print(rule)


async def test_basic():
    """Run an end-to-end smoke test against the Milvus Lite vector store.

    The steps run in order: create the store, generate a single embedding and
    a batch of embeddings, insert one knowledge record, fetch it by ID, run a
    vector search, update the record, and delete it. Progress is printed to
    stdout after each step.

    Unlike a print-only walkthrough, each step whose outcome can be observed
    is checked before its success line is printed, so the closing
    "all tests passed" message is only reached when those checks hold.

    Raises:
        AssertionError: If the batch embedding count does not match the input
            count, the inserted record cannot be fetched, the update is not
            visible on re-read, or the record is still returned after delete.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    import time

    record_id = "test-001"

    _print_section("测试 1: 初始化 Milvus Lite", leading_blank=False)

    store = MilvusStore(data_dir="./test_milvus_data")
    print("✓ Milvus Lite 初始化成功")
    print(f" 当前知识数量: {store.count()}")

    _print_section("测试 2: 生成 Embedding")

    text = "如何使用 Python 读取 PDF 文件"
    embedding = await get_embedding(text)
    print("✓ 单条 embedding 生成成功")
    print(f" 文本: {text}")
    print(f" 向量维度: {len(embedding)}")

    texts = ["测试文本1", "测试文本2", "测试文本3"]
    embeddings = await get_embeddings_batch(texts)
    assert len(embeddings) == len(texts), (
        f"expected {len(texts)} embeddings, got {len(embeddings)}"
    )
    print("✓ 批量 embedding 生成成功")
    print(f" 文本数量: {len(texts)}")
    print(f" 向量数量: {len(embeddings)}")

    _print_section("测试 3: 插入知识")

    # Read the clock once so created_at and updated_at are guaranteed equal
    # for a freshly inserted record.
    now = int(time.time())
    knowledge = {
        "id": record_id,
        "embedding": embedding,
        "message_id": "",
        "task": "读取 PDF 文件",
        "content": "使用 pymupdf 库可以高效读取 PDF 文件内容",
        "types": ["tool"],
        "tags": {"category": "file_processing"},
        "scopes": ["org:test"],
        "owner": "test_user",
        "resource_ids": [],
        "source": {"name": "test"},
        "eval": {"score": 4, "helpful": 0, "harmful": 0},
        "created_at": now,
        "updated_at": now,
    }

    store.insert(knowledge)
    print("✓ 知识插入成功")
    print(f" ID: {knowledge['id']}")
    print(f" 当前知识数量: {store.count()}")

    _print_section("测试 4: 查询知识")

    result = store.get_by_id(record_id)
    # Fail with a clear message instead of an opaque subscript error below.
    assert result, f"get_by_id({record_id!r}) returned nothing after insert"
    print("✓ 按 ID 查询成功")
    print(f" Task: {result['task']}")
    print(f" Content: {result['content']}")

    _print_section("测试 5: 向量检索")

    query_text = "怎么处理 PDF"
    query_embedding = await get_embedding(query_text)
    results = store.search(query_embedding, limit=5)
    print("✓ 向量检索成功")
    print(f" 查询: {query_text}")
    print(f" 结果数量: {len(results)}")
    # An empty result list is tolerated here, as in the original script.
    if results:
        print(f" Top 1: {results[0]['task']}")

    _print_section("测试 6: 更新知识")

    new_content = "使用 pymupdf 库(推荐)或 PyPDF2 库读取 PDF"
    store.update(record_id, {"content": new_content})
    updated = store.get_by_id(record_id)
    assert updated and updated["content"] == new_content, (
        f"update of {record_id!r} is not visible on re-read"
    )
    print("✓ 知识更新成功")
    print(f" 新内容: {updated['content']}")

    _print_section("测试 7: 删除知识")

    store.delete(record_id)
    deleted = store.get_by_id(record_id)
    # NOTE(review): assumes get_by_id yields a falsy value (e.g. None) for a
    # missing ID rather than raising - confirm against MilvusStore.
    assert not deleted, f"{record_id!r} is still returned after delete"
    print("✓ 知识删除成功")
    print(f" 删除后查询结果: {deleted}")
    print(f" 当前知识数量: {store.count()}")

    _print_section("所有测试通过!")
|
|
|
-
|
|
|
-
|
|
|
# Script entry point: when executed directly, drive the async smoke test to
# completion on a fresh event loop.
if __name__ == "__main__":
    smoke_run = test_basic()
    asyncio.run(smoke_run)
|