| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- """
- 测试 Milvus Lite 向量检索实现
- 运行前确保:
- 1. pip install -r knowhub/requirements.txt
- 2. 设置环境变量 OPENROUTER_API_KEY
- """
- import asyncio
- import sys
- from pathlib import Path
- # 添加项目路径
- sys.path.insert(0, str(Path(__file__).parent))
- from knowhub.vector_store import MilvusStore
- from knowhub.embeddings import get_embedding, get_embeddings_batch
def _print_banner(title, *, leading_blank=True):
    """Print a section title framed by two 60-character ``=`` rules.

    Args:
        title: Text shown between the two rules.
        leading_blank: When true, an empty line is printed before the
            first rule to separate this section from the previous one.
    """
    rule = "=" * 60
    print(("\n" + rule) if leading_blank else rule)
    print(title)
    print(rule)


def _require(condition, message):
    """Raise ``AssertionError(message)`` when *condition* is falsy.

    An explicit raise is used instead of the ``assert`` statement so the
    checks are not stripped when the script runs under ``python -O``.
    """
    if not condition:
        raise AssertionError(message)


async def test_basic():
    """Run an end-to-end smoke test of the vector store and embedding helpers.

    The steps run in order against a store created in ``./test_milvus_data``:
    initialise the store, generate a single and a batch embedding, insert one
    knowledge record (id ``test-001``), fetch it by id, run a vector search,
    update its content, and finally delete it. Progress is printed after
    every step.

    Raises:
        AssertionError: If the batch embedding count differs from the input
            count, the inserted record cannot be fetched, the update is not
            reflected when the record is read back, or the record is still
            returned after deletion.
    """
    import time

    record_id = "test-001"

    _print_banner("测试 1: 初始化 Milvus Lite", leading_blank=False)
    store = MilvusStore(data_dir="./test_milvus_data")
    print("✓ Milvus Lite 初始化成功")
    print(f" 当前知识数量: {store.count()}")

    _print_banner("测试 2: 生成 Embedding")
    text = "如何使用 Python 读取 PDF 文件"
    embedding = await get_embedding(text)
    print("✓ 单条 embedding 生成成功")
    print(f" 文本: {text}")
    print(f" 向量维度: {len(embedding)}")

    texts = ["测试文本1", "测试文本2", "测试文本3"]
    embeddings = await get_embeddings_batch(texts)
    # One vector per input text is the whole point of the batch call.
    _require(
        len(embeddings) == len(texts),
        f"批量 embedding 数量不匹配: 期望 {len(texts)}, 实际 {len(embeddings)}",
    )
    print("✓ 批量 embedding 生成成功")
    print(f" 文本数量: {len(texts)}")
    print(f" 向量数量: {len(embeddings)}")

    _print_banner("测试 3: 插入知识")
    # Take the timestamp once so created_at and updated_at cannot straddle
    # a second boundary for a freshly created record.
    now = int(time.time())
    knowledge = {
        "id": record_id,
        "embedding": embedding,
        "message_id": "",
        "task": "读取 PDF 文件",
        "content": "使用 pymupdf 库可以高效读取 PDF 文件内容",
        "types": ["tool"],
        "tags": {"category": "file_processing"},
        "scopes": ["org:test"],
        "owner": "test_user",
        "resource_ids": [],
        "source": {"name": "test"},
        "eval": {"score": 4, "helpful": 0, "harmful": 0},
        "created_at": now,
        "updated_at": now,
    }
    store.insert(knowledge)
    print("✓ 知识插入成功")
    print(f" ID: {knowledge['id']}")
    print(f" 当前知识数量: {store.count()}")

    _print_banner("测试 4: 查询知识")
    result = store.get_by_id(record_id)
    # Fail with a clear message instead of a TypeError/KeyError on the
    # subscripts below when the record was not stored.
    _require(result, f"按 ID 查询失败: 未找到 {record_id}")
    print("✓ 按 ID 查询成功")
    print(f" Task: {result['task']}")
    print(f" Content: {result['content']}")

    _print_banner("测试 5: 向量检索")
    query_text = "怎么处理 PDF"
    query_embedding = await get_embedding(query_text)
    results = store.search(query_embedding, limit=5)
    print("✓ 向量检索成功")
    print(f" 查询: {query_text}")
    print(f" 结果数量: {len(results)}")
    if results:
        print(f" Top 1: {results[0]['task']}")

    _print_banner("测试 6: 更新知识")
    new_content = "使用 pymupdf 库(推荐)或 PyPDF2 库读取 PDF"
    store.update(record_id, {"content": new_content})
    updated = store.get_by_id(record_id)
    # Only report success when the stored content really changed.
    _require(
        updated["content"] == new_content,
        f"知识更新失败: 内容未更新, 实际为 {updated['content']!r}",
    )
    print("✓ 知识更新成功")
    print(f" 新内容: {updated['content']}")

    _print_banner("测试 7: 删除知识")
    store.delete(record_id)
    deleted = store.get_by_id(record_id)
    # Any truthy lookup result means the record survived the delete.
    _require(not deleted, f"知识删除失败: {record_id} 仍可查询到")
    print("✓ 知识删除成功")
    print(f" 删除后查询结果: {deleted}")
    print(f" 当前知识数量: {store.count()}")

    _print_banner("所有测试通过!")
if __name__ == "__main__":
    # Script entry point: build the smoke-test coroutine and drive it to
    # completion on a fresh event loop.
    smoke_test = test_basic()
    asyncio.run(smoke_test)
|