""" 测试 Milvus Lite 向量检索实现 运行前确保: 1. pip install -r knowhub/requirements.txt 2. 设置环境变量 OPENROUTER_API_KEY """ import asyncio import sys from pathlib import Path # 添加项目路径 sys.path.insert(0, str(Path(__file__).parent)) from knowhub.vector_store import MilvusStore from knowhub.embeddings import get_embedding, get_embeddings_batch async def test_basic(): """测试基本功能""" print("=" * 60) print("测试 1: 初始化 Milvus Lite") print("=" * 60) store = MilvusStore(data_dir="./test_milvus_data") print(f"✓ Milvus Lite 初始化成功") print(f" 当前知识数量: {store.count()}") print("\n" + "=" * 60) print("测试 2: 生成 Embedding") print("=" * 60) text = "如何使用 Python 读取 PDF 文件" embedding = await get_embedding(text) print(f"✓ 单条 embedding 生成成功") print(f" 文本: {text}") print(f" 向量维度: {len(embedding)}") texts = ["测试文本1", "测试文本2", "测试文本3"] embeddings = await get_embeddings_batch(texts) print(f"✓ 批量 embedding 生成成功") print(f" 文本数量: {len(texts)}") print(f" 向量数量: {len(embeddings)}") print("\n" + "=" * 60) print("测试 3: 插入知识") print("=" * 60) import time knowledge = { "id": "test-001", "embedding": embedding, "message_id": "", "task": "读取 PDF 文件", "content": "使用 pymupdf 库可以高效读取 PDF 文件内容", "types": ["tool"], "tags": {"category": "file_processing"}, "scopes": ["org:test"], "owner": "test_user", "resource_ids": [], "source": {"name": "test"}, "eval": {"score": 4, "helpful": 0, "harmful": 0}, "created_at": int(time.time()), "updated_at": int(time.time()), } store.insert(knowledge) print(f"✓ 知识插入成功") print(f" ID: {knowledge['id']}") print(f" 当前知识数量: {store.count()}") print("\n" + "=" * 60) print("测试 4: 查询知识") print("=" * 60) result = store.get_by_id("test-001") print(f"✓ 按 ID 查询成功") print(f" Task: {result['task']}") print(f" Content: {result['content']}") print("\n" + "=" * 60) print("测试 5: 向量检索") print("=" * 60) query_text = "怎么处理 PDF" query_embedding = await get_embedding(query_text) results = store.search(query_embedding, limit=5) print(f"✓ 向量检索成功") print(f" 查询: {query_text}") print(f" 结果数量: {len(results)}") if results: print(f" Top 1: {results[0]['task']}") print("\n" + "=" * 60) print("测试 6: 更新知识") print("=" * 60) store.update("test-001", {"content": "使用 pymupdf 库(推荐)或 PyPDF2 库读取 PDF"}) updated = store.get_by_id("test-001") print(f"✓ 知识更新成功") print(f" 新内容: {updated['content']}") print("\n" + "=" * 60) print("测试 7: 删除知识") print("=" * 60) store.delete("test-001") deleted = store.get_by_id("test-001") print(f"✓ 知识删除成功") print(f" 删除后查询结果: {deleted}") print(f" 当前知识数量: {store.count()}") print("\n" + "=" * 60) print("所有测试通过!") print("=" * 60) if __name__ == "__main__": asyncio.run(test_basic())