Просмотр исходного кода

add condition query to knowledge_search

guantao 16 часов назад
Родитель
Commit
bfd3bb11ec
7 измененных файлов с 160 добавлено и 7 удалено
  1. 27 2
      knowhub/agents/librarian_agent.prompt
  2. 11 5
      knowhub/server.py
  3. 16 0
      scratch_db_caps.py
  4. 21 0
      scratch_db_test.py
  5. 21 0
      scratch_db_test_case.py
  6. 46 0
      scratch_search_test.py
  7. 18 0
      scratch_test.py

+ 27 - 2
knowhub/agents/librarian_agent.prompt

@@ -51,8 +51,33 @@ Knowledge 按 types 分类:
 
 ## 工具使用规范与检索策略
 
-1. **精准查询优于全文搜索**:当你需要查询跨表关联关系时(例如:“寻找某个特定 Capability ID 被哪些 Requirement 关联了” 或者 “查某个 Tool 有没有被某个 Capability 包含”),**强烈推荐且务必优先使用 `relation_search` 工具**直接查询关系表(例如 `requirement_capability`, `capability_tool`, `tool_knowledge` 等)。
-2. 使用 `relation_search` 时,在 `filters` 中传入已知的 `_id` 即可迅速获得所有匹配的关联链路,进而拿到目标实体的 ID 再去定向获取详情,这比漫无目的地做大文本向量搜索 (`search` 结尾工具) 效率极速且精准得多!
+1. **实体及关联知识的靶向查询**:当你明确需要获取包含某个实体(Tool、Requirement、Capability)相关内容的知识时,请**直接利用 `knowledge_search` 及对应的筛选参数**(如 `tool_id`, `requirement_id`, `capability_id`),直接一步检索到位。
+2. **跨表关系寻根溯源更适合查关联表**:仅当你需要了解多层跨表关联路径(例如:“寻找某个特定 Capability ID 被哪些 Requirement 关联了” 或者 “探查某个 Tool 是否存在于特定的 Capability 覆盖集里”)时,才需要使用 `relation_search` 工具。一旦拿到链路上的目标 `_id`,你可以继续拿着 ID 去进行定点信息补充。
+
+### 工具调用示例
+
+**示例 1:查询某工具专有的经验知识 (直接过滤)**
+你想查找针对 `midjourney` 工具的用户案例:
+```json
+// 调用 knowledge_search
+{
+  "query": "生成图片的控制案例",
+  "tool_id": "midjourney",
+  "types": ["case", "tool"]
+}
+```
+
+**示例 2:反查关联了某外接能力的全部需求 ID (跨表溯源)**
+你想知道含有 `CAP-001` 能力的需求有哪些:
+```json
+// 调用 relation_search
+{
+  "table_name": "requirement_capability",
+  "filters": {
+    "capability_id": "CAP-001"
+  }
+}
+```
 
 $user$
 

+ 11 - 5
knowhub/server.py

@@ -1135,8 +1135,11 @@ async def search_knowledge_api(
         filters = []
         if types:
             type_list = [t.strip() for t in types.split(',') if t.strip()]
-            for t in type_list:
-                filters.append(f'array_contains(types, "{t}")')
+            if len(type_list) == 1:
+                filters.append(f'array_contains(types, "{type_list[0]}")')
+            elif len(type_list) > 1:
+                type_filters = [f'array_contains(types, "{t}")' for t in type_list]
+                filters.append(f'({" or ".join(type_filters)})')
         if owner:
             owner_list = [o.strip() for o in owner.split(',') if o.strip()]
             if len(owner_list) == 1:
@@ -1280,11 +1283,14 @@ def list_knowledge(
         # 构建过滤表达式
         filters = []
 
-        # types 支持多个,用 AND 连接(交集:必须同时包含所有选中的type
+        # types 支持多个,改为用 OR 连接(并集:包含任意选中 type 即可)
         if types:
             type_list = [t.strip() for t in types.split(',') if t.strip()]
-            for t in type_list:
-                filters.append(f'array_contains(types, "{t}")')
+            if len(type_list) == 1:
+                filters.append(f'array_contains(types, "{type_list[0]}")')
+            elif len(type_list) > 1:
+                type_filters = [f'array_contains(types, "{t}")' for t in type_list]
+                filters.append(f'({" or ".join(type_filters)})')
 
         if scopes:
             filters.append(f'array_contains(scopes, "{scopes}")')

+ 16 - 0
scratch_db_caps.py

@@ -0,0 +1,16 @@
+import os
+from dotenv import load_dotenv
+load_dotenv("/root/Agent/.env")
+
+import psycopg2
+conn = psycopg2.connect(
+    host=os.getenv('KNOWHUB_DB'),
+    port=int(os.getenv('KNOWHUB_PORT', 5432)),
+    user=os.getenv('KNOWHUB_USER'),
+    password=os.getenv('KNOWHUB_PASSWORD'),
+    database=os.getenv('KNOWHUB_DB_NAME')
+)
+cursor = conn.cursor()
+cursor.execute("SELECT capability_id, COUNT(*) FROM capability_knowledge GROUP BY capability_id ORDER BY COUNT(*) DESC LIMIT 10")
+for row in cursor.fetchall():
+    print(row)

+ 21 - 0
scratch_db_test.py

@@ -0,0 +1,21 @@
+import os
+from dotenv import load_dotenv
+load_dotenv("/root/Agent/.env")
+
+import sys
+sys.path.append("/root/Agent")
+from knowhub.knowhub_db.pg_store import PostgreSQLStore
+
+store = PostgreSQLStore()
+print("Total knowledge:", store.count())
+
+try:
+    print("\n--- Searching for CAP-000 ---")
+    results = store.query("id != ''", limit=10, relation_filters={"capability_id": "CAP-000"})
+    print("CAP-000 count:", len(results))
+    
+    print("\n--- Searching for cap_0 ---")
+    results2 = store.query("id != ''", limit=10, relation_filters={"capability_id": "cap_0"})
+    print("cap_0 count:", len(results2))
+except Exception as e:
+    print("Error:", e)

+ 21 - 0
scratch_db_test_case.py

@@ -0,0 +1,21 @@
+import asyncio
+import os
+from dotenv import load_dotenv
+load_dotenv("/root/Agent/.env")
+
+import sys
+sys.path.append("/root/Agent")
+from knowhub.knowhub_db.pg_store import PostgreSQLStore
+
+store = PostgreSQLStore()
+
+try:
+    print("--- Searching for CAP-001 (Uppercase) ---")
+    results = store.query("id != ''", limit=10, relation_filters={"capability_id": "CAP-001"})
+    print("CAP-001 count:", len(results))
+    
+    print("\n--- Searching for cap-001 (Lowercase) ---")
+    results2 = store.query("id != ''", limit=10, relation_filters={"capability_id": "cap-001"})
+    print("cap-001 count:", len(results2))
+except Exception as e:
+    print("Error:", e)

+ 46 - 0
scratch_search_test.py

@@ -0,0 +1,46 @@
+import urllib.request, urllib.parse, json
+
+# First test: exactly what user provided
+params1 = urllib.parse.urlencode({
+    "q": "散景 浅景深 逆光 光斑 背景虚化 轮廓光",
+    "capability_id": "CAP-001",
+    "types": "strategy,case,tool",
+    "top_k": 10,
+    "min_score": 3
+})
+try:
+    req1 = urllib.request.Request(f'http://localhost:8000/api/knowledge/search?{params1}')
+    with urllib.request.urlopen(req1) as f:
+        print('Search 1 (all types): count =', json.loads(f.read().decode('utf-8')).get('count', 0))
+except Exception as e:
+    print('Search 1 error:', e)
+
+# Second test: only one type
+params2 = urllib.parse.urlencode({
+    "q": "散景 浅景深 逆光 光斑 背景虚化 轮廓光",
+    "capability_id": "CAP-001",
+    "types": "case",
+    "top_k": 10,
+    "min_score": 3
+})
+try:
+    req2 = urllib.request.Request(f'http://localhost:8000/api/knowledge/search?{params2}')
+    with urllib.request.urlopen(req2) as f:
+        print('Search 2 (single type case): count =', json.loads(f.read().decode('utf-8')).get('count', 0))
+except Exception as e:
+    print('Search 2 error:', e)
+
+# Third test: no types filter
+params3 = urllib.parse.urlencode({
+    "q": "散景 浅景深 逆光 光斑 背景虚化 轮廓光",
+    "capability_id": "CAP-001",
+    "top_k": 10,
+    "min_score": 3
+})
+try:
+    req3 = urllib.request.Request(f'http://localhost:8000/api/knowledge/search?{params3}')
+    with urllib.request.urlopen(req3) as f:
+        print('Search 3 (no types filter): count =', json.loads(f.read().decode('utf-8')).get('count', 0))
+except Exception as e:
+    print('Search 3 error:', e)
+

+ 18 - 0
scratch_test.py

@@ -0,0 +1,18 @@
+import asyncio
+import httpx
+import json
+
+async def main():
+    async with httpx.AsyncClient(timeout=30.0) as client:
+        # test search API
+        res = await client.get("http://localhost:8000/api/knowledge/search", params={"q": "test", "capability_id": "cap_0", "min_score": 1, "top_k": 5})
+        print("Search Response:")
+        print(json.dumps(res.json(), indent=2, ensure_ascii=False))
+
+        # test the relation API directly (capability_knowledge relations for the same capability_id)
+        res2 = await client.get("http://localhost:8000/api/relation/capability_knowledge", params={"capability_id": "cap_0"})
+        print("Relation Response:")
+        print(json.dumps(res2.json(), indent=2, ensure_ascii=False))
+
+if __name__ == "__main__":
+    asyncio.run(main())