elksmmx 22 hodín pred
rodič
commit
a5e9a4cd11

+ 70 - 1
examples/content_tree_analyst/analyst.prompt

@@ -40,10 +40,79 @@ $system$
 - 相关频繁项集 id(若有)
 - 所属维度(实质/形式/意图)
 
+### 第六步:针对需求进行调研并保存知识
+
+读取 `%output_dir%/requirements.md`,对每个需求执行调研与知识保存。
+
+#### 6.1 调研循环
+
+对每个需求:
+
+1. **构造调研任务**
+   - task 格式:`"调研需求:{需求描述}。目标:找到实现该需求的工具、方法或工作流"`
+   - **严禁**预设具体工具名称
+   - 调用:`agent(task="...", agent_type="research")`
+
+2. **评估调研结果**
+   - **相关性**:找到的方案/工具是否符合需求方向?
+   - **可用性**:工具能否被 agent 使用?(过滤纯手机 app、本地桌面软件)
+   - **时效性**:信息是否过时?(AI 工具 6 个月内为佳)
+   - **完整性**:信息是否足够支撑决策?
+
+3. **追问或通过**
+   - 需补充 → 使用 `continue_from` 继续同一个 subagent,并明确告知:还缺少哪些信息、建议的搜索方向
+   - 通过 → 进入知识保存
+
+#### 6.2 知识识别与保存
+
+从调研结果的 JSON 中识别知识类型并保存:
+
+**工具知识**(单个工具):
+- 识别条件:`调研发现[i].类型 == "tool"`
+- 保存调用:
+```python
+knowledge_save(
+    task=f"【工具】{需求描述}",
+    content=f"工具:{工具名称}\n能力:{核心描述}\n使用方式:{说明}\n限制:{限制}",
+    types=["tool"],
+    tags={"tool": True, "domain": "content_production"},
+    source_name=来源,
+    urls=[工具链接]
+)
+```
+
+**工序知识**(工作流/方案):
+- 识别条件:`调研发现[i].类型 == "workflow"` 或 `调研发现[i].类型 == "case"`
+- 保存调用:
+```python
+knowledge_save(
+    task=f"【工序】{需求描述}",
+    content=f"工序方案:{方案名称}\n步骤:\n{逐步骤说明}",
+    types=["strategy"],
+    tags={"workflow": True, "domain": "content_production"},
+    source_name=来源,
+    urls=[来源链接]
+)
+```
+
+**重要**:
+- task 字段必须以【工具】或【工序】开头,明确知识类型
+- 暂不填写 resource_ids
+- **每完成一个需求的调研,立即保存所有发现的知识,不要等到全部调研完成**
+
+#### 6.3 输出调研日志
+
+将调研过程记录到 `%output_dir%/research_log.md`,包括:
+- 每个需求的调研轮次
+- 评估结论
+- 保存的知识条目 ID
+
+
+
 $user$
 请对以下内容树节点进行制作需求归纳分析:
 
-stable_id:334
+entity_id:15382
 source_type:形式
 
 请按照工作流程,逐步分析该节点及其周边结构,最终将结构化的制作需求列表输出到 %output_dir%/requirements.md。

+ 3 - 3
examples/content_tree_analyst/config.py

@@ -18,11 +18,11 @@ RUN_CONFIG = RunConfig(
         enable_extraction=False,
         enable_completion_extraction=False,
         enable_injection=False,
-        owner="sunlit.howard@gmail.com",
-        default_tags={"project": "gen_query_from_content_tree"},
+        owner="srt_knowledgehub",
+        default_tags={"project": "content_tree_analyst", "domain": "content_production"},
         default_scopes=["org:cybertogether"],
         # default_search_types=["strategy"],
-        default_search_owner="sunlit.howard@gmail.com",
+        default_search_owner="srt_knowledgehub",
     ),
 )
 

+ 48 - 102
examples/content_tree_analyst/docs/内容树API.md

@@ -1,4 +1,5 @@
-# 搜索 API 文档
+# Agent 搜索 API 文档
+面向 agent 提供 pattern 数据库的分类树和元素查询接口
 
 本文档包含三个搜索相关的 API 接口:
 
@@ -13,7 +14,7 @@
 ### 接口地址
 
 ```
-GET http://8.147.104.190:8001/api/search
+GET http://8.147.104.190:8001/api/agent/search
 ```
 
 ### 功能说明
@@ -29,10 +30,10 @@ GET http://8.147.104.190:8001/api/search
 | entity_type | string | ✗ | all | 搜索对象类型:`category`(分类)/ `element`(元素)/ `all`(全部) |
 | top_k | integer | ✗ | 20 | 返回结果数量,范围 1-100 |
 | use_description | boolean | ✗ | false | 是否在描述字段中搜索(true=搜索名称+描述,false=仅搜索名称) |
-| mode | string | ✗ | text | 搜索模式:`text`(文本匹配)/ `vector`(向量搜索)/ `hybrid`(混合),当前仅支持 text |
+| execution_id | integer | ✗ | null | 按执行 ID 筛选(可选) |
+| platform | string | ✗ | null | 按平台筛选元素(如:`小红书`、`抖音` 等),仅对元素有效 |
 | include_ancestors | boolean | ✗ | false | 是否返回祖先路径(从根节点到父节点的完整路径) |
 | descendant_depth | integer | ✗ | 0 | 返回 N 代以内的子孙节点,0=不返回,1=直接子节点,2=子节点+孙节点... |
-| platform | string | ✗ | null | 按平台筛选,仅对元素有效(如:`小红书`、`抖音`、`微博` 等) |
 
 ## 返回格式
 
@@ -52,27 +53,14 @@ GET http://8.147.104.190:8001/api/search
       "description": "分类描述",
       "path": "/一级分类/二级分类",
       "category_nature": "领域层级",
-      "level": 2,
-      "score": 0.95,
-      "scores": {
-        "text": 0.95,
-        "vector": 0.0
-      },
-      "ancestors": [...],
-      "descendants": [...]
+      "level": 2
     },
     {
       "entity_type": "element",
       "entity_id": 789,
       "name": "元素名称",
       "description": "元素描述",
-      "belong_category_stable_id": 456,
-      "occurrence_count": 25,
-      "score": 0.88,
-      "scores": {
-        "text": 0.88,
-        "vector": 0.0
-      }
+      "category_path": "/一级分类/二级分类"
     }
   ]
 }
@@ -91,62 +79,19 @@ GET http://8.147.104.190:8001/api/search
 #### 结果对象字段(分类 Category)
 - `entity_type`: 固定为 "category"
 - `entity_id`: 分类数据库 ID
-- `stable_id`: 分类稳定 ID(用于跨版本引用
+- `stable_id`: 分类稳定 ID(source_stable_id)
 - `name`: 分类名称
 - `description`: 分类描述
 - `path`: 分类路径(如 `/主体/角色类型/人物角色`)
 - `category_nature`: 分类性质(领域层级/元描述层级)
 - `level`: 层级深度(1=根节点)
-- `score`: 综合相似度分数(0-1)
-- `scores`: 各维度分数
-- `ancestors`: 祖先路径(当 `include_ancestors=true` 时返回)
-- `descendants`: 子孙节点(当 `descendant_depth>0` 时返回)
 
 #### 结果对象字段(元素 Element)
 - `entity_type`: 固定为 "element"
 - `entity_id`: 元素数据库 ID
 - `name`: 元素名称
 - `description`: 元素描述
-- `belong_category_stable_id`: 所属分类的 stable_id
-- `occurrence_count`: 出现次数
-- `score`: 综合相似度分数(0-1)
-- `scores`: 各维度分数
-
-#### ancestors 字段结构
-```json
-[
-  {
-    "stable_id": 1,
-    "name": "主体",
-    "level": 1
-  },
-  {
-    "stable_id": 10,
-    "name": "角色类型",
-    "level": 2
-  }
-]
-```
-
-#### descendants 字段结构
-```json
-[
-  {
-    "stable_id": 500,
-    "name": "人物角色",
-    "level": 3,
-    "depth_from_parent": 1,
-    "is_leaf": false
-  },
-  {
-    "stable_id": 501,
-    "name": "动物角色",
-    "level": 3,
-    "depth_from_parent": 1,
-    "is_leaf": true
-  }
-]
-```
+- `category_path`: 所属分类路径
 
 ---
 
@@ -156,14 +101,14 @@ GET http://8.147.104.190:8001/api/search
 
 #### 搜索所有(分类+元素)
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=角色" \
   --data-urlencode "source_type=实质"
 ```
 
 #### 只搜索分类
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=主体" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "entity_type=category"
@@ -171,7 +116,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 只搜索元素
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=猫咪" \
   --data-urlencode "source_type=形式" \
   --data-urlencode "entity_type=element"
@@ -179,7 +124,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 限制返回数量
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=角色" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "top_k=5"
@@ -189,7 +134,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 搜索名称+描述
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=拟人" \
   --data-urlencode "source_type=形式" \
   --data-urlencode "use_description=true"
@@ -199,7 +144,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 返回祖先路径
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=人物角色" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "entity_type=category" \
@@ -208,7 +153,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 返回1代子孙节点
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=主体" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "entity_type=category" \
@@ -217,7 +162,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 返回2代子孙节点
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=主体" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "entity_type=category" \
@@ -226,7 +171,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 同时返回祖先+子孙
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=角色类型" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "entity_type=category" \
@@ -238,7 +183,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 只搜索小红书平台的元素
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=角色" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "entity_type=element" \
@@ -247,7 +192,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 搜索抖音平台的元素
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=主体" \
   --data-urlencode "source_type=形式" \
   --data-urlencode "platform=抖音"
@@ -255,7 +200,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 平台筛选+描述搜索
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=拟人" \
   --data-urlencode "source_type=形式" \
   --data-urlencode "platform=小红书" \
@@ -266,7 +211,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 #### 全功能组合
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=角色" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "entity_type=all" \
@@ -280,12 +225,12 @@ curl -G "http://8.147.104.190:8001/api/search" \
 #### 不同 source_type 的搜索
 ```bash
 # 搜索意图维度
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=情感" \
   --data-urlencode "source_type=意图"
 
 # 搜索形式维度
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=视觉" \
   --data-urlencode "source_type=形式"
 ```
@@ -293,9 +238,9 @@ curl -G "http://8.147.104.190:8001/api/search" \
 ### 6. 浏览器直接访问
 
 ```
-http://8.147.104.190:8001/api/search?q=角色&source_type=实质
-http://8.147.104.190:8001/api/search?q=主体&source_type=实质&entity_type=category&include_ancestors=true&descendant_depth=2
-http://8.147.104.190:8001/api/search?q=猫咪&source_type=形式&platform=小红书
+http://8.147.104.190:8001/api/agent/search?q=角色&source_type=实质
+http://8.147.104.190:8001/api/agent/search?q=主体&source_type=实质&entity_type=category&include_ancestors=true&descendant_depth=2
+http://8.147.104.190:8001/api/agent/search?q=猫咪&source_type=形式&platform=小红书
 ```
 
 ### 7. FastAPI 交互式文档
@@ -316,7 +261,7 @@ http://8.147.104.190:8001/docs
 
 **请求:**
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=角色" \
   --data-urlencode "source_type=实质"
 ```
@@ -366,7 +311,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 **请求:**
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=角色类型" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "entity_type=category" \
@@ -443,7 +388,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 
 **请求:**
 ```bash
-curl -G "http://8.147.104.190:8001/api/search" \
+curl -G "http://8.147.104.190:8001/api/agent/search" \
   --data-urlencode "q=猫咪" \
   --data-urlencode "source_type=形式" \
   --data-urlencode "entity_type=element" \
@@ -531,7 +476,7 @@ curl -G "http://8.147.104.190:8001/api/search" \
 ### 接口地址
 
 ```
-GET http://8.147.104.190:8001/api/search/category/{stable_id}
+GET http://8.147.104.190:8001/api/agent/search/category/{stable_id}
 ```
 
 ### 功能说明
@@ -590,7 +535,7 @@ GET http://8.147.104.190:8001/api/search/category/{stable_id}
 
 #### 获取分类的完整路径和所有子孙
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/category/125" \
+curl -G "http://8.147.104.190:8001/api/agent/search/category/125" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "include_ancestors=true" \
   --data-urlencode "descendant_depth=-1"
@@ -598,7 +543,7 @@ curl -G "http://8.147.104.190:8001/api/search/category/125" \
 
 #### 只获取当前节点和祖先路径
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/category/125" \
+curl -G "http://8.147.104.190:8001/api/agent/search/category/125" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "include_ancestors=true" \
   --data-urlencode "descendant_depth=0"
@@ -606,7 +551,7 @@ curl -G "http://8.147.104.190:8001/api/search/category/125" \
 
 #### 获取2代子孙节点
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/category/125" \
+curl -G "http://8.147.104.190:8001/api/agent/search/category/125" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "descendant_depth=2"
 ```
@@ -618,7 +563,7 @@ curl -G "http://8.147.104.190:8001/api/search/category/125" \
 ### 接口地址
 
 ```
-GET http://8.147.104.190:8001/api/search/elements
+GET http://8.147.104.190:8001/api/agent/search/elements
 ```
 
 ### 功能说明
@@ -630,11 +575,12 @@ GET http://8.147.104.190:8001/api/search/elements
 | 参数名 | 类型 | 必填 | 默认值 | 说明 |
 |--------|------|------|--------|------|
 | source_type | string | ✓ | - | 元素类型:`实质` / `形式` / `意图` |
+| execution_id | integer | ✗ | null | 按执行 ID 筛选(可选) |
 | page | integer | ✗ | 1 | 页码(从1开始) |
 | page_size | integer | ✗ | 50 | 每页数量(1-200) |
-| sort_by | string | ✗ | occurrence_count | 排序字段:`occurrence_count` / `name` / `id` |
-| order | string | ✗ | desc | 排序方向:`asc` / `desc` |
-| category_stable_id | integer | ✗ | null | 按分类筛选(可选) |
+| sort_by | string | ✗ | name | 排序字段:`name` / `id` / `occurrence_count` |
+| order | string | ✗ | asc | 排序方向:`asc` / `desc` |
+| category_path | string | ✗ | null | 按分类路径前缀筛选(可选) |
 | platform | string | ✗ | null | 按平台筛选(可选) |
 | min_occurrence | integer | ✗ | null | 最小出现次数(可选) |
 
@@ -669,13 +615,13 @@ GET http://8.147.104.190:8001/api/search/elements
 
 #### 获取前50个元素(默认按名称升序)
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/elements" \
+curl -G "http://8.147.104.190:8001/api/agent/search/elements" \
   --data-urlencode "source_type=实质"
 ```
 
 #### 获取高频元素前10
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/elements" \
+curl -G "http://8.147.104.190:8001/api/agent/search/elements" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "page=1" \
   --data-urlencode "page_size=10" \
@@ -685,7 +631,7 @@ curl -G "http://8.147.104.190:8001/api/search/elements" \
 
 #### 按名称排序
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/elements" \
+curl -G "http://8.147.104.190:8001/api/agent/search/elements" \
   --data-urlencode "source_type=形式" \
   --data-urlencode "sort_by=name" \
   --data-urlencode "order=asc"
@@ -693,28 +639,28 @@ curl -G "http://8.147.104.190:8001/api/search/elements" \
 
 #### 筛选指定分类下的元素
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/elements" \
+curl -G "http://8.147.104.190:8001/api/agent/search/elements" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "category_path=/主体/角色类型"
 ```
 
 #### 筛选出现次数>=10的元素
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/elements" \
+curl -G "http://8.147.104.190:8001/api/agent/search/elements" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "min_occurrence=10"
 ```
 
 #### 按平台筛选元素
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/elements" \
+curl -G "http://8.147.104.190:8001/api/agent/search/elements" \
   --data-urlencode "source_type=形式" \
   --data-urlencode "platform=小红书"
 ```
 
 #### 组合筛选:指定分类+最小出现次数
 ```bash
-curl -G "http://8.147.104.190:8001/api/search/elements" \
+curl -G "http://8.147.104.190:8001/api/agent/search/elements" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "category_path=/主体/角色类型" \
   --data-urlencode "min_occurrence=50" \
@@ -724,13 +670,13 @@ curl -G "http://8.147.104.190:8001/api/search/elements" \
 #### 分页浏览
 ```bash
 # 第1页
-curl -G "http://8.147.104.190:8001/api/search/elements" \
+curl -G "http://8.147.104.190:8001/api/agent/search/elements" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "page=1" \
   --data-urlencode "page_size=50"
 
 # 第2页
-curl -G "http://8.147.104.190:8001/api/search/elements" \
+curl -G "http://8.147.104.190:8001/api/agent/search/elements" \
   --data-urlencode "source_type=实质" \
   --data-urlencode "page=2" \
   --data-urlencode "page_size=50"

+ 7 - 0
examples/content_tree_analyst/presets.json

@@ -5,5 +5,12 @@
     "temperature": 0.3,
     "skills": ["planning"],
     "description": "内容树需求归纳 Agent"
+  },
+  "research": {
+    "system_prompt_file": "research.prompt",
+    "max_iterations": 200,
+    "temperature": 0.3,
+    "skills": ["planning", "research", "browser"],
+    "description": "调研 Agent - 搜索工具、方法、工作流"
   }
 }

+ 12 - 12
examples/content_tree_analyst/tools/content_tree.py

@@ -53,7 +53,7 @@ async def search_content_tree(
 
     try:
         async with httpx.AsyncClient(timeout=30.0) as client:
-            resp = await client.get(f"{BASE_URL}/api/search", params=params)
+            resp = await client.get(f"{BASE_URL}/api/agent/search", params=params)
             resp.raise_for_status()
             data = resp.json()
 
@@ -67,10 +67,10 @@ async def search_content_tree(
             name = r.get("name", "")
             score = r.get("score", 0)
             if etype == "category":
-                sid = r.get("stable_id", "")
+                sid = r.get("entity_id", "")
                 path = r.get("path", "")
                 desc = r.get("description", "")
-                lines.append(f"[分类] stable_id={sid} | {path} | score={score:.2f}")
+                lines.append(f"[分类] entity_id={sid} | {path} | score={score:.2f}")
                 if desc:
                     lines.append(f"  描述: {desc}")
                 ancestors = r.get("ancestors", [])
@@ -83,7 +83,7 @@ async def search_content_tree(
                     lines.append(f"  子孙({len(descendants)}): {desc_names}")
             else:
                 eid = r.get("entity_id", "")
-                belong = r.get("belong_category_stable_id", "")
+                belong = r.get("belong_category_entity_id", "")
                 occ = r.get("occurrence_count", 0)
                 lines.append(f"[元素] entity_id={eid} | {name} | belong_category={belong} | 出现次数={occ} | score={score:.2f}")
                 edesc = r.get("description", "")
@@ -103,9 +103,9 @@ async def search_content_tree(
         return ToolResult(title="内容树搜索失败", output=f"错误: {e}")
 
 
-@tool(description="获取指定分类节点的完整路径、祖先和子孙结构(通过 stable_id 精确查询)")
+@tool(description="获取指定分类节点的完整路径、祖先和子孙结构(通过 entity_id 精确查询)")
 async def get_category_tree(
-    stable_id: int,
+    entity_id: int,
     source_type: str,
     include_ancestors: bool = True,
     descendant_depth: int = -1,
@@ -114,7 +114,7 @@ async def get_category_tree(
     获取指定分类的完整路径和子树结构。
 
     Args:
-        stable_id: 分类的 stable_id
+        entity_id: 分类的 entity_id
         source_type: 维度,"实质" / "形式" / "意图"
         include_ancestors: 是否返回祖先路径(默认 True)
         descendant_depth: 子孙深度,-1=全部,0=仅当前,1=子节点,2=子+孙...
@@ -127,7 +127,7 @@ async def get_category_tree(
 
     try:
         async with httpx.AsyncClient(timeout=30.0) as client:
-            resp = await client.get(f"{BASE_URL}/api/search/category/{stable_id}", params=params)
+            resp = await client.get(f"{BASE_URL}/api/agent/search/category/{entity_id}", params=params)
             resp.raise_for_status()
             data = resp.json()
 
@@ -136,7 +136,7 @@ async def get_category_tree(
         descendants = data.get("descendants", [])
 
         lines = []
-        lines.append(f"分类节点: {current.get('name', '')} (stable_id={stable_id})")
+        lines.append(f"分类节点: {current.get('name', '')} (entity_id={entity_id})")
         lines.append(f"路径: {current.get('path', '')}")
         if current.get("description"):
             lines.append(f"描述: {current['description']}")
@@ -145,7 +145,7 @@ async def get_category_tree(
         if ancestors:
             lines.append("祖先路径:")
             for a in ancestors:
-                lines.append(f"  L{a.get('level', '?')} {a.get('name', '')} (stable_id={a.get('stable_id', '')})")
+                lines.append(f"  L{a.get('level', '?')} {a.get('name', '')} (entity_id={a.get('entity_id', '')})")
             lines.append("")
 
         if descendants:
@@ -153,10 +153,10 @@ async def get_category_tree(
             for d in descendants:
                 indent = "  " * d.get("depth_from_parent", 1)
                 leaf_mark = " [叶]" if d.get("is_leaf") else ""
-                lines.append(f"{indent}L{d.get('level', '?')} {d.get('name', '')} (stable_id={d.get('stable_id', '')}){leaf_mark}")
+                lines.append(f"{indent}L{d.get('level', '?')} {d.get('name', '')} (entity_id={d.get('entity_id', '')}){leaf_mark}")
 
         return ToolResult(
-            title=f"分类树: {current.get('name', stable_id)} (stable_id={stable_id})",
+            title=f"分类树: {current.get('name', entity_id)} (entity_id={entity_id})",
             output="\n".join(lines),
         )
 

+ 248 - 25
knowhub/server.py

@@ -31,7 +31,10 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
 from dotenv import load_dotenv
 load_dotenv(Path(__file__).parent.parent / ".env")
 
-from agent.llm.openrouter import openrouter_llm_call
+from agent.llm import create_openrouter_llm_call, create_qwen_llm_call
+
+_dedup_llm = create_openrouter_llm_call(model="google/gemini-2.5-flash-lite")
+_tool_analysis_llm = create_qwen_llm_call(model="qwen3.5-plus")
 
 # 导入向量存储和 embedding
 from knowhub.vector_store import MilvusStore
@@ -413,6 +416,46 @@ Content: {new_content}
 """
 
 
+TOOL_ANALYSIS_PROMPT = """\
+分析以下知识条目,判断是否涉及"图像创作或解构任务中使用的工具"。
+
+工具范畴(包括但不限于):
+- AI 生图平台/模型:Midjourney、Stable Diffusion、DALL-E、Flux、ComfyUI
+- SD 插件/节点:ControlNet、IP-Adapter、InstantID、DWPose、DSINE
+- 图像处理库:rembg、PIL/Pillow、OpenCV、scikit-image
+- LoRA/checkpoint 模型、ComfyUI 自定义节点、AI 绘图辅助工具
+
+知识条目:
+task: {task}
+content: {content}
+
+要求:
+- 如果涉及上述工具,提取每个工具的信息并以 JSON 格式返回。
+- 如果不涉及任何工具,返回 {{"has_tools": false}}。
+- 只输出 JSON,不要输出其他内容。
+
+输出格式:
+{{
+  "has_tools": true,
+  "tools": [
+    {{
+      "name": "工具名称(原名)",
+      "slug": "小写英文短名,空格换下划线,如 controlnet、ip_adapter",
+      "category": "image_gen | image_process | model | plugin | workflow | other",
+      "version": "版本号或 null",
+      "description": "一句话功能介绍",
+      "usage": "核心用法",
+      "scenarios": ["应用场景1", "应用场景2"],
+      "input": "输入类型描述或 null",
+      "output": "输出类型描述或 null",
+      "source": "来源/文档链接或 null",
+      "status": "未接入"
+    }}
+  ]
+}}
+"""
+
+
 # --- Dedup: RelationCache ---
 
 class RelationCache:
@@ -451,10 +494,11 @@ class KnowledgeProcessor:
         self._relation_cache = RelationCache()
 
     async def process_pending(self):
-        """持续处理 pending 知识直到队列为空,有锁防并发"""
+        """持续处理 pending 和 dedup_passed 知识直到队列为空,有锁防并发"""
         if self._lock.locked():
             return
         async with self._lock:
+            # 第一阶段:处理 pending(去重)
             while True:
                 try:
                     pending = milvus_store.query('status == "pending"', limit=50)
@@ -465,6 +509,17 @@ class KnowledgeProcessor:
                     break
                 for knowledge in pending:
                     await self._process_one(knowledge)
+            # 第二阶段:处理 dedup_passed(工具关联)
+            while True:
+                try:
+                    dedup_passed = milvus_store.query('status == "dedup_passed"', limit=50)
+                except Exception as e:
+                    print(f"[KnowledgeProcessor] 查询 dedup_passed 失败: {e}")
+                    break
+                if not dedup_passed:
+                    break
+                for knowledge in dedup_passed:
+                    await self._analyze_tool_relation(knowledge)
 
     async def _process_one(self, knowledge: dict):
         kid = knowledge["id"]
@@ -490,16 +545,16 @@ class KnowledgeProcessor:
             candidates = [c for c in candidates if c.get("score", 0) >= 0.75]
 
             if not candidates:
-                milvus_store.update(kid, {"status": "approved", "updated_at": now})
+                milvus_store.update(kid, {"status": "dedup_passed", "updated_at": now})
                 return
 
             llm_result = await self._llm_judge_relations(knowledge, candidates)
             await self._apply_decision(knowledge, llm_result)
 
         except Exception as e:
-            print(f"[KnowledgeProcessor] 处理 {kid} 失败: {e},fallback 到 approved")
+            print(f"[KnowledgeProcessor] 处理 {kid} 失败: {e},回退到 pending")
             try:
-                milvus_store.update(kid, {"status": "approved", "updated_at": int(time.time())})
+                milvus_store.update(kid, {"status": "pending", "updated_at": int(time.time())})
             except Exception:
                 pass
 
@@ -515,9 +570,8 @@ class KnowledgeProcessor:
         )
         for attempt in range(3):
             try:
-                response = await openrouter_llm_call(
+                response = await _dedup_llm(
                     messages=[{"role": "user", "content": prompt}],
-                    model="google/gemini-2.5-flash-lite"
                 )
                 content = response.get("content", "").strip()
                 # 清理 markdown 代码块
@@ -603,26 +657,191 @@ class KnowledgeProcessor:
                     except Exception as e:
                         print(f"[Apply Decision] 更新旧知识关系 {old_id} 失败: {e}")
             milvus_store.update(kid, {
-                "status": "approved",
+                "status": "dedup_passed",
                 "relationships": json.dumps(new_relationships),
                 "updated_at": now
             })
 
+    async def _llm_analyze_tools(self, knowledge: dict) -> dict:
+        """使用 LLM 分析知识中涉及的工具(复用迁移脚本逻辑)"""
+        task = (knowledge.get("task") or "")[:600]
+        content = (knowledge.get("content") or "")[:1200]
+        prompt = TOOL_ANALYSIS_PROMPT.format(task=task, content=content)
+        try:
+            response = await _tool_analysis_llm(
+                messages=[{"role": "user", "content": prompt}],
+                max_tokens=2048,
+                temperature=0.1,
+            )
+            raw = (response.get("content") or "").strip()
+            if raw.startswith("```"):
+                lines = raw.split("\n")
+                inner = []
+                in_block = False
+                for line in lines:
+                    if line.startswith("```"):
+                        in_block = not in_block
+                        continue
+                    if in_block:
+                        inner.append(line)
+                raw = "\n".join(inner).strip()
+            return json.loads(raw)
+        except Exception as e:
+            print(f"[Tool Analysis LLM] 调用失败: {e}")
+            raise
+
+    async def _create_or_get_tool_resource(self, tool_info: dict) -> Optional[str]:
+        """创建或获取工具资源"""
+        category = tool_info.get("category", "other")
+        slug = tool_info.get("slug", "")
+        if not slug:
+            return None
+        tool_id = f"tools/{category}/{slug}"
+        conn = get_db()
+        try:
+            row = conn.execute("SELECT id FROM resources WHERE id = ?", (tool_id,)).fetchone()
+            if row:
+                return tool_id
+            now_str = datetime.now(timezone.utc).isoformat()
+            metadata = {
+                "tool_name": tool_info.get("name", ""),
+                "tool_slug": slug,
+                "category": category,
+                "version": tool_info.get("version", ""),
+                "description": tool_info.get("description", ""),
+                "usage": tool_info.get("usage", ""),
+                "scenarios": tool_info.get("scenarios", []),
+                "input": tool_info.get("input", ""),
+                "output": tool_info.get("output", ""),
+                "status": tool_info.get("status", "未接入"),
+                "knowledge_ids": []
+            }
+            conn.execute(
+                "INSERT INTO resources (id, title, body, content_type, metadata, submitted_by, created_at, updated_at)"
+                " VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+                (tool_id, tool_info.get("name", slug), "", "tool",
+                 json.dumps(metadata), "knowledge_processor", now_str, now_str),
+            )
+            conn.commit()
+            print(f"[Tool Resource] 创建新工具: {tool_id}")
+            return tool_id
+        finally:
+            conn.close()
+
+    async def _update_tool_knowledge_index(self, tool_id: str, knowledge_id: str):
+        """更新工具资源的 knowledge_ids 索引"""
+        conn = get_db()
+        try:
+            row = conn.execute("SELECT metadata FROM resources WHERE id = ?", (tool_id,)).fetchone()
+            if not row:
+                return
+            metadata = json.loads(row["metadata"] or "{}")
+            knowledge_ids = metadata.get("knowledge_ids", [])
+            if knowledge_id not in knowledge_ids:
+                knowledge_ids.append(knowledge_id)
+                metadata["knowledge_ids"] = knowledge_ids
+                conn.execute(
+                    "UPDATE resources SET metadata = ?, updated_at = ? WHERE id = ?",
+                    (json.dumps(metadata), datetime.now(timezone.utc).isoformat(), tool_id)
+                )
+                conn.commit()
+        finally:
+            conn.close()
+
+    async def _analyze_tool_relation(self, knowledge: dict):
+        """分析知识与工具的关联关系"""
+        kid = knowledge["id"]
+        now = int(time.time())
+        # 乐观锁:dedup_passed → analyzing
+        try:
+            milvus_store.update(kid, {"status": "analyzing", "updated_at": now})
+        except Exception as e:
+            print(f"[Tool Analysis] 锁定 {kid} 失败: {e}")
+            return
+        try:
+            tool_analysis = await self._llm_analyze_tools(knowledge)
+            has_tools = bool(tool_analysis and tool_analysis.get("has_tools"))
+
+            existing_tags = knowledge.get("tags") or {}
+            has_tool_tag = existing_tags.get("tool") is True
+
+            # 情况1:LLM 判定无工具,但有 tool tag → 重新分析一次
+            if not has_tools and has_tool_tag:
+                print(f"[Tool Analysis] {kid} LLM 判定无工具但有 tool tag,重新分析")
+                tool_analysis = await self._llm_analyze_tools(knowledge)
+                has_tools = bool(tool_analysis and tool_analysis.get("has_tools"))
+                # 重新分析后仍然不一致 → 知识模糊,rejected
+                if not has_tools:
+                    milvus_store.update(kid, {"status": "rejected", "updated_at": now})
+                    print(f"[Tool Analysis] {kid} 两次判定不一致,知识模糊,rejected")
+                    return
+
+            # 情况2:无工具且无 tool tag → 直接 approved
+            if not has_tools:
+                milvus_store.update(kid, {"status": "approved", "updated_at": now})
+                return
+
+            # 情况3/4:有工具 → 创建资源并关联
+            tool_ids = []
+            for tool_info in (tool_analysis.get("tools") or []):
+                tool_id = await self._create_or_get_tool_resource(tool_info)
+                if tool_id:
+                    tool_ids.append(tool_id)
+
+            existing_resource_ids = knowledge.get("resource_ids") or []
+            updated_resource_ids = list(set(existing_resource_ids + tool_ids))
+
+            updates: dict = {
+                "status": "approved",
+                "resource_ids": updated_resource_ids,
+                "updated_at": now
+            }
+            # 有工具但无 tool tag → 添加 tag
+            if not has_tool_tag:
+                updated_tags = dict(existing_tags)
+                updated_tags["tool"] = True
+                updates["tags"] = updated_tags
+                print(f"[Tool Analysis] {kid} 添加 tool tag")
+
+            milvus_store.update(kid, updates)
+
+            for tool_id in tool_ids:
+                await self._update_tool_knowledge_index(tool_id, kid)
+
+            print(f"[Tool Analysis] {kid} 关联了 {len(tool_ids)} 个工具")
+
+        except Exception as e:
+            print(f"[Tool Analysis] {kid} 分析失败: {e},回退到 dedup_passed")
+            try:
+                milvus_store.update(kid, {"status": "dedup_passed", "updated_at": int(time.time())})
+            except Exception:
+                pass
+
 
 async def _periodic_processor():
-    """每60秒检测超时 processing 条目(>5分钟)并回滚到 pending"""
+    """每60秒检测超时条目并回滚:processing(>5min)→pending,analyzing(>10min)→dedup_passed"""
     while True:
         await asyncio.sleep(60)
         try:
-            timeout_threshold = int(time.time()) - 300
+            now = int(time.time())
+            # 回滚超时的 processing(5分钟 → pending)
+            timeout_5min = now - 300
             processing = milvus_store.query('status == "processing"', limit=200)
             for item in processing:
                 updated_at = item.get("updated_at", 0) or 0
-                # query 返回的 updated_at 已乘以 1000(毫秒),转回秒
                 updated_at_sec = updated_at // 1000 if updated_at > 1_000_000_000_000 else updated_at
-                if updated_at_sec < timeout_threshold:
-                    print(f"[Periodic] 回滚超时 processing: {item['id']}")
+                if updated_at_sec < timeout_5min:
+                    print(f"[Periodic] 回滚超时 processing → pending: {item['id']}")
                     milvus_store.update(item["id"], {"status": "pending", "updated_at": int(time.time())})
+            # 回滚超时的 analyzing(10分钟 → dedup_passed)
+            timeout_10min = now - 600
+            analyzing = milvus_store.query('status == "analyzing"', limit=200)
+            for item in analyzing:
+                updated_at = item.get("updated_at", 0) or 0
+                updated_at_sec = updated_at // 1000 if updated_at > 1_000_000_000_000 else updated_at
+                if updated_at_sec < timeout_10min:
+                    print(f"[Periodic] 回滚超时 analyzing → dedup_passed: {item['id']}")
+                    milvus_store.update(item["id"], {"status": "dedup_passed", "updated_at": int(time.time())})
         except Exception as e:
             print(f"[Periodic] 定时任务错误: {e}")
 
@@ -901,9 +1120,8 @@ async def _llm_rerank(query: str, candidates: list[dict], top_k: int) -> list[st
 只输出 ID,不要其他内容。"""
 
     try:
-        response = await openrouter_llm_call(
+        response = await _dedup_llm(
             messages=[{"role": "user", "content": prompt}],
-            model="google/gemini-2.5-flash-lite"
         )
 
         content = response.get("content", "").strip()
@@ -1171,9 +1389,12 @@ def get_all_tags():
 
 @app.get("/api/knowledge/pending")
 def get_pending_knowledge(limit: int = Query(default=50, ge=1, le=200)):
-    """查询待处理队列(pending + processing)"""
+    """查询待处理队列(pending + processing + dedup_passed + analyzing)"""
     try:
-        pending = milvus_store.query('status == "pending" or status == "processing"', limit=limit)
+        pending = milvus_store.query(
+            'status == "pending" or status == "processing" or status == "dedup_passed" or status == "analyzing"',
+            limit=limit
+        )
         serialized = [serialize_milvus_result(r) for r in pending]
         return {"results": serialized, "count": len(serialized)}
     except Exception as e:
@@ -1183,13 +1404,17 @@ def get_pending_knowledge(limit: int = Query(default=50, ge=1, le=200)):
 
 @app.post("/api/knowledge/process")
 async def trigger_process(force: bool = Query(default=False)):
-    """手动触发去重处理。force=true 时先回滚所有 processing → pending"""
+    """手动触发去重处理。force=true 时先回滚所有 processing → pending,analyzing → dedup_passed"""
     try:
         if force:
             processing = milvus_store.query('status == "processing"', limit=200)
             for item in processing:
                 milvus_store.update(item["id"], {"status": "pending", "updated_at": int(time.time())})
-            print(f"[Manual Process] 回滚 {len(processing)} 条 processing")
+            print(f"[Manual Process] 回滚 {len(processing)} 条 processing → pending")
+            analyzing = milvus_store.query('status == "analyzing"', limit=200)
+            for item in analyzing:
+                milvus_store.update(item["id"], {"status": "dedup_passed", "updated_at": int(time.time())})
+            print(f"[Manual Process] 回滚 {len(analyzing)} 条 analyzing → dedup_passed")
         asyncio.create_task(knowledge_processor.process_pending())
         return {"status": "ok", "message": "处理任务已触发"}
     except Exception as e:
@@ -1264,9 +1489,8 @@ async def _evolve_knowledge_with_llm(old_content: str, feedback: str) -> str:
 4. 禁止:严禁输出任何开场白、解释语或额外的 Markdown 标题,直接返回重写后的正文。
 """
     try:
-        response = await openrouter_llm_call(
+        response = await _dedup_llm(
             messages=[{"role": "user", "content": prompt}],
-            model="google/gemini-2.5-flash-lite"
         )
         evolved = response.get("content", "").strip()
         if len(evolved) < 5:
@@ -1624,9 +1848,9 @@ REPORT: 原有 X 条,合并后 Y 条,精简了 Z 条。
 禁止输出任何开场白或解释。"""
 
         print(f"\n[知识瘦身] 正在调用 {model} 分析 {len(all_knowledge)} 条知识...")
-        response = await openrouter_llm_call(
+        slim_llm = create_openrouter_llm_call(model=model)
+        response = await slim_llm(
             messages=[{"role": "user", "content": prompt}],
-            model=model
         )
         content = response.get("content", "").strip()
         if not content:
@@ -1822,9 +2046,8 @@ async def extract_knowledge_from_messages(extract_req: MessageExtractIn, backgro
     try:
         print(f"\n[Extract] 正在从 {len(messages)} 条消息中提取知识...")
 
-        response = await openrouter_llm_call(
+        response = await _dedup_llm(
             messages=[{"role": "user", "content": prompt}],
-            model="google/gemini-2.5-flash-lite"
         )
 
         content = response.get("content", "").strip()