elksmmx пре 3 дана
родитељ
комит
b2cc4824c9

+ 17 - 1
examples/process_pipeline/prompts/apply_to_grounding.prompt

@@ -1,4 +1,4 @@
-你是内容树映射助手。现在是 Stage 2:把 apply_to_draft 映射为精确 apply_to。
+你是内容树映射助手。现在是 Stage 2:把 apply_to_draft 映射为精确 apply_to,并为每个条目生成 ideal_path。
 
 
 # 绝对规则
 # 绝对规则
 
 
@@ -11,6 +11,20 @@
 - 每侧 1-3 项即可。优先选择最贴近 apply_to_draft 的节点;不确定时选较粗分类,不要编造。
 - 每侧 1-3 项即可。优先选择最贴近 apply_to_draft 的节点;不确定时选较粗分类,不要编造。
 - rationale 用一句话说明 draft 短语为何落在该节点。
 - rationale 用一句话说明 draft 短语为何落在该节点。
 
 
+# ideal_path 规则
+
+每个 apply_to 条目必须输出 ideal_path,表示该描述"理想上应该挂在哪个路径"(即使树上不存在):
+- 从根往下逐层检查 category_path 的每一层,判断该层是否仍然准确描述了 draft 的语义。
+- 找到第一个"不够精确或有偏差"的层级,从该层级开始自由续写(替换该层及其后的所有层)。
+- 如果 category_path 所有层级都准确,且 draft 没有更细的信息,则 ideal_path = category_path。
+- 如果 category_path 所有层级都准确,但 draft 还有更细的信息未体现,则在末尾续写 1-3 个层级。
+- 续写部分命名风格参考下方"邻近路径参考"(两字名词、层级粒度保持一致)。
+- 续写部分可以是树上不存在的节点,这正是 ideal_path 的意义。
+
+# 邻近路径参考(向量搜索得到,仅供 ideal_path 命名风格参考,不可用于 category_id/category_path)
+
+{reference_paths}
+
 # 完整内容树(紧凑 JSON)
 # 完整内容树(紧凑 JSON)
 
 
 {compact_tree}
 {compact_tree}
@@ -24,6 +38,8 @@
 对于 strategy:输出 `{ "strategy": { "steps": [ { "order": 1, "apply_to": {...} } ] } }`
 对于 strategy:输出 `{ "strategy": { "steps": [ { "order": 1, "apply_to": {...} } ] } }`
 对于 capability:输出 `{ "capabilities": [ { "apply_to": {...} } ] }`
 对于 capability:输出 `{ "capabilities": [ { "apply_to": {...} } ] }`
 
 
+每个 apply_to 条目格式:`{ "category_id": 123, "category_path": "...", "ideal_path": "...", "element": null, "rationale": "..." }`
+
 # 输出硬规则
 # 输出硬规则
 
 
 - 只输出最终严格 JSON,不要 Markdown 代码块。
 - 只输出最终严格 JSON,不要 Markdown 代码块。

+ 2 - 0
examples/process_pipeline/prompts/apply_to_grounding_capability.schema.json

@@ -22,6 +22,7 @@
                   "properties": {
                   "properties": {
                     "category_id": { "type": "integer" },
                     "category_id": { "type": "integer" },
                     "category_path": { "type": "string", "minLength": 1 },
                     "category_path": { "type": "string", "minLength": 1 },
+                    "ideal_path": { "type": "string", "minLength": 1 },
                     "element": { "type": ["string", "null"] },
                     "element": { "type": ["string", "null"] },
                     "rationale": { "type": "string", "minLength": 1 }
                     "rationale": { "type": "string", "minLength": 1 }
                   }
                   }
@@ -35,6 +36,7 @@
                   "properties": {
                   "properties": {
                     "category_id": { "type": "integer" },
                     "category_id": { "type": "integer" },
                     "category_path": { "type": "string", "minLength": 1 },
                     "category_path": { "type": "string", "minLength": 1 },
+                    "ideal_path": { "type": "string", "minLength": 1 },
                     "element": { "type": ["string", "null"] },
                     "element": { "type": ["string", "null"] },
                     "rationale": { "type": "string", "minLength": 1 }
                     "rationale": { "type": "string", "minLength": 1 }
                   }
                   }

+ 2 - 0
examples/process_pipeline/prompts/apply_to_grounding_strategy.schema.json

@@ -27,6 +27,7 @@
                       "properties": {
                       "properties": {
                         "category_id": { "type": "integer" },
                         "category_id": { "type": "integer" },
                         "category_path": { "type": "string", "minLength": 1 },
                         "category_path": { "type": "string", "minLength": 1 },
+                        "ideal_path": { "type": "string", "minLength": 1 },
                         "element": { "type": ["string", "null"] },
                         "element": { "type": ["string", "null"] },
                         "rationale": { "type": "string", "minLength": 1 }
                         "rationale": { "type": "string", "minLength": 1 }
                       }
                       }
@@ -40,6 +41,7 @@
                       "properties": {
                       "properties": {
                         "category_id": { "type": "integer" },
                         "category_id": { "type": "integer" },
                         "category_path": { "type": "string", "minLength": 1 },
                         "category_path": { "type": "string", "minLength": 1 },
+                        "ideal_path": { "type": "string", "minLength": 1 },
                         "element": { "type": ["string", "null"] },
                         "element": { "type": ["string", "null"] },
                         "rationale": { "type": "string", "minLength": 1 }
                         "rationale": { "type": "string", "minLength": 1 }
                       }
                       }

+ 16 - 3
examples/process_pipeline/script/apply_to_grounding.py

@@ -200,16 +200,19 @@ def render_grounding_prompt(
     task: str,
     task: str,
     draft: Dict,
     draft: Dict,
     compact_tree: str,
     compact_tree: str,
+    reference_paths: List[str] = None,
 ) -> str:
 ) -> str:
     """渲染 Stage 2 prompt"""
     """渲染 Stage 2 prompt"""
     if task == "capability":
     if task == "capability":
         target = "capabilities 数组中的每一条 capability"
         target = "capabilities 数组中的每一条 capability"
     else:
     else:
         target = "strategy;如果 strategy 为 null,则原样返回"
         target = "strategy;如果 strategy 为 null,则原样返回"
+    paths_str = json.dumps(reference_paths or [], ensure_ascii=False)
     return (
     return (
         template
         template
         .replace("{target}", target)
         .replace("{target}", target)
         .replace("{compact_tree}", compact_tree)
         .replace("{compact_tree}", compact_tree)
+        .replace("{reference_paths}", paths_str)
         .replace("{draft_json}", json.dumps(draft, ensure_ascii=False, indent=2))
         .replace("{draft_json}", json.dumps(draft, ensure_ascii=False, indent=2))
     )
     )
 
 
@@ -266,14 +269,19 @@ async def ground_single_case(
                 if all_keywords:
                 if all_keywords:
                     categories = await search_categories_by_keywords(all_keywords, top_k=5)
                     categories = await search_categories_by_keywords(all_keywords, top_k=5)
                     workflow_compact_tree = build_compact_tree(categories)
                     workflow_compact_tree = build_compact_tree(categories)
+                    workflow_ref_paths = list(dict.fromkeys(
+                        c["path"] for c in categories if c.get("path")
+                    ))
                 else:
                 else:
                     workflow_compact_tree = compact_tree or "[]"
                     workflow_compact_tree = compact_tree or "[]"
+                    workflow_ref_paths = []
             else:
             else:
                 workflow_compact_tree = compact_tree or "[]"
                 workflow_compact_tree = compact_tree or "[]"
+                workflow_ref_paths = []
 
 
             # 整个 workflow 传给 LLM(保持上下文)
             # 整个 workflow 传给 LLM(保持上下文)
             draft = {"strategy": workflow}
             draft = {"strategy": workflow}
-            prompt = render_grounding_prompt(template, "strategy", draft, workflow_compact_tree)
+            prompt = render_grounding_prompt(template, "strategy", draft, workflow_compact_tree, workflow_ref_paths)
             messages = [{"role": "user", "content": prompt}]
             messages = [{"role": "user", "content": prompt}]
 
 
             grounded, cost = await call_llm_with_retry(
             grounded, cost = await call_llm_with_retry(
@@ -336,14 +344,19 @@ async def ground_single_case(
                 if all_keywords:
                 if all_keywords:
                     categories = await search_categories_by_keywords(all_keywords, top_k=5)
                     categories = await search_categories_by_keywords(all_keywords, top_k=5)
                     cap_compact_tree = build_compact_tree(categories)
                     cap_compact_tree = build_compact_tree(categories)
+                    cap_ref_paths = list(dict.fromkeys(
+                        c["path"] for c in categories if c.get("path")
+                    ))
                 else:
                 else:
                     cap_compact_tree = compact_tree or "[]"
                     cap_compact_tree = compact_tree or "[]"
+                    cap_ref_paths = []
             else:
             else:
                 cap_compact_tree = compact_tree or "[]"
                 cap_compact_tree = compact_tree or "[]"
+                cap_ref_paths = []
 
 
             # 整个 capabilities 传给 LLM(保持上下文)
             # 整个 capabilities 传给 LLM(保持上下文)
             draft = {"capabilities": capabilities}
             draft = {"capabilities": capabilities}
-            prompt = render_grounding_prompt(template, "capability", draft, cap_compact_tree)
+            prompt = render_grounding_prompt(template, "capability", draft, cap_compact_tree, cap_ref_paths)
             messages = [{"role": "user", "content": prompt}]
             messages = [{"role": "user", "content": prompt}]
 
 
             grounded, cost = await call_llm_with_retry(
             grounded, cost = await call_llm_with_retry(
@@ -400,7 +413,7 @@ async def apply_grounding(
     cases = case_data.get("cases", [])
     cases = case_data.get("cases", [])
 
 
     # 检查是否使用 API 动态搜索模式
     # 检查是否使用 API 动态搜索模式
-    use_api = os.getenv("USE_SEARCH_API", "false").lower() == "true"
+    use_api = os.getenv("USE_SEARCH_API", "true").lower() == "true"
 
 
     # 如果不使用 API,预加载完整内容树
     # 如果不使用 API,预加载完整内容树
     compact_tree = None
     compact_tree = None