|
@@ -442,6 +442,34 @@ class CategoryEvaluation(BaseModel):
|
|
|
品类维度得分: float = Field(..., description="品类维度得分 -1~1")
|
|
品类维度得分: float = Field(..., description="品类维度得分 -1~1")
|
|
|
简要说明品类维度相关度理由: str = Field(..., description="品类维度相关度理由")
|
|
简要说明品类维度相关度理由: str = Field(..., description="品类维度相关度理由")
|
|
|
|
|
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# 批量评估数据模型
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
class BatchMotivationItem(BaseModel):
    """批量动机评估中的单个SUG结果"""
    # Must echo the input SUG verbatim; callers validate order by comparing it.
    sug_text: str = Field(..., description="SUG文本")
    原始问题核心动机提取: CoreMotivationExtraction = Field(..., description="原始问题核心动机提取")
    # ge/le bounds enforce the documented -1~1 range at validation time,
    # consistent with ExtensionWordEvaluation.延伸词得分.
    动机维度得分: float = Field(..., ge=-1, le=1, description="动机维度得分 -1~1")
    简要说明动机维度相关度理由: str = Field(..., description="动机维度相关度理由")
    # Only meaningful when the score is 0; defaults to the "not applicable" marker.
    得分为零的原因: str = Field(default="不适用", description="原始问题无动机/sug词条无动机/动机不匹配/不适用")
|
|
|
|
|
+
|
|
|
|
|
class BatchMotivationResult(BaseModel):
    """批量动机评估结果"""
    # One item per input SUG; consumers rely on the order matching the
    # numbered input list exactly (validated downstream against sug_text).
    evaluations: list[BatchMotivationItem] = Field(..., description="所有SUG的动机评估结果")
|
|
|
|
|
+
|
|
|
|
|
class BatchCategoryItem(BaseModel):
    """批量品类评估中的单个SUG结果"""
    # Must echo the input SUG verbatim; callers validate order by comparing it.
    sug_text: str = Field(..., description="SUG文本")
    # ge/le bounds enforce the documented -1~1 range at validation time,
    # consistent with ExtensionWordEvaluation.延伸词得分.
    品类维度得分: float = Field(..., ge=-1, le=1, description="品类维度得分 -1~1")
    简要说明品类维度相关度理由: str = Field(..., description="品类维度相关度理由")
|
|
|
|
|
+
|
|
|
|
|
class BatchCategoryResult(BaseModel):
    """批量品类评估结果"""
    # One item per input SUG; consumers rely on the order matching the
    # numbered input list exactly (validated downstream against sug_text).
    evaluations: list[BatchCategoryItem] = Field(..., description="所有SUG的品类评估结果")
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
class ExtensionWordEvaluation(BaseModel):
|
|
class ExtensionWordEvaluation(BaseModel):
|
|
|
"""延伸词评估"""
|
|
"""延伸词评估"""
|
|
|
延伸词得分: float = Field(..., ge=-1, le=1, description="延伸词得分 -1~1")
|
|
延伸词得分: float = Field(..., ge=-1, le=1, description="延伸词得分 -1~1")
|
|
@@ -1147,6 +1175,293 @@ extension_word_evaluator = Agent[None](
|
|
|
model_settings=ModelSettings(temperature=0.2)
|
|
model_settings=ModelSettings(temperature=0.2)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# 批量评估专用 Prompt 和 Agent(性能优化:每批10个SUG)
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
+# 批量动机评估prompt - 从batch_evaluation_demo.py复制(已验证有效)
|
|
|
|
|
+batch_motivation_evaluation_instructions = """
|
|
|
|
|
+# 角色
|
|
|
|
|
+你是**专业的动机意图评估专家**。
|
|
|
|
|
+任务:判断<平台sug词条>与<原始问题>的**动机意图匹配度**,给出**-1到1之间**的数值评分。
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+# 输入信息
|
|
|
|
|
+你将接收到以下输入:
|
|
|
|
|
+- **<原始问题>**:用户的初始查询问题,代表用户的真实需求意图。
|
|
|
|
|
+- **<平台sug词条列表>**:待评估的多个词条(编号1-N),每个词条需要独立评估
|
|
|
|
|
+
|
|
|
|
|
+**批量评估说明**:
|
|
|
|
|
+- 输入格式为编号列表:1. 词条1 2. 词条2 ...
|
|
|
|
|
+- 每个词条都是独立的评估对象
|
|
|
|
|
+- 对每个词条使用完全相同的评估标准
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+# 核心约束
|
|
|
|
|
+
|
|
|
|
|
+## 维度独立性声明
|
|
|
|
|
+【严格约束】本评估**仅评估动机意图维度**:
|
|
|
|
|
+- **只评估** 用户"想要做什么",即原始问题的行为意图和目的
|
|
|
|
|
+- 核心是 **动词**:获取、学习、拍摄、制作、寻找等
|
|
|
|
|
+- 包括:核心动作 + 使用场景 + 最终目的
|
|
|
|
|
+- **评估重点**:动作本身及其语义方向
|
|
|
|
|
+ **禁止使用"主题相关"作为评分依据**:评分理由中不得出现"主题"、"内容"、"话题"等词
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+# 作用域与动作意图
|
|
|
|
|
+
|
|
|
|
|
+## 什么是作用域?
|
|
|
|
|
+**作用域 = 动机层 + 对象层 + 场景层**
|
|
|
|
|
+
|
|
|
|
|
+## 动作意图的识别
|
|
|
|
|
+
|
|
|
|
|
+### 方法1: 显性动词直接提取
|
|
|
|
|
+
|
|
|
|
|
+当原始问题明确包含动词时,直接提取
|
|
|
|
|
+示例:
|
|
|
|
|
+"如何获取素材" → 核心动机 = "获取"
|
|
|
|
|
+"寻找拍摄技巧" → 核心动机 = "寻找"(或"学习")
|
|
|
|
|
+"制作视频教程" → 核心动机 = "制作"
|
|
|
|
|
+
|
|
|
|
|
+### 方法2: 隐性动词语义推理
|
|
|
|
|
+当原始问题没有显性动词时,需要结合上下文推理
|
|
|
|
|
+
|
|
|
|
|
+如果原始问题是纯名词短语,无任何动作线索:
|
|
|
|
|
+→ 核心动机 = 无法识别
|
|
|
|
|
+→ 在此情况下,动机维度得分应为 0。
|
|
|
|
|
+示例:
|
|
|
|
|
+"摄影" → 无法识别动机,动机维度得分 = 0
|
|
|
|
|
+"川西风光" → 无法识别动机,动机维度得分 = 0
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+# 部分作用域的处理
|
|
|
|
|
+
|
|
|
|
|
+## 情况1:sug词条是原始问题的部分作用域
|
|
|
|
|
+
|
|
|
|
|
+当sug词条只包含原始问题的部分作用域时,需要判断:
|
|
|
|
|
+1. sug词条是否包含动作意图
|
|
|
|
|
+2. 如果包含,动作是否匹配
|
|
|
|
|
+
|
|
|
|
|
+**示例**:
|
|
|
|
|
+```
|
|
|
|
|
+原始问题:"川西旅行行程规划"
|
|
|
|
|
+- 完整作用域:规划(动作)+ 旅行行程(对象)+ 川西(场景)
|
|
|
|
|
+
|
|
|
|
|
+Sug词条:"川西旅行"
|
|
|
|
|
+- 包含作用域:旅行(部分对象)+ 川西(场景)
|
|
|
|
|
+- 缺失作用域:规划(动作)
|
|
|
|
|
+- 动作意图评分:0(无动作意图)
|
|
|
|
|
+```
|
|
|
|
|
+
|
|
|
|
|
+**评分原则**:
|
|
|
|
|
+- 如果sug词条缺失动机层(动作) → 动作意图得分 = 0
|
|
|
|
|
+- 如果sug词条包含动机层 → 按动作匹配度评分
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+# 评分标准
|
|
|
|
|
+
|
|
|
|
|
+## 【正向匹配】
|
|
|
|
|
+
|
|
|
|
|
+### +0.9~1.0:核心动作完全一致
|
|
|
|
|
+**示例**:
|
|
|
|
|
+- "规划旅行行程" vs "安排旅行路线" → 0.98
|
|
|
|
|
+ - 规划≈安排,语义完全一致
|
|
|
|
|
+- "获取素材" vs "下载素材" → 0.97
|
|
|
|
|
+ - 获取≈下载,语义完全一致
|
|
|
|
|
+
|
|
|
|
|
+- 特殊规则: 如果sug词的核心动作是原始问题动作的**具体化子集**,也判定为完全一致
|
|
|
|
|
+例: 原始问题"扣除猫咪主体的方法" vs sug词"扣除猫咪眼睛的方法"(子集但目的一致
|
|
|
|
|
+**注意**:此处不考虑对象和场景是否一致,只看动作本身
|
|
|
|
|
+
|
|
|
|
|
+###+0.75~0.95: 核心动作语义相近或为同义表达
|
|
|
|
|
+ - 例: 原始问题"如何获取素材" vs sug词"如何下载素材"
|
|
|
|
|
+ - 同义词对: 获取≈下载≈寻找, 技巧≈方法≈教程≈攻略
|
|
|
|
|
+
|
|
|
|
|
+### +0.50~0.75:动作意图相关
|
|
|
|
|
+**判定标准**:
|
|
|
|
|
+- 动作是实现原始意图的相关路径
|
|
|
|
|
+- 或动作是原始意图的前置/后置步骤
|
|
|
|
|
+
|
|
|
|
|
+**示例**:
|
|
|
|
|
+- "获取素材" vs "管理素材" → 0.65
|
|
|
|
|
+ - 管理是获取后的相关步骤
|
|
|
|
|
+- "规划行程" vs "预订酒店" → 0.60
|
|
|
|
|
+ - 预订是规划的具体实施步骤
|
|
|
|
|
+
|
|
|
|
|
+### +0.25~0.50:动作意图弱相关
|
|
|
|
|
+**判定标准**:
|
|
|
|
|
+- 动作在同一大类但方向不同
|
|
|
|
|
+- 或动作有间接关联
|
|
|
|
|
+
|
|
|
|
|
+**示例**:
|
|
|
|
|
+- "学习摄影技巧" vs "欣赏摄影作品" → 0.35
|
|
|
|
|
+ - 都与摄影有关,但学习≠欣赏
|
|
|
|
|
+- "规划旅行" vs "回忆旅行" → 0.30
|
|
|
|
|
+ - 都与旅行有关,但方向不同
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+## 【中性/无关】
|
|
|
|
|
+
|
|
|
|
|
+### 0:无动作意图或动作完全无关
|
|
|
|
|
+**适用场景**:
|
|
|
|
|
+1. 原始问题或sug词条无法识别动作
|
|
|
|
|
+2. 两者动作意图完全无关
|
|
|
|
|
+
|
|
|
|
|
+**示例**:
|
|
|
|
|
+- "如何获取素材" vs "摄影器材" → 0
|
|
|
|
|
+ - sug词条无动作意图
|
|
|
|
|
+- "川西风光" vs "风光摄影作品" → 0
|
|
|
|
|
+ - 原始问题无动作意图
|
|
|
|
|
+
|
|
|
|
|
+**理由模板**:
|
|
|
|
|
+- "sug词条无明确动作意图,无法评估动作匹配度"
|
|
|
|
|
+- "原始问题无明确动作意图,动作维度得分为0"
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+## 【负向偏离】
|
|
|
|
|
+
|
|
|
|
|
+### -0.2~-0.05:动作方向轻度偏离
|
|
|
|
|
+**示例**:
|
|
|
|
|
+- "学习摄影技巧" vs "销售摄影课程" → -0.10
|
|
|
|
|
+ - 学习 vs 销售,方向有偏差
|
|
|
|
|
+
|
|
|
|
|
+### -0.5~-0.25:动作意图明显冲突
|
|
|
|
|
+**示例**:
|
|
|
|
|
+- "获取免费素材" vs "购买素材" → -0.35
|
|
|
|
|
+ - 获取免费 vs 购买,明显冲突
|
|
|
|
|
+
|
|
|
|
|
+### -1.0~-0.55:动作意图完全相反
|
|
|
|
|
+**示例**:
|
|
|
|
|
+- "下载素材" vs "上传素材" → -0.70
|
|
|
|
|
+ - 下载 vs 上传,方向完全相反
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+## 得分为零的原因(语义判断)
|
|
|
|
|
+
|
|
|
|
|
+当动机维度得分为 0 时,需要在 `得分为零的原因` 字段中选择以下之一:
|
|
|
|
|
+- **"原始问题无动机"**:原始问题是纯名词短语,无法识别任何动作意图
|
|
|
|
|
+- **"sug词条无动机"**:sug词条中不包含任何动作意图
|
|
|
|
|
+- **"动机不匹配"**:双方都有动作,但完全无关联
|
|
|
|
|
+- **"不适用"**:得分不为零时使用此默认值
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+# 批量评估核心原则
|
|
|
|
|
+
|
|
|
|
|
+## 【极其重要】独立评估原则
|
|
|
|
|
+1. **绝对评分**:每个SUG的评分必须基于与原始问题的匹配度,使用固定的评分标准
|
|
|
|
|
+2. **禁止相对比较**:不要比较SUG之间的好坏,不要因为"其他SUG更好"而降低某个SUG的分数
|
|
|
|
|
+3. **标准一致性**:对第1个SUG和第10个SUG使用完全相同的评分标准
|
|
|
|
|
+4. **独立判断**:评估SUG A时,完全不考虑SUG B/C/D的存在
|
|
|
|
|
+
|
|
|
|
|
+**错误示例**:
|
|
|
|
|
+- ❌ "这个SUG比列表中其他的更好,给0.9"
|
|
|
|
|
+- ❌ "相比第一个SUG,这个稍差一些,给0.7"
|
|
|
|
|
+
|
|
|
|
|
+**正确示例**:
|
|
|
|
|
+- ✅ "这个SUG的动作'获取'与原始问题'获取'完全一致,根据评分标准给0.97"
|
|
|
|
|
+- ✅ "这个SUG无动作意图,根据评分标准给0"
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+# 输出格式
|
|
|
|
|
+输出结果必须为一个 **JSON 格式**,包含evaluations数组,每个元素包含:
|
|
|
|
|
+```json
|
|
|
|
|
+{
|
|
|
|
|
+ "evaluations": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "sug_text": "SUG文本",
|
|
|
|
|
+ "原始问题核心动机提取": {
|
|
|
|
|
+ "简要说明核心动机": ""
|
|
|
|
|
+ },
|
|
|
|
|
+ "动机维度得分": "-1到1之间的小数",
|
|
|
|
|
+ "简要说明动机维度相关度理由": "评估理由",
|
|
|
|
|
+ "得分为零的原因": "原始问题无动机/sug词条无动机/动机不匹配/不适用"
|
|
|
|
|
+ }
|
|
|
|
|
+ ]
|
|
|
|
|
+}
|
|
|
|
|
+```
|
|
|
|
|
+
|
|
|
|
|
+**输出约束(非常重要)**:
|
|
|
|
|
+1. **字符串长度限制**:\"简要说明动机维度相关度理由\"字段必须控制在**150字以内**
|
|
|
|
|
+2. **JSON格式规范**:必须生成完整的JSON格式,确保字符串用双引号包裹且正确闭合
|
|
|
|
|
+3. **引号使用**:字符串中如需表达引用,请使用《》或「」代替单引号或双引号
|
|
|
|
|
+4. **顺序严格对应(极其重要)**:
|
|
|
|
|
+ - evaluations数组必须与输入的sug词条列表严格1对1对应
|
|
|
|
|
+ - 第1个元素必须是输入列表的第1个SUG,第2个元素必须是第2个SUG,以此类推
|
|
|
|
|
+ - 每个元素的sug_text必须与输入SUG完全一致(逐字匹配,包括标点)
|
|
|
|
|
+ - 禁止改变顺序、禁止遗漏任何SUG、禁止重复评估
|
|
|
|
|
+ - 示例:输入"1. 秋季摄影素材 2. 川西风光" → 输出[{sug_text:"秋季摄影素材",...}, {sug_text:"川西风光",...}]
|
|
|
|
|
+ - 错误示例:输出[{sug_text:"川西风光",...}, {sug_text:"秋季摄影素材",...}] ← 顺序错误❌
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+# 核心原则总结
|
|
|
|
|
+1. **只评估动作**:完全聚焦于动作意图,不管对象和场景
|
|
|
|
|
+2. **作用域识别**:识别作用域但只评估动机层
|
|
|
|
|
+3. **严格标准一致性**:对所有用例使用相同的评估标准,避免评分飘移
|
|
|
|
|
+4. **理由纯粹**:评分理由只能谈动作,不能谈对象、场景、主题
|
|
|
|
|
+5. **独立评估**:每个SUG完全独立评估,禁止相对比较
|
|
|
|
|
+""".strip()
|
|
|
|
|
+
|
|
|
|
|
+# 批量品类评估prompt - 从batch_evaluation_demo.py复制(与单个品类prompt类似,添加批量说明)
|
|
|
|
|
+# 注:完整prompt见batch_evaluation_demo.py:724-966行,此处使用相同内容
|
|
|
|
|
+batch_category_evaluation_instructions = category_evaluation_instructions.replace(
|
|
|
|
|
+ "- **<平台sug词条>**:待评估的词条,可能是单个或多个作用域的组合",
|
|
|
|
|
+ """- **<平台sug词条列表>**:待评估的多个词条(编号1-N),每个词条需要独立评估
|
|
|
|
|
+
|
|
|
|
|
+**批量评估说明**:
|
|
|
|
|
+- 输入格式为编号列表:1. 词条1 2. 词条2 ...
|
|
|
|
|
+- 每个词条都是独立的评估对象
|
|
|
|
|
+- 对每个词条使用完全相同的评估标准"""
|
|
|
|
|
+).replace(
|
|
|
|
|
+ '"品类维度得分": "-1到1之间的小数",\n "简要说明品类维度相关度理由": "评估该sug词条与原始问题品类匹配程度的理由,包含作用域覆盖理由"',
|
|
|
|
|
+ ''' "evaluations": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "sug_text": "SUG文本",
|
|
|
|
|
+ "品类维度得分": "-1到1之间的小数",
|
|
|
|
|
+ "简要说明品类维度相关度理由": "评估理由"
|
|
|
|
|
+ }
|
|
|
|
|
+ ]'''
|
|
|
|
|
+).replace(
|
|
|
|
|
+ "1. **只看名词和限定词**:完全忽略动作和意图",
|
|
|
|
|
+ """## 【极其重要】独立评估原则
|
|
|
|
|
+1. **绝对评分**:每个SUG的评分必须基于与原始问题的匹配度,使用固定的评分标准
|
|
|
|
|
+2. **禁止相对比较**:不要比较SUG之间的好坏,不要因为"其他SUG更好"而降低某个SUG的分数
|
|
|
|
|
+3. **标准一致性**:对第1个SUG和第10个SUG使用完全相同的评分标准
|
|
|
|
|
+4. **独立判断**:评估SUG A时,完全不考虑SUG B/C/D的存在
|
|
|
|
|
+
|
|
|
|
|
+---
|
|
|
|
|
+
|
|
|
|
|
+# 核心原则总结
|
|
|
|
|
+
|
|
|
|
|
+1. **只看名词和限定词**:完全忽略动作和意图"""
|
|
|
|
|
+) + """
|
|
|
|
|
+6. **独立评估**:每个SUG完全独立评估,禁止相对比较
|
|
|
|
|
+7. **顺序严格对应(极其重要)**:evaluations数组必须与输入的sug词条列表严格1对1对应
|
|
|
|
|
+"""
|
|
|
|
|
+
|
|
|
|
|
# Batch-evaluation agents: one per dimension. Both consume the same batch
# input block and return order-aligned evaluation lists.
batch_motivation_evaluator = Agent[None](
    name="批量动机维度评估专家",
    model=get_model(MODEL_NAME),
    instructions=batch_motivation_evaluation_instructions,
    output_type=BatchMotivationResult,
)

batch_category_evaluator = Agent[None](
    name="批量品类维度评估专家",
    model=get_model(MODEL_NAME),
    instructions=batch_category_evaluation_instructions,
    output_type=BatchCategoryResult,
)
|
|
|
|
|
|
|
|
# ============================================================================
|
|
# ============================================================================
|
|
|
# Round 0 专用 Agent(v124新增 - 需求1)
|
|
# Round 0 专用 Agent(v124新增 - 需求1)
|
|
@@ -2486,6 +2801,198 @@ async def evaluate_with_o(text: str, o: str, cache: dict[str, tuple[float, str]]
|
|
|
return 0.0, fallback_reason
|
|
return 0.0, fallback_reason
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def evaluate_batch_with_o(
    texts: list[str],
    o: str,
    cache: dict[str, tuple[float, str]] | None = None,
    context: RunContext | None = None,
    round_num: int = 1
) -> list[tuple[float, str]]:
    """Batch evaluation (at most 10 SUGs per LLM call) - Round 1+.

    Splits *texts* into batches of at most 10, evaluates each batch by
    running the batch motivation and category agents concurrently, and
    falls back to per-item evaluation (``evaluate_with_o``) when a batch
    still fails after retries. Cached items are skipped and re-interleaved
    into the output so order always matches the input.

    Args:
        texts: SUG strings to evaluate.
        o: The original question.
        cache: Optional evaluation cache keyed by SUG text.
        context: Optional run context; used to count LLM calls.
        round_num: Round number, used only for log output.

    Returns:
        One ``(final_score, reason)`` tuple per input text, in input order.
    """
    import time

    BATCH_SIZE = 10  # max SUGs per LLM call
    results: list[tuple[float, str]] = []

    for batch_idx in range(0, len(texts), BATCH_SIZE):
        batch_texts = texts[batch_idx:batch_idx + BATCH_SIZE]
        batch_no = batch_idx // BATCH_SIZE + 1
        batch_start_time = time.time()

        print(f"    [Round {round_num} 批量评估] 批次{batch_no}: 评估 {len(batch_texts)} 个SUG...")

        # Split into cached and uncached items, remembering positions so the
        # two result streams can be re-interleaved in input order.
        cached_results: dict[int, tuple[float, str]] = {}
        uncached_texts: list[str] = []
        for i, text in enumerate(batch_texts):
            if cache is not None and text in cache:
                cached_results[i] = cache[text]
                print(f"      ⚡ 缓存命中: {text} -> {cache[text][0]:.2f}")
            else:
                uncached_texts.append(text)

        if not uncached_texts:
            print(f"      ✅ 全部命中缓存,跳过批量评估")
            results.extend(cached_results[i] for i in range(len(batch_texts)))
            continue

        def _merge(fresh: list[tuple[float, str]]) -> list[tuple[float, str]]:
            """Re-interleave cached and freshly computed results in input order."""
            merged: list[tuple[float, str]] = []
            fresh_idx = 0
            for i in range(len(batch_texts)):
                if i in cached_results:
                    merged.append(cached_results[i])
                else:
                    merged.append(fresh[fresh_idx])
                    fresh_idx += 1
            return merged

        sug_list_str = "\n".join(f"{i}. {sug}" for i, sug in enumerate(uncached_texts, 1))
        batch_input = f"""
<原始问题>
{o}
</原始问题>

<平台sug词条列表>
{sug_list_str}
</平台sug词条列表>

请对以上所有SUG每一个进行完全独立评估。
"""

        # One batch call counts as 2 LLM calls (motivation + category).
        if context is not None:
            context.stats_llm_calls += 2

        max_retries = 2
        batch_success = False

        for attempt in range(max_retries):
            try:
                # Run both batch evaluators concurrently (no extension-word dim).
                motivation_result, category_result = await asyncio.gather(
                    Runner.run(batch_motivation_evaluator, batch_input),
                    Runner.run(batch_category_evaluator, batch_input),
                )

                batch_motivation: BatchMotivationResult = motivation_result.final_output
                batch_category: BatchCategoryResult = category_result.final_output

                # The model must return exactly one evaluation per input...
                if len(batch_motivation.evaluations) != len(uncached_texts):
                    raise ValueError(f"动机评估数量不匹配: 期望{len(uncached_texts)},实际{len(batch_motivation.evaluations)}")
                if len(batch_category.evaluations) != len(uncached_texts):
                    raise ValueError(f"品类评估数量不匹配: 期望{len(uncached_texts)},实际{len(batch_category.evaluations)}")

                # ...and in exactly the input order (verified via sug_text echo).
                for i, (expected_text, mot_item, cat_item) in enumerate(
                    zip(uncached_texts, batch_motivation.evaluations, batch_category.evaluations)
                ):
                    if mot_item.sug_text != expected_text:
                        raise ValueError(f"动机评估顺序错误: 位置{i+1}期望'{expected_text}',实际'{mot_item.sug_text}'")
                    if cat_item.sug_text != expected_text:
                        raise ValueError(f"品类评估顺序错误: 位置{i+1}期望'{expected_text}',实际'{cat_item.sug_text}'")

                # Build per-SUG final scores and combined reasons.
                batch_results_temp: list[tuple[float, str]] = []
                for mot_item, cat_item in zip(batch_motivation.evaluations, batch_category.evaluations):
                    motivation_score = mot_item.动机维度得分
                    category_score = cat_item.品类维度得分

                    # Rule-based final score (no extension-word dimension).
                    final_score, rule_applied = calculate_final_score_v2(
                        motivation_score, category_score
                    )

                    combined_reason = (
                        f'【评估对象】词条"{mot_item.sug_text}" vs 原始问题"{o}"\n'
                        f"【核心动机】{mot_item.原始问题核心动机提取.简要说明核心动机}\n"
                        f"【动机维度 {motivation_score:.2f}】{mot_item.简要说明动机维度相关度理由}\n"
                        f"【品类维度 {category_score:.2f}】{cat_item.简要说明品类维度相关度理由}\n"
                        f"【最终得分 {final_score:.2f}】"
                    )
                    if rule_applied:
                        combined_reason += f"\n【规则说明】{rule_applied}"

                    batch_results_temp.append((final_score, combined_reason))

                    if cache is not None:
                        cache[mot_item.sug_text] = (final_score, combined_reason)

                results.extend(_merge(batch_results_temp))
                batch_success = True

                batch_elapsed = time.time() - batch_start_time
                print(f"      ✅ 批次{batch_no}完成: {len(uncached_texts)}个SUG,耗时{batch_elapsed:.2f}秒")
                break

            except Exception as e:
                error_msg = str(e)
                if attempt < max_retries - 1:
                    print(f"      ⚠️ 批量评估失败 (尝试 {attempt+1}/{max_retries}): {error_msg[:150]}")
                    print(f"      正在重试...")
                    await asyncio.sleep(1)
                else:
                    print(f"      ❌ 批量评估失败 (已达最大重试次数): {error_msg[:150]}")

        # Fallback: evaluate each uncached SUG individually.
        # BUGFIX: start from a fresh list here. Previously the fallback reused
        # whatever `batch_results_temp` was left over from the failed attempt —
        # a NameError when the failure happened before it was assigned, and
        # stale partial results (misaligned merge) when it was half-filled.
        if not batch_success:
            print(f"      ⚠️ 批量评估失败,回退到单个评估模式...")
            fallback_results: list[tuple[float, str]] = []
            for text in uncached_texts:
                try:
                    score, reason = await evaluate_with_o(text, o, cache, context)
                    fallback_results.append((score, reason))
                except Exception as e:
                    print(f"      ❌ 单个评估也失败: {text[:30]}... - {str(e)[:100]}")
                    fallback_results.append((0.0, f"评估失败: {str(e)[:100]}"))
            results.extend(_merge(fallback_results))

    return results
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
async def evaluate_with_o_round0(text: str, o: str, cache: dict[str, tuple[float, str]] | None = None) -> tuple[float, str]:
|
|
async def evaluate_with_o_round0(text: str, o: str, cache: dict[str, tuple[float, str]] | None = None) -> tuple[float, str]:
|
|
|
"""Round 0专用评估函数(v124新增 - 需求1)
|
|
"""Round 0专用评估函数(v124新增 - 需求1)
|
|
|
|
|
|
|
@@ -2593,6 +3100,194 @@ async def evaluate_with_o_round0(text: str, o: str, cache: dict[str, tuple[float
|
|
|
return 0.0, fallback_reason
|
|
return 0.0, fallback_reason
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def evaluate_batch_with_o_round0(
    texts: list[str],
    o: str,
    cache: dict[str, tuple[float, str]] | None = None
) -> list[tuple[float, str]]:
    """Batch evaluation (at most 10 items per LLM call) - Round 0 only.

    Same batching, validation and fallback strategy as
    ``evaluate_batch_with_o``, but intended for Round 0 segment/word
    evaluation: cache entries use a ``round0:{text}:{o}`` key and the
    per-item fallback is ``evaluate_with_o_round0``.

    Args:
        texts: Segment/word strings to evaluate.
        o: The original question.
        cache: Optional evaluation cache.

    Returns:
        One ``(final_score, reason)`` tuple per input text, in input order.
    """
    import time

    BATCH_SIZE = 10  # max items per LLM call
    results: list[tuple[float, str]] = []

    for batch_idx in range(0, len(texts), BATCH_SIZE):
        batch_texts = texts[batch_idx:batch_idx + BATCH_SIZE]
        batch_no = batch_idx // BATCH_SIZE + 1
        batch_start_time = time.time()

        print(f"    [Round 0 批量评估] 批次{batch_no}: 评估 {len(batch_texts)} 个words...")

        # Split into cached and uncached items, remembering positions so the
        # two result streams can be re-interleaved in input order.
        cached_results: dict[int, tuple[float, str]] = {}
        uncached_texts: list[str] = []
        for i, text in enumerate(batch_texts):
            cache_key = f"round0:{text}:{o}"
            if cache is not None and cache_key in cache:
                cached_results[i] = cache[cache_key]
                print(f"      ⚡ Round0缓存命中: {text} -> {cache[cache_key][0]:.2f}")
            else:
                uncached_texts.append(text)

        if not uncached_texts:
            print(f"      ✅ 全部命中缓存,跳过批量评估")
            results.extend(cached_results[i] for i in range(len(batch_texts)))
            continue

        def _merge(fresh: list[tuple[float, str]]) -> list[tuple[float, str]]:
            """Re-interleave cached and freshly computed results in input order."""
            merged: list[tuple[float, str]] = []
            fresh_idx = 0
            for i in range(len(batch_texts)):
                if i in cached_results:
                    merged.append(cached_results[i])
                else:
                    merged.append(fresh[fresh_idx])
                    fresh_idx += 1
            return merged

        word_list_str = "\n".join(f"{i}. {word}" for i, word in enumerate(uncached_texts, 1))
        batch_input = f"""
<原始问题>
{o}
</原始问题>

<词条列表>
{word_list_str}
</词条列表>

请对以上所有词条每一个进行完全独立评估。
"""

        max_retries = 2
        batch_success = False

        for attempt in range(max_retries):
            try:
                # Round 0 reuses the Round 1+ batch agents: their prompts
                # already contain every required constraint.
                motivation_result, category_result = await asyncio.gather(
                    Runner.run(batch_motivation_evaluator, batch_input),
                    Runner.run(batch_category_evaluator, batch_input),
                )

                batch_motivation: BatchMotivationResult = motivation_result.final_output
                batch_category: BatchCategoryResult = category_result.final_output

                # The model must return exactly one evaluation per input...
                if len(batch_motivation.evaluations) != len(uncached_texts):
                    raise ValueError(f"Round0动机评估数量不匹配: 期望{len(uncached_texts)},实际{len(batch_motivation.evaluations)}")
                if len(batch_category.evaluations) != len(uncached_texts):
                    raise ValueError(f"Round0品类评估数量不匹配: 期望{len(uncached_texts)},实际{len(batch_category.evaluations)}")

                # ...and in exactly the input order (verified via sug_text echo).
                for i, (expected_text, mot_item, cat_item) in enumerate(
                    zip(uncached_texts, batch_motivation.evaluations, batch_category.evaluations)
                ):
                    if mot_item.sug_text != expected_text:
                        raise ValueError(f"Round0动机评估顺序错误: 位置{i+1}期望'{expected_text}',实际'{mot_item.sug_text}'")
                    if cat_item.sug_text != expected_text:
                        raise ValueError(f"Round0品类评估顺序错误: 位置{i+1}期望'{expected_text}',实际'{cat_item.sug_text}'")

                # Build per-item final scores and combined reasons.
                batch_results_temp: list[tuple[float, str]] = []
                for mot_item, cat_item in zip(batch_motivation.evaluations, batch_category.evaluations):
                    motivation_score = mot_item.动机维度得分
                    category_score = cat_item.品类维度得分

                    # Rule-based final score (no extension-word dimension).
                    final_score, rule_applied = calculate_final_score_v2(
                        motivation_score, category_score
                    )

                    combined_reason = (
                        f'【评估对象】词条"{mot_item.sug_text}" vs 原始问题"{o}"\n'
                        f"【核心动机】{mot_item.原始问题核心动机提取.简要说明核心动机}\n"
                        f"【动机维度 {motivation_score:.2f}】{mot_item.简要说明动机维度相关度理由}\n"
                        f"【品类维度 {category_score:.2f}】{cat_item.简要说明品类维度相关度理由}\n"
                        f"【最终得分 {final_score:.2f}】"
                    )
                    if rule_applied:
                        combined_reason += f"\n【规则说明】{rule_applied}"

                    batch_results_temp.append((final_score, combined_reason))

                    # Cache under the round0-prefixed key.
                    if cache is not None:
                        cache[f"round0:{mot_item.sug_text}:{o}"] = (final_score, combined_reason)

                results.extend(_merge(batch_results_temp))
                batch_success = True

                batch_elapsed = time.time() - batch_start_time
                print(f"      ✅ 批次{batch_no}完成: {len(uncached_texts)}个words,耗时{batch_elapsed:.2f}秒")
                break

            except Exception as e:
                error_msg = str(e)
                if attempt < max_retries - 1:
                    print(f"      ⚠️ Round0批量评估失败 (尝试 {attempt+1}/{max_retries}): {error_msg[:150]}")
                    print(f"      正在重试...")
                    await asyncio.sleep(1)
                else:
                    print(f"      ❌ Round0批量评估失败 (已达最大重试次数): {error_msg[:150]}")

        # Fallback: evaluate each uncached item individually (fresh list, so
        # partial results from a failed attempt can never leak into the merge).
        if not batch_success:
            print(f"      ⚠️ Round0批量评估失败,回退到单个评估模式...")
            fallback_results: list[tuple[float, str]] = []
            for text in uncached_texts:
                try:
                    score, reason = await evaluate_with_o_round0(text, o, cache)
                    fallback_results.append((score, reason))
                except Exception as e:
                    print(f"      ❌ Round0单个评估也失败: {text[:30]}... - {str(e)[:100]}")
                    fallback_results.append((0.0, f"Round0评估失败: {str(e)[:100]}"))
            results.extend(_merge(fallback_results))

    return results
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
async def evaluate_within_scope(text: str, scope_text: str, cache: dict[str, tuple[float, str]] | None = None, context: RunContext | None = None) -> tuple[float, str]:
|
|
async def evaluate_within_scope(text: str, scope_text: str, cache: dict[str, tuple[float, str]] | None = None, context: RunContext | None = None) -> tuple[float, str]:
|
|
|
"""域内/域间专用评估函数(v124新增 - 需求2&3)
|
|
"""域内/域间专用评估函数(v124新增 - 需求2&3)
|
|
|
|
|
|
|
@@ -3061,21 +3756,27 @@ async def run_round(
|
|
|
all_sugs.append(sug)
|
|
all_sugs.append(sug)
|
|
|
sug_to_q_map[id(sug)] = q_text
|
|
sug_to_q_map[id(sug)] = q_text
|
|
|
|
|
|
|
|
- # 2.2 并发评估所有sug(使用信号量限制并发数)
|
|
|
|
|
- # 每个 evaluate_sug 内部会并发调用 2 个 LLM,所以这里限制为 5,实际并发 LLM 请求为 10
|
|
|
|
|
- MAX_CONCURRENT_EVALUATIONS = 30 # 🚀 性能优化:从5提升到30,并发评估能力提升6倍
|
|
|
|
|
- semaphore = asyncio.Semaphore(MAX_CONCURRENT_EVALUATIONS)
|
|
|
|
|
|
|
+ # 2.2 批量评估所有sug(每批最多10个)
|
|
|
|
|
+ # 🚀 性能优化:使用批量评估替代单个并发评估,显著提升性能
|
|
|
|
|
+ if all_sugs:
|
|
|
|
|
+ print(f" 开始批量评估 {len(all_sugs)} 个建议词(每批最多10个)...")
|
|
|
|
|
|
|
|
- async def evaluate_sug(sug: Sug) -> Sug:
|
|
|
|
|
- async with semaphore: # 限制并发数
|
|
|
|
|
- # 根据轮次选择 prompt: 第一轮使用 round1 prompt,后续使用标准 prompt
|
|
|
|
|
- sug.score_with_o, sug.reason = await evaluate_with_o(sug.text, o, context.evaluation_cache, context=context, round_num=round_num)
|
|
|
|
|
- return sug
|
|
|
|
|
|
|
+ # 提取所有sug的text
|
|
|
|
|
+ sug_texts = [sug.text for sug in all_sugs]
|
|
|
|
|
|
|
|
- if all_sugs:
|
|
|
|
|
- print(f" 开始评估 {len(all_sugs)} 个建议词(并发限制: {MAX_CONCURRENT_EVALUATIONS})...")
|
|
|
|
|
- eval_tasks = [evaluate_sug(sug) for sug in all_sugs]
|
|
|
|
|
- await asyncio.gather(*eval_tasks)
|
|
|
|
|
|
|
+ # 批量评估
|
|
|
|
|
+ batch_results = await evaluate_batch_with_o(
|
|
|
|
|
+ texts=sug_texts,
|
|
|
|
|
+ o=o,
|
|
|
|
|
+ cache=context.evaluation_cache,
|
|
|
|
|
+ context=context,
|
|
|
|
|
+ round_num=round_num
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 将结果分配回sug对象
|
|
|
|
|
+ for sug, (score, reason) in zip(all_sugs, batch_results):
|
|
|
|
|
+ sug.score_with_o = score
|
|
|
|
|
+ sug.reason = reason
|
|
|
|
|
|
|
|
# 2.3 打印结果并组织到sug_details
|
|
# 2.3 打印结果并组织到sug_details
|
|
|
sug_details = {} # 保存每个Q对应的sug列表
|
|
sug_details = {} # 保存每个Q对应的sug列表
|
|
@@ -3568,41 +4269,60 @@ async def initialize_v2(o: str, context: RunContext) -> list[Segment]:
|
|
|
# 2. 对每个segment拆词并评估
|
|
# 2. 对每个segment拆词并评估
|
|
|
print(f"\n[步骤2] 对每个segment拆词并评估...")
|
|
print(f"\n[步骤2] 对每个segment拆词并评估...")
|
|
|
|
|
|
|
|
- MAX_CONCURRENT_EVALUATIONS = 30 # 🚀 性能优化:从5提升到30,并发评估能力提升6倍
|
|
|
|
|
|
|
+ # 2.1 先对所有segment拆词(并发)
|
|
|
|
|
+ MAX_CONCURRENT_EVALUATIONS = 30
|
|
|
semaphore = asyncio.Semaphore(MAX_CONCURRENT_EVALUATIONS)
|
|
semaphore = asyncio.Semaphore(MAX_CONCURRENT_EVALUATIONS)
|
|
|
|
|
|
|
|
- async def process_segment(segment: Segment) -> Segment:
|
|
|
|
|
- """处理单个segment: 拆词 + 评估segment + 评估词"""
|
|
|
|
|
|
|
+ async def segment_words(segment: Segment) -> Segment:
|
|
|
|
|
+ """对segment进行拆词"""
|
|
|
async with semaphore:
|
|
async with semaphore:
|
|
|
- # 2.1 拆词
|
|
|
|
|
word_result = await Runner.run(word_segmenter, segment.text)
|
|
word_result = await Runner.run(word_segmenter, segment.text)
|
|
|
word_segmentation: WordSegmentation = word_result.final_output
|
|
word_segmentation: WordSegmentation = word_result.final_output
|
|
|
segment.words = word_segmentation.words
|
|
segment.words = word_segmentation.words
|
|
|
|
|
+ return segment
|
|
|
|
|
|
|
|
- # 2.2 评估segment与原始问题的相关度(使用Round 0专用评估)
|
|
|
|
|
- segment.score_with_o, segment.reason = await evaluate_with_o_round0(
|
|
|
|
|
- segment.text, o, context.evaluation_cache
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- # 2.3 评估每个词与原始问题的相关度(使用Round 0专用评估)
|
|
|
|
|
- word_eval_tasks = []
|
|
|
|
|
- for word in segment.words:
|
|
|
|
|
- async def eval_word(w: str) -> tuple[str, float, str]:
|
|
|
|
|
- score, reason = await evaluate_with_o_round0(w, o, context.evaluation_cache)
|
|
|
|
|
- return w, score, reason
|
|
|
|
|
- word_eval_tasks.append(eval_word(word))
|
|
|
|
|
|
|
+ if segment_list:
|
|
|
|
|
+ print(f" [步骤2.1] 对 {len(segment_list)} 个segment进行拆词...")
|
|
|
|
|
+ word_tasks = [segment_words(seg) for seg in segment_list]
|
|
|
|
|
+ await asyncio.gather(*word_tasks)
|
|
|
|
|
+
|
|
|
|
|
+ # 2.2 批量评估所有segments
|
|
|
|
|
+ print(f" [步骤2.2] 批量评估 {len(segment_list)} 个segments...")
|
|
|
|
|
+ segment_texts = [seg.text for seg in segment_list]
|
|
|
|
|
+ segment_results = await evaluate_batch_with_o_round0(
|
|
|
|
|
+ texts=segment_texts,
|
|
|
|
|
+ o=o,
|
|
|
|
|
+ cache=context.evaluation_cache
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
- word_results = await asyncio.gather(*word_eval_tasks)
|
|
|
|
|
- for word, score, reason in word_results:
|
|
|
|
|
- segment.word_scores[word] = score
|
|
|
|
|
- segment.word_reasons[word] = reason
|
|
|
|
|
|
|
+ # 分配segment评估结果
|
|
|
|
|
+ for segment, (score, reason) in zip(segment_list, segment_results):
|
|
|
|
|
+ segment.score_with_o = score
|
|
|
|
|
+ segment.reason = reason
|
|
|
|
|
|
|
|
- return segment
|
|
|
|
|
|
|
+ # 2.3 收集所有words并批量评估
|
|
|
|
|
+ all_words = []
|
|
|
|
|
+ word_to_segments = {} # 记录每个word属于哪些segments
|
|
|
|
|
+ for segment in segment_list:
|
|
|
|
|
+ for word in segment.words:
|
|
|
|
|
+ if word not in word_to_segments:
|
|
|
|
|
+ all_words.append(word)
|
|
|
|
|
+ word_to_segments[word] = []
|
|
|
|
|
+ word_to_segments[word].append(segment)
|
|
|
|
|
+
|
|
|
|
|
+ if all_words:
|
|
|
|
|
+ print(f" [步骤2.3] 批量评估 {len(all_words)} 个words(去重后)...")
|
|
|
|
|
+ word_results = await evaluate_batch_with_o_round0(
|
|
|
|
|
+ texts=all_words,
|
|
|
|
|
+ o=o,
|
|
|
|
|
+ cache=context.evaluation_cache
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
- if segment_list:
|
|
|
|
|
- print(f" 开始处理 {len(segment_list)} 个segment(并发限制: {MAX_CONCURRENT_EVALUATIONS})...")
|
|
|
|
|
- process_tasks = [process_segment(seg) for seg in segment_list]
|
|
|
|
|
- await asyncio.gather(*process_tasks)
|
|
|
|
|
|
|
+ # 分配word评估结果到所有相关的segments
|
|
|
|
|
+ for word, (score, reason) in zip(all_words, word_results):
|
|
|
|
|
+ for segment in word_to_segments[word]:
|
|
|
|
|
+ segment.word_scores[word] = score
|
|
|
|
|
+ segment.word_reasons[word] = reason
|
|
|
|
|
|
|
|
# 打印步骤1结果
|
|
# 打印步骤1结果
|
|
|
print(f"\n[步骤1: 分段及拆词 结果]")
|
|
print(f"\n[步骤1: 分段及拆词 结果]")
|
|
@@ -4093,6 +4813,7 @@ async def iterative_loop_v2(
|
|
|
enable_evaluation: bool = False
|
|
enable_evaluation: bool = False
|
|
|
):
|
|
):
|
|
|
"""v121 主迭代循环"""
|
|
"""v121 主迭代循环"""
|
|
|
|
|
+ import time
|
|
|
|
|
|
|
|
print(f"\n{'='*60}")
|
|
print(f"\n{'='*60}")
|
|
|
print(f"开始v121迭代循环(语义分段跨域组词版)")
|
|
print(f"开始v121迭代循环(语义分段跨域组词版)")
|
|
@@ -4101,7 +4822,13 @@ async def iterative_loop_v2(
|
|
|
print(f"{'='*60}")
|
|
print(f"{'='*60}")
|
|
|
|
|
|
|
|
# Round 0: 初始化(语义分段 + 拆词)
|
|
# Round 0: 初始化(语义分段 + 拆词)
|
|
|
|
|
+ print(f"\n{'='*60}")
|
|
|
|
|
+ print(f"Round 0: 初始化(语义分段 + 拆词)")
|
|
|
|
|
+ print(f"{'='*60}")
|
|
|
|
|
+ round0_start_time = time.time()
|
|
|
segments = await initialize_v2(context.o, context)
|
|
segments = await initialize_v2(context.o, context)
|
|
|
|
|
+ round0_elapsed = time.time() - round0_start_time
|
|
|
|
|
+ print(f"\n✅ Round 0 完成,耗时: {round0_elapsed:.2f}秒")
|
|
|
|
|
|
|
|
# API实例
|
|
# API实例
|
|
|
xiaohongshu_api = XiaohongshuSearchRecommendations()
|
|
xiaohongshu_api = XiaohongshuSearchRecommendations()
|
|
@@ -4120,8 +4847,11 @@ async def iterative_loop_v2(
|
|
|
num_segments = len(segments)
|
|
num_segments = len(segments)
|
|
|
actual_max_rounds = min(max_rounds, num_segments)
|
|
actual_max_rounds = min(max_rounds, num_segments)
|
|
|
round_num = 1
|
|
round_num = 1
|
|
|
|
|
+ rounds_elapsed_times = [] # 记录每轮耗时
|
|
|
|
|
|
|
|
while query_input and round_num <= actual_max_rounds:
|
|
while query_input and round_num <= actual_max_rounds:
|
|
|
|
|
+ round_start_time = time.time()
|
|
|
|
|
+
|
|
|
query_input, search_list = await run_round_v2( # 不再接收提取结果
|
|
query_input, search_list = await run_round_v2( # 不再接收提取结果
|
|
|
round_num=round_num,
|
|
round_num=round_num,
|
|
|
query_input=query_input, # 传递上一轮的输出
|
|
query_input=query_input, # 传递上一轮的输出
|
|
@@ -4135,6 +4865,10 @@ async def iterative_loop_v2(
|
|
|
enable_evaluation=enable_evaluation
|
|
enable_evaluation=enable_evaluation
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+ round_elapsed = time.time() - round_start_time
|
|
|
|
|
+ rounds_elapsed_times.append(round_elapsed)
|
|
|
|
|
+ print(f"\n✅ Round {round_num} 完成,耗时: {round_elapsed:.2f}秒")
|
|
|
|
|
+
|
|
|
all_search_list.extend(search_list)
|
|
all_search_list.extend(search_list)
|
|
|
# all_extraction_results.update(extraction_results) # 内容提取流程已断开
|
|
# all_extraction_results.update(extraction_results) # 内容提取流程已断开
|
|
|
|
|
|
|
@@ -4151,6 +4885,12 @@ async def iterative_loop_v2(
|
|
|
print(f" 总搜索次数: {len(all_search_list)}")
|
|
print(f" 总搜索次数: {len(all_search_list)}")
|
|
|
print(f" 总帖子数: {sum(len(s.post_list) for s in all_search_list)}")
|
|
print(f" 总帖子数: {sum(len(s.post_list) for s in all_search_list)}")
|
|
|
# print(f" 提取帖子数: {len(all_extraction_results)}") # 内容提取流程已断开
|
|
# print(f" 提取帖子数: {len(all_extraction_results)}") # 内容提取流程已断开
|
|
|
|
|
+ print(f"\n[耗时统计]")
|
|
|
|
|
+ print(f" Round 0 耗时: {round0_elapsed:.2f}秒")
|
|
|
|
|
+ for i, elapsed in enumerate(rounds_elapsed_times, 1):
|
|
|
|
|
+ print(f" Round {i} 耗时: {elapsed:.2f}秒")
|
|
|
|
|
+ total_rounds_time = round0_elapsed + sum(rounds_elapsed_times)
|
|
|
|
|
+ print(f" 所有轮次总耗时: {total_rounds_time:.2f}秒 ({total_rounds_time/60:.2f}分钟)")
|
|
|
print(f"\n[统计信息]")
|
|
print(f"\n[统计信息]")
|
|
|
print(f" LLM评估调用: {context.stats_llm_calls} 次")
|
|
print(f" LLM评估调用: {context.stats_llm_calls} 次")
|
|
|
print(f" SUG请求: {context.stats_sug_requests} 次 (缓存命中: {context.stats_sug_cache_hits} 次)")
|
|
print(f" SUG请求: {context.stats_sug_requests} 次 (缓存命中: {context.stats_sug_cache_hits} 次)")
|
|
@@ -4166,6 +4906,9 @@ async def iterative_loop_v2(
|
|
|
|
|
|
|
|
async def main(input_dir: str, max_rounds: int = 2, sug_threshold: float = 0.7, visualize: bool = False, enable_evaluation: bool = False):
|
|
async def main(input_dir: str, max_rounds: int = 2, sug_threshold: float = 0.7, visualize: bool = False, enable_evaluation: bool = False):
|
|
|
"""主函数"""
|
|
"""主函数"""
|
|
|
|
|
+ import time
|
|
|
|
|
+ total_start_time = time.time() # 记录总开始时间
|
|
|
|
|
+
|
|
|
current_time, log_url = set_trace()
|
|
current_time, log_url = set_trace()
|
|
|
|
|
|
|
|
# 读取输入
|
|
# 读取输入
|
|
@@ -4229,7 +4972,12 @@ async def main(input_dir: str, max_rounds: int = 2, sug_threshold: float = 0.7,
|
|
|
output += f"总搜索次数:{len(all_search_list)}\n"
|
|
output += f"总搜索次数:{len(all_search_list)}\n"
|
|
|
output += f"总帖子数:{sum(len(s.post_list) for s in all_search_list)}\n"
|
|
output += f"总帖子数:{sum(len(s.post_list) for s in all_search_list)}\n"
|
|
|
# output += f"提取帖子数:{len(all_extraction_results)}\n" # 内容提取流程已断开
|
|
# output += f"提取帖子数:{len(all_extraction_results)}\n" # 内容提取流程已断开
|
|
|
|
|
+
|
|
|
|
|
+ # 计算总耗时
|
|
|
|
|
+ total_elapsed_time = time.time() - total_start_time
|
|
|
|
|
+
|
|
|
output += f"\n统计信息:\n"
|
|
output += f"\n统计信息:\n"
|
|
|
|
|
+ output += f" 总耗时: {total_elapsed_time:.2f}秒 ({total_elapsed_time/60:.2f}分钟)\n"
|
|
|
output += f" LLM评估调用: {run_context.stats_llm_calls} 次\n"
|
|
output += f" LLM评估调用: {run_context.stats_llm_calls} 次\n"
|
|
|
output += f" SUG请求: {run_context.stats_sug_requests} 次 (缓存命中: {run_context.stats_sug_cache_hits} 次)\n"
|
|
output += f" SUG请求: {run_context.stats_sug_requests} 次 (缓存命中: {run_context.stats_sug_cache_hits} 次)\n"
|
|
|
output += f" 搜索调用: {run_context.stats_search_calls} 次\n"
|
|
output += f" 搜索调用: {run_context.stats_search_calls} 次\n"
|