пре 2 месеци · f0fa1b34ee
--- a/knowledge_search_traverse.py
+++ b/knowledge_search_traverse.py
@@ -442,6 +442,34 @@ class CategoryEvaluation(BaseModel):
 
				     品类维度得分: float = Field(..., description="品类维度得分 -1~1")
			
 
				     简要说明品类维度相关度理由: str = Field(..., description="品类维度相关度理由")
			
 
				 
			
 
				+# ============================================================================
			
 
				+# 批量评估数据模型
			
 
				+# ============================================================================
			
 
				+
			
 
class BatchMotivationItem(BaseModel):
    """批量动机评估中的单个SUG结果"""
    # Motivation-dimension verdict for ONE SUG inside a batch response.
    # NOTE(review): the docstring above is intentionally left verbatim. pydantic
    # exposes a model's docstring as the JSON-schema "description", and this model
    # is nested in BatchMotivationResult, which is used as an Agent output_type in
    # this file - rewording it would presumably change what the model is shown.

    # The SUG string being scored. evaluate_batch_with_o / _round0 compare this
    # value with the input text at the same position to verify ordering.
    sug_text: str = Field(..., description="SUG文本")
    # Core motivation extracted from the original question (nested model defined
    # elsewhere in this file; callers read its `简要说明核心动机` attribute).
    原始问题核心动机提取: CoreMotivationExtraction = Field(..., description="原始问题核心动机提取")
    # Motivation score. The description advertises the range -1..1, but no ge/le
    # constraint is declared here, so the range is not validated by this model.
    动机维度得分: float = Field(..., description="动机维度得分 -1~1")
    # Short rationale for the motivation score.
    简要说明动机维度相关度理由: str = Field(..., description="动机维度相关度理由")
    # Why the score is zero. The description lists four expected values, but the
    # field is a free-form str; it defaults to "不适用" when omitted.
    得分为零的原因: str = Field(default="不适用", description="原始问题无动机/sug词条无动机/动机不匹配/不适用")
			
 
				+
			
 
class BatchMotivationResult(BaseModel):
    """批量动机评估结果"""
    # Top-level structured output of the batch motivation agent.
    # NOTE(review): docstring kept verbatim because pydantic publishes it as the
    # JSON-schema description of this output type.

    # One entry per SUG. The batch evaluation functions in this file reject the
    # response unless this list has the same length and order as the input list.
    evaluations: list[BatchMotivationItem] = Field(..., description="所有SUG的动机评估结果")
			
 
				+
			
 
class BatchCategoryItem(BaseModel):
    """批量品类评估中的单个SUG结果"""
    # Category-dimension verdict for ONE SUG inside a batch response.
    # NOTE(review): docstring kept verbatim because pydantic publishes it as the
    # JSON-schema description; this model is nested in an Agent output_type.

    # The SUG string being scored; checked against the input text at the same
    # position by the batch evaluation functions.
    sug_text: str = Field(..., description="SUG文本")
    # Category score. The description advertises -1..1, but no ge/le constraint
    # is declared, so the range is not validated by this model.
    品类维度得分: float = Field(..., description="品类维度得分 -1~1")
    # Short rationale for the category score.
    简要说明品类维度相关度理由: str = Field(..., description="品类维度相关度理由")
			
 
				+
			
 
class BatchCategoryResult(BaseModel):
    """批量品类评估结果"""
    # Top-level structured output of the batch category agent.
    # NOTE(review): docstring kept verbatim because pydantic publishes it as the
    # JSON-schema description of this output type.

    # One entry per SUG. The batch evaluation functions in this file reject the
    # response unless this list has the same length and order as the input list.
    evaluations: list[BatchCategoryItem] = Field(..., description="所有SUG的品类评估结果")
			
 
				+
			
 
				+# ============================================================================
			
 
				+
			
 
				 class ExtensionWordEvaluation(BaseModel):
			
 
				     """延伸词评估"""
			
 
				     延伸词得分: float = Field(..., ge=-1, le=1, description="延伸词得分 -1~1")
			
@@ -1147,6 +1175,293 @@ extension_word_evaluator = Agent[None](
 
				     model_settings=ModelSettings(temperature=0.2)
			
 
				 )
			
 
				 
			
 
				+# ============================================================================
			
 
				+# 批量评估专用 Prompt 和 Agent（性能优化：每批10个SUG）
			
 
				+# ============================================================================
			
 
				+
			
 
				+# 批量动机评估prompt - 从batch_evaluation_demo.py复制（已验证有效）
			
 
				+batch_motivation_evaluation_instructions = """
			
 
				+# 角色
			
 
				+你是**专业的动机意图评估专家**。
			
 
				+任务：判断<平台sug词条>与<原始问题>的**动机意图匹配度**，给出**-1到1之间**的数值评分。
			
 
				+
			
 
				+---
			
 
				+# 输入信息
			
 
				+你将接收到以下输入：
			
 
				+- **<原始问题>**：用户的初始查询问题，代表用户的真实需求意图。
			
 
				+- **<平台sug词条列表>**：待评估的多个词条（编号1-N），每个词条需要独立评估
			
 
				+
			
 
				+**批量评估说明**：
			
 
				+- 输入格式为编号列表：1. 词条1  2. 词条2  ...
			
 
				+- 每个词条都是独立的评估对象
			
 
				+- 对每个词条使用完全相同的评估标准
			
 
				+---
			
 
				+
			
 
				+
			
 
				+# 核心约束
			
 
				+
			
 
				+## 维度独立性声明
			
 
				+【严格约束】本评估**仅评估动机意图维度**：
			
 
				+- **只评估** 用户"想要做什么"，即原始问题的行为意图和目的
			
 
				+- 核心是 **动词**：获取、学习、拍摄、制作、寻找等
			
 
				+- 包括：核心动作 + 使用场景 + 最终目的
			
 
				+- **评估重点**：动作本身及其语义方向
			
 
				+ **禁止使用"主题相关"作为评分依据**：评分理由中不得出现"主题"、"内容"、"话题"等词
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 作用域与动作意图
			
 
				+
			
 
				+## 什么是作用域？
			
 
				+**作用域 = 动机层 + 对象层 + 场景层**
			
 
				+
			
 
				+## 动作意图的识别
			
 
				+
			
 
				+### 方法1: 显性动词直接提取
			
 
				+
			
 
				+当原始问题明确包含动词时，直接提取
			
 
				+示例：
			
 
				+"如何获取素材" → 核心动机 = "获取"
			
 
				+"寻找拍摄技巧" → 核心动机 = "寻找"（或"学习"）
			
 
				+"制作视频教程" → 核心动机 = "制作"
			
 
				+
			
 
				+### 方法2: 隐性动词语义推理
			
 
				+当原始问题没有显性动词时，需要结合上下文推理
			
 
				+
			
 
				+如果原始问题是纯名词短语，无任何动作线索：
			
 
				+→ 核心动机 = 无法识别
			
 
				+→ 在此情况下，动机维度得分应为 0。
			
 
				+示例：
			
 
				+"摄影" → 无法识别动机，动机维度得分 = 0
			
 
				+"川西风光" → 无法识别动机，动机维度得分 = 0
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 部分作用域的处理
			
 
				+
			
 
				+## 情况1：sug词条是原始问题的部分作用域
			
 
				+
			
 
				+当sug词条只包含原始问题的部分作用域时，需要判断：
			
 
				+1. sug词条是否包含动作意图
			
 
				+2. 如果包含，动作是否匹配
			
 
				+
			
 
				+**示例**：
			
 
				+```
			
 
				+原始问题："川西旅行行程规划"
			
 
				+- 完整作用域：规划（动作）+ 旅行行程（对象）+ 川西（场景）
			
 
				+
			
 
				+Sug词条："川西旅行"
			
 
				+- 包含作用域：旅行（部分对象）+ 川西（场景）
			
 
				+- 缺失作用域：规划（动作）
			
 
				+- 动作意图评分：0（无动作意图）
			
 
				+```
			
 
				+
			
 
				+**评分原则**：
			
 
				+- 如果sug词条缺失动机层（动作） → 动作意图得分 = 0
			
 
				+- 如果sug词条包含动机层 → 按动作匹配度评分
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 评分标准
			
 
				+
			
 
				+## 【正向匹配】
			
 
				+
			
 
				+### +0.9~1.0：核心动作完全一致
			
 
				+**示例**：
			
 
				+- "规划旅行行程" vs "安排旅行路线" → 0.98
			
 
				+  - 规划≈安排，语义完全一致
			
 
				+- "获取素材" vs "下载素材" → 0.97
			
 
				+  - 获取≈下载，语义完全一致
			
 
				+
			
 
				+- 特殊规则: 如果sug词的核心动作是原始问题动作的**具体化子集**，也判定为完全一致
			
 
				+例: 原始问题"扣除猫咪主体的方法" vs sug词"扣除猫咪眼睛的方法"（子集但目的一致
			
 
				+**注意**：此处不考虑对象和场景是否一致，只看动作本身
			
 
				+
			
 
				+###+0.75~0.95: 核心动作语义相近或为同义表达
			
 
				+  - 例: 原始问题"如何获取素材" vs sug词"如何下载素材"
			
 
				+  - 同义词对: 获取≈下载≈寻找, 技巧≈方法≈教程≈攻略
			
 
				+
			
 
				+### +0.50~0.75：动作意图相关
			
 
				+**判定标准**：
			
 
				+- 动作是实现原始意图的相关路径
			
 
				+- 或动作是原始意图的前置/后置步骤
			
 
				+
			
 
				+**示例**：
			
 
				+- "获取素材" vs "管理素材" → 0.65
			
 
				+  - 管理是获取后的相关步骤
			
 
				+- "规划行程" vs "预订酒店" → 0.60
			
 
				+  - 预订是规划的具体实施步骤
			
 
				+
			
 
				+### +0.25~0.50：动作意图弱相关
			
 
				+**判定标准**：
			
 
				+- 动作在同一大类但方向不同
			
 
				+- 或动作有间接关联
			
 
				+
			
 
				+**示例**：
			
 
				+- "学习摄影技巧" vs "欣赏摄影作品" → 0.35
			
 
				+  - 都与摄影有关，但学习≠欣赏
			
 
				+- "规划旅行" vs "回忆旅行" → 0.30
			
 
				+  - 都与旅行有关，但方向不同
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 【中性/无关】
			
 
				+
			
 
				+### 0：无动作意图或动作完全无关
			
 
				+**适用场景**：
			
 
				+1. 原始问题或sug词条无法识别动作
			
 
				+2. 两者动作意图完全无关
			
 
				+
			
 
				+**示例**：
			
 
				+- "如何获取素材" vs "摄影器材" → 0
			
 
				+  - sug词条无动作意图
			
 
				+- "川西风光" vs "风光摄影作品" → 0
			
 
				+  - 原始问题无动作意图
			
 
				+
			
 
				+**理由模板**：
			
 
				+- "sug词条无明确动作意图，无法评估动作匹配度"
			
 
				+- "原始问题无明确动作意图，动作维度得分为0"
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 【负向偏离】
			
 
				+
			
 
				+### -0.2~-0.05：动作方向轻度偏离
			
 
				+**示例**：
			
 
				+- "学习摄影技巧" vs "销售摄影课程" → -0.10
			
 
				+  - 学习 vs 销售，方向有偏差
			
 
				+
			
 
				+### -0.5~-0.25：动作意图明显冲突
			
 
				+**示例**：
			
 
				+- "获取免费素材" vs "购买素材" → -0.35
			
 
				+  - 获取免费 vs 购买，明显冲突
			
 
				+
			
 
				+### -1.0~-0.55：动作意图完全相反
			
 
				+**示例**：
			
 
				+- "下载素材" vs "上传素材" → -0.70
			
 
				+  - 下载 vs 上传，方向完全相反
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 得分为零的原因（语义判断）
			
 
				+
			
 
				+当动机维度得分为 0 时，需要在 `得分为零的原因` 字段中选择以下之一：
			
 
				+- **"原始问题无动机"**：原始问题是纯名词短语，无法识别任何动作意图
			
 
				+- **"sug词条无动机"**：sug词条中不包含任何动作意图
			
 
				+- **"动机不匹配"**：双方都有动作，但完全无关联
			
 
				+- **"不适用"**：得分不为零时使用此默认值
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 批量评估核心原则
			
 
				+
			
 
				+## 【极其重要】独立评估原则
			
 
				+1. **绝对评分**：每个SUG的评分必须基于与原始问题的匹配度，使用固定的评分标准
			
 
				+2. **禁止相对比较**：不要比较SUG之间的好坏，不要因为"其他SUG更好"而降低某个SUG的分数
			
 
				+3. **标准一致性**：对第1个SUG和第10个SUG使用完全相同的评分标准
			
 
				+4. **独立判断**：评估SUG A时，完全不考虑SUG B/C/D的存在
			
 
				+
			
 
				+**错误示例**：
			
 
				+- ❌ "这个SUG比列表中其他的更好，给0.9"
			
 
				+- ❌ "相比第一个SUG，这个稍差一些，给0.7"
			
 
				+
			
 
				+**正确示例**：
			
 
				+- ✅ "这个SUG的动作'获取'与原始问题'获取'完全一致，根据评分标准给0.97"
			
 
				+- ✅ "这个SUG无动作意图，根据评分标准给0"
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 输出格式
			
 
				+输出结果必须为一个 **JSON 格式**，包含evaluations数组，每个元素包含：
			
 
				+```json
			
 
				+{
			
 
				+  "evaluations": [
			
 
				+    {
			
 
				+      "sug_text": "SUG文本",
			
 
				+      "原始问题核心动机提取": {
			
 
				+        "简要说明核心动机": ""
			
 
				+      },
			
 
				+      "动机维度得分": "-1到1之间的小数",
			
 
				+      "简要说明动机维度相关度理由": "评估理由",
			
 
				+      "得分为零的原因": "原始问题无动机/sug词条无动机/动机不匹配/不适用"
			
 
				+    }
			
 
				+  ]
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+**输出约束（非常重要）**：
			
 
				+1. **字符串长度限制**：\"简要说明动机维度相关度理由\"字段必须控制在**150字以内**
			
 
				+2. **JSON格式规范**：必须生成完整的JSON格式，确保字符串用双引号包裹且正确闭合
			
 
				+3. **引号使用**：字符串中如需表达引用，请使用《》或「」代替单引号或双引号
			
 
				+4. **顺序严格对应（极其重要）**：
			
 
				+   - evaluations数组必须与输入的sug词条列表严格1对1对应
			
 
				+   - 第1个元素必须是输入列表的第1个SUG，第2个元素必须是第2个SUG，以此类推
			
 
				+   - 每个元素的sug_text必须与输入SUG完全一致（逐字匹配，包括标点）
			
 
				+   - 禁止改变顺序、禁止遗漏任何SUG、禁止重复评估
			
 
				+   - 示例：输入"1. 秋季摄影素材  2. 川西风光" → 输出[{sug_text:"秋季摄影素材",...}, {sug_text:"川西风光",...}]
			
 
				+   - 错误示例：输出[{sug_text:"川西风光",...}, {sug_text:"秋季摄影素材",...}] ← 顺序错误❌
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 核心原则总结
			
 
				+1. **只评估动作**：完全聚焦于动作意图，不管对象和场景
			
 
				+2. **作用域识别**：识别作用域但只评估动机层
			
 
				+3. **严格标准一致性**：对所有用例使用相同的评估标准，避免评分飘移
			
 
				+4. **理由纯粹**：评分理由只能谈动作，不能谈对象、场景、主题
			
 
				+5. **独立评估**：每个SUG完全独立评估，禁止相对比较
			
 
				+""".strip()
			
 
				+
			
 
# Batch category-evaluation prompt. Per the original note it mirrors the prompt
# in batch_evaluation_demo.py (lines 724-966 there); here it is derived from the
# single-item `category_evaluation_instructions` by three textual substitutions
# plus an appended tail, rather than being written out in full.
#
# NOTE(review): str.replace returns the text unchanged when the search string is
# absent, so if `category_evaluation_instructions` is ever reworded, any of the
# substitutions below can silently stop applying - consider asserting that each
# target substring is present.
batch_category_evaluation_instructions = category_evaluation_instructions.replace(
    # 1) Input section: describe a numbered SUG list instead of a single SUG.
    "- **<平台sug词条>**：待评估的词条，可能是单个或多个作用域的组合",
    """- **<平台sug词条列表>**：待评估的多个词条（编号1-N），每个词条需要独立评估

**批量评估说明**：
- 输入格式为编号列表：1. 词条1  2. 词条2  ...
- 每个词条都是独立的评估对象
- 对每个词条使用完全相同的评估标准"""
).replace(
    # 2) Output section: wrap the two result fields in an `evaluations` array
    #    whose items also carry `sug_text`.
    '"品类维度得分": "-1到1之间的小数",\n  "简要说明品类维度相关度理由": "评估该sug词条与原始问题品类匹配程度的理由，包含作用域覆盖理由"',
    '''  "evaluations": [
    {
      "sug_text": "SUG文本",
      "品类维度得分": "-1到1之间的小数",
      "简要说明品类维度相关度理由": "评估理由"
    }
  ]'''
).replace(
    # 3) Insert the independent-evaluation rules in front of summary item 1.
    #    NOTE(review): the replacement re-emits a "# 核心原则总结" heading; if the
    #    base prompt already has that heading right above item 1, the result
    #    will contain it twice - verify against the base prompt.
    "1. **只看名词和限定词**：完全忽略动作和意图",
    """## 【极其重要】独立评估原则
1. **绝对评分**：每个SUG的评分必须基于与原始问题的匹配度，使用固定的评分标准
2. **禁止相对比较**：不要比较SUG之间的好坏，不要因为"其他SUG更好"而降低某个SUG的分数
3. **标准一致性**：对第1个SUG和第10个SUG使用完全相同的评分标准
4. **独立判断**：评估SUG A时，完全不考虑SUG B/C/D的存在

---

# 核心原则总结

1. **只看名词和限定词**：完全忽略动作和意图"""
) + """
6. **独立评估**：每个SUG完全独立评估，禁止相对比较
7. **顺序严格对应（极其重要）**：evaluations数组必须与输入的sug词条列表严格1对1对应
"""
# The appended items are numbered 6 and 7, which presumably assumes the base
# prompt ends with summary item 5 - TODO confirm.
			
 
				+
			
 
				+# 批量评估Agent定义
			
 
# Agent that scores the motivation dimension for a whole SUG list in one
# request; callers read its `final_output` as a BatchMotivationResult.
# NOTE(review): no `model_settings` is passed here, whereas
# `extension_word_evaluator` in this file uses ModelSettings(temperature=0.2) -
# confirm that default sampling settings are intended for batch scoring.
batch_motivation_evaluator = Agent[None](
    name="批量动机维度评估专家",
    instructions=batch_motivation_evaluation_instructions,
    model=get_model(MODEL_NAME),
    output_type=BatchMotivationResult,
)
			
 
				+
			
 
# Agent that scores the category dimension for a whole SUG list in one request;
# callers read its `final_output` as a BatchCategoryResult.
# NOTE(review): no `model_settings` is passed here, whereas
# `extension_word_evaluator` in this file uses ModelSettings(temperature=0.2) -
# confirm that default sampling settings are intended for batch scoring.
batch_category_evaluator = Agent[None](
    name="批量品类维度评估专家",
    instructions=batch_category_evaluation_instructions,
    model=get_model(MODEL_NAME),
    output_type=BatchCategoryResult,
)
			
 
				 
			
 
				 # ============================================================================
			
 
				 # Round 0 专用 Agent（v124新增 - 需求1）
			
@@ -2486,6 +2801,198 @@ async def evaluate_with_o(text: str, o: str, cache: dict[str, tuple[float, str]]
 
				     return 0.0, fallback_reason
			
 
				 
			
 
				 
			
 
				+async def evaluate_batch_with_o(
			
 
				+    texts: list[str],
			
 
				+    o: str,
			
 
				+    cache: dict[str, tuple[float, str]] | None = None,
			
 
				+    context: RunContext | None = None,
			
 
				+    round_num: int = 1
			
 
				+) -> list[tuple[float, str]]:
			
 
				+    """批量评估函数（每批最多10个）- Round 1+
			
 
				+
			
 
				+    对多个SUG进行批量评估,自动分批处理（每批最多10个）
			
 
				+    使用批量Agent一次性评估多个SUG,显著提升性能
			
 
				+
			
 
				+    Args:
			
 
				+        texts: 待评估的SUG列表
			
 
				+        o: 原始问题
			
 
				+        cache: 评估缓存（可选）
			
 
				+        context: 运行上下文（可选），用于统计
			
 
				+        round_num: 轮次编号,用于日志输出
			
 
				+
			
 
				+    Returns:
			
 
				+        list[tuple[float, str]]: 每个SUG的(最终得分, 评估理由)列表,顺序与输入严格对应
			
 
				+    """
			
 
				+    import time
			
 
				+
			
 
				+    BATCH_SIZE = 10  # 每批最多10个SUG
			
 
				+    results = []
			
 
				+
			
 
				+    # 分批处理
			
 
				+    for batch_idx in range(0, len(texts), BATCH_SIZE):
			
 
				+        batch_texts = texts[batch_idx:batch_idx + BATCH_SIZE]
			
 
				+        batch_start_time = time.time()
			
 
				+
			
 
				+        print(f"  [Round {round_num} 批量评估] 批次{batch_idx//BATCH_SIZE + 1}: 评估 {len(batch_texts)} 个SUG...")
			
 
				+
			
 
				+        # 先检查缓存,分离已缓存和未缓存的
			
 
				+        cached_results = {}
			
 
				+        uncached_texts = []
			
 
				+        uncached_indices = []
			
 
				+
			
 
				+        for i, text in enumerate(batch_texts):
			
 
				+            if cache is not None and text in cache:
			
 
				+                cached_results[i] = cache[text]
			
 
				+                print(f"    ⚡ 缓存命中: {text} -> {cache[text][0]:.2f}")
			
 
				+            else:
			
 
				+                uncached_texts.append(text)
			
 
				+                uncached_indices.append(i)
			
 
				+
			
 
				+        # 如果全部命中缓存,直接返回
			
 
				+        if not uncached_texts:
			
 
				+            print(f"    ✅ 全部命中缓存,跳过批量评估")
			
 
				+            results.extend([cached_results[i] for i in range(len(batch_texts))])
			
 
				+            continue
			
 
				+
			
 
				+        # 构建批量评估输入
			
 
				+        sug_list_str = "\n".join([f"{i}. {sug}" for i, sug in enumerate(uncached_texts, 1)])
			
 
				+
			
 
				+        batch_input = f"""
			
 
				+<原始问题>
			
 
				+{o}
			
 
				+</原始问题>
			
 
				+
			
 
				+<平台sug词条列表>
			
 
				+{sug_list_str}
			
 
				+</平台sug词条列表>
			
 
				+
			
 
				+请对以上所有SUG每一个进行完全独立评估。
			
 
				+"""
			
 
				+
			
 
				+        # 统计LLM调用（批量调用计为2次:动机+品类）
			
 
				+        if context is not None:
			
 
				+            context.stats_llm_calls += 2
			
 
				+
			
 
				+        # 添加重试机制
			
 
				+        max_retries = 2
			
 
				+        last_error = None
			
 
				+        batch_success = False
			
 
				+
			
 
				+        for attempt in range(max_retries):
			
 
				+            try:
			
 
				+                # 并发调用批量评估器（不含延伸词）
			
 
				+                motivation_task = Runner.run(batch_motivation_evaluator, batch_input)
			
 
				+                category_task = Runner.run(batch_category_evaluator, batch_input)
			
 
				+
			
 
				+                motivation_result, category_result = await asyncio.gather(
			
 
				+                    motivation_task,
			
 
				+                    category_task
			
 
				+                )
			
 
				+
			
 
				+                batch_motivation: BatchMotivationResult = motivation_result.final_output
			
 
				+                batch_category: BatchCategoryResult = category_result.final_output
			
 
				+
			
 
				+                # 验证返回数量
			
 
				+                if len(batch_motivation.evaluations) != len(uncached_texts):
			
 
				+                    raise ValueError(f"动机评估数量不匹配: 期望{len(uncached_texts)},实际{len(batch_motivation.evaluations)}")
			
 
				+                if len(batch_category.evaluations) != len(uncached_texts):
			
 
				+                    raise ValueError(f"品类评估数量不匹配: 期望{len(uncached_texts)},实际{len(batch_category.evaluations)}")
			
 
				+
			
 
				+                # 验证顺序
			
 
				+                for i, (expected_text, mot_item, cat_item) in enumerate(zip(uncached_texts, batch_motivation.evaluations, batch_category.evaluations)):
			
 
				+                    if mot_item.sug_text != expected_text:
			
 
				+                        raise ValueError(f"动机评估顺序错误: 位置{i+1}期望'{expected_text}',实际'{mot_item.sug_text}'")
			
 
				+                    if cat_item.sug_text != expected_text:
			
 
				+                        raise ValueError(f"品类评估顺序错误: 位置{i+1}期望'{expected_text}',实际'{cat_item.sug_text}'")
			
 
				+
			
 
				+                # 处理每个SUG的结果
			
 
				+                batch_results_temp = []
			
 
				+                for mot_item, cat_item in zip(batch_motivation.evaluations, batch_category.evaluations):
			
 
				+                    motivation_score = mot_item.动机维度得分
			
 
				+                    category_score = cat_item.品类维度得分
			
 
				+                    zero_reason = mot_item.得分为零的原因
			
 
				+
			
 
				+                    # 应用规则计算最终得分（不含延伸词维度）
			
 
				+                    final_score, rule_applied = calculate_final_score_v2(
			
 
				+                        motivation_score, category_score
			
 
				+                    )
			
 
				+
			
 
				+                    # 组合评估理由
			
 
				+                    core_motivation = mot_item.原始问题核心动机提取.简要说明核心动机
			
 
				+                    motivation_reason = mot_item.简要说明动机维度相关度理由
			
 
				+                    category_reason = cat_item.简要说明品类维度相关度理由
			
 
				+
			
 
				+                    combined_reason = (
			
 
				+                        f'【评估对象】词条"{mot_item.sug_text}" vs 原始问题"{o}"\n'
			
 
				+                        f"【核心动机】{core_motivation}\n"
			
 
				+                        f"【动机维度 {motivation_score:.2f}】{motivation_reason}\n"
			
 
				+                        f"【品类维度 {category_score:.2f}】{category_reason}\n"
			
 
				+                        f"【最终得分 {final_score:.2f}】"
			
 
				+                    )
			
 
				+
			
 
				+                    if rule_applied:
			
 
				+                        combined_reason += f"\n【规则说明】{rule_applied}"
			
 
				+
			
 
				+                    batch_results_temp.append((final_score, combined_reason))
			
 
				+
			
 
				+                    # 存入缓存
			
 
				+                    if cache is not None:
			
 
				+                        cache[mot_item.sug_text] = (final_score, combined_reason)
			
 
				+
			
 
				+                # 合并缓存结果和批量评估结果
			
 
				+                final_batch_results = []
			
 
				+                uncached_idx = 0
			
 
				+                for i in range(len(batch_texts)):
			
 
				+                    if i in cached_results:
			
 
				+                        final_batch_results.append(cached_results[i])
			
 
				+                    else:
			
 
				+                        final_batch_results.append(batch_results_temp[uncached_idx])
			
 
				+                        uncached_idx += 1
			
 
				+
			
 
				+                results.extend(final_batch_results)
			
 
				+                batch_success = True
			
 
				+
			
 
				+                batch_elapsed = time.time() - batch_start_time
			
 
				+                print(f"    ✅ 批次{batch_idx//BATCH_SIZE + 1}完成: {len(uncached_texts)}个SUG,耗时{batch_elapsed:.2f}秒")
			
 
				+                break
			
 
				+
			
 
				+            except Exception as e:
			
 
				+                last_error = e
			
 
				+                error_msg = str(e)
			
 
				+
			
 
				+                if attempt < max_retries - 1:
			
 
				+                    print(f"    ⚠️  批量评估失败 (尝试 {attempt+1}/{max_retries}): {error_msg[:150]}")
			
 
				+                    print(f"    正在重试...")
			
 
				+                    await asyncio.sleep(1)
			
 
				+                else:
			
 
				+                    print(f"    ❌ 批量评估失败 (已达最大重试次数): {error_msg[:150]}")
			
 
				+
			
 
				+        # 如果批量评估失败,回退到单个评估
			
 
				+        if not batch_success:
			
 
				+            print(f"    ⚠️  批量评估失败,回退到单个评估模式...")
			
 
				+            for text in uncached_texts:
			
 
				+                try:
			
 
				+                    score, reason = await evaluate_with_o(text, o, cache, context)
			
 
				+                    batch_results_temp.append((score, reason))
			
 
				+                except Exception as e:
			
 
				+                    print(f"    ❌ 单个评估也失败: {text[:30]}... - {str(e)[:100]}")
			
 
				+                    batch_results_temp.append((0.0, f"评估失败: {str(e)[:100]}"))
			
 
				+
			
 
				+            # 合并结果
			
 
				+            final_batch_results = []
			
 
				+            uncached_idx = 0
			
 
				+            for i in range(len(batch_texts)):
			
 
				+                if i in cached_results:
			
 
				+                    final_batch_results.append(cached_results[i])
			
 
				+                else:
			
 
				+                    final_batch_results.append(batch_results_temp[uncached_idx])
			
 
				+                    uncached_idx += 1
			
 
				+
			
 
				+            results.extend(final_batch_results)
			
 
				+
			
 
				+    return results
			
 
				+
			
 
				+
			
 
				 async def evaluate_with_o_round0(text: str, o: str, cache: dict[str, tuple[float, str]] | None = None) -> tuple[float, str]:
			
 
				     """Round 0专用评估函数（v124新增 - 需求1）
			
 
				 
			
@@ -2593,6 +3100,194 @@ async def evaluate_with_o_round0(text: str, o: str, cache: dict[str, tuple[float
 
				     return 0.0, fallback_reason
			
 
				 
			
 
				 
			
 
				+async def evaluate_batch_with_o_round0(
			
 
				+    texts: list[str],
			
 
				+    o: str,
			
 
				+    cache: dict[str, tuple[float, str]] | None = None
			
 
				+) -> list[tuple[float, str]]:
			
 
				+    """批量评估函数（每批最多10个）- Round 0 专用
			
 
				+
			
 
				+    对多个words进行批量评估,自动分批处理（每批最多10个）
			
 
				+    使用批量Agent一次性评估多个words,显著提升性能
			
 
				+    专用于Round 0的segment和word评估
			
 
				+
			
 
				+    Args:
			
 
				+        texts: 待评估的word列表
			
 
				+        o: 原始问题
			
 
				+        cache: 评估缓存（可选）
			
 
				+
			
 
				+    Returns:
			
 
				+        list[tuple[float, str]]: 每个word的(最终得分, 评估理由)列表,顺序与输入严格对应
			
 
				+    """
			
 
				+    import time
			
 
				+
			
 
				+    BATCH_SIZE = 10  # 每批最多10个words
			
 
				+    results = []
			
 
				+
			
 
				+    # 分批处理
			
 
				+    for batch_idx in range(0, len(texts), BATCH_SIZE):
			
 
				+        batch_texts = texts[batch_idx:batch_idx + BATCH_SIZE]
			
 
				+        batch_start_time = time.time()
			
 
				+
			
 
				+        print(f"  [Round 0 批量评估] 批次{batch_idx//BATCH_SIZE + 1}: 评估 {len(batch_texts)} 个words...")
			
 
				+
			
 
				+        # 先检查缓存,分离已缓存和未缓存的
			
 
				+        cached_results = {}
			
 
				+        uncached_texts = []
			
 
				+        uncached_indices = []
			
 
				+
			
 
				+        for i, text in enumerate(batch_texts):
			
 
				+            cache_key = f"round0:{text}:{o}"
			
 
				+            if cache is not None and cache_key in cache:
			
 
				+                cached_results[i] = cache[cache_key]
			
 
				+                print(f"    ⚡ Round0缓存命中: {text} -> {cache[cache_key][0]:.2f}")
			
 
				+            else:
			
 
				+                uncached_texts.append(text)
			
 
				+                uncached_indices.append(i)
			
 
				+
			
 
				+        # 如果全部命中缓存,直接返回
			
 
				+        if not uncached_texts:
			
 
				+            print(f"    ✅ 全部命中缓存,跳过批量评估")
			
 
				+            results.extend([cached_results[i] for i in range(len(batch_texts))])
			
 
				+            continue
			
 
				+
			
 
				+        # 构建批量评估输入
			
 
				+        word_list_str = "\n".join([f"{i}. {word}" for i, word in enumerate(uncached_texts, 1)])
			
 
				+
			
 
				+        batch_input = f"""
			
 
				+<原始问题>
			
 
				+{o}
			
 
				+</原始问题>
			
 
				+
			
 
				+<词条列表>
			
 
				+{word_list_str}
			
 
				+</词条列表>
			
 
				+
			
 
				+请对以上所有词条每一个进行完全独立评估。
			
 
				+"""
			
 
				+
			
 
				+        # 添加重试机制
			
 
				+        max_retries = 2
			
 
				+        last_error = None
			
 
				+        batch_success = False
			
 
				+
			
 
				+        for attempt in range(max_retries):
			
 
				+            try:
			
 
				+                # 并发调用批量评估器（不含延伸词,使用Round 0专用prompt）
			
 
				+                # 注意: Round 0使用与Round 1+相同的批量Agent,因为prompt中已包含所有必要约束
			
 
				+                motivation_task = Runner.run(batch_motivation_evaluator, batch_input)
			
 
				+                category_task = Runner.run(batch_category_evaluator, batch_input)
			
 
				+
			
 
				+                motivation_result, category_result = await asyncio.gather(
			
 
				+                    motivation_task,
			
 
				+                    category_task
			
 
				+                )
			
 
				+
			
 
				+                batch_motivation: BatchMotivationResult = motivation_result.final_output
			
 
				+                batch_category: BatchCategoryResult = category_result.final_output
			
 
				+
			
 
				+                # 验证返回数量
			
 
				+                if len(batch_motivation.evaluations) != len(uncached_texts):
			
 
				+                    raise ValueError(f"Round0动机评估数量不匹配: 期望{len(uncached_texts)},实际{len(batch_motivation.evaluations)}")
			
 
				+                if len(batch_category.evaluations) != len(uncached_texts):
			
 
				+                    raise ValueError(f"Round0品类评估数量不匹配: 期望{len(uncached_texts)},实际{len(batch_category.evaluations)}")
			
 
				+
			
 
				+                # 验证顺序
			
 
				+                for i, (expected_text, mot_item, cat_item) in enumerate(zip(uncached_texts, batch_motivation.evaluations, batch_category.evaluations)):
			
 
				+                    if mot_item.sug_text != expected_text:
			
 
				+                        raise ValueError(f"Round0动机评估顺序错误: 位置{i+1}期望'{expected_text}',实际'{mot_item.sug_text}'")
			
 
				+                    if cat_item.sug_text != expected_text:
			
 
				+                        raise ValueError(f"Round0品类评估顺序错误: 位置{i+1}期望'{expected_text}',实际'{cat_item.sug_text}'")
			
 
				+
			
 
				+                # 处理每个word的结果
			
 
				+                batch_results_temp = []
			
 
				+                for mot_item, cat_item in zip(batch_motivation.evaluations, batch_category.evaluations):
			
 
				+                    motivation_score = mot_item.动机维度得分
			
 
				+                    category_score = cat_item.品类维度得分
			
 
				+
			
 
				+                    # 应用Round 0专用规则计算最终得分（不含延伸词）
			
 
				+                    final_score, rule_applied = calculate_final_score_v2(
			
 
				+                        motivation_score, category_score
			
 
				+                    )
			
 
				+
			
 
				+                    # 组合评估理由
			
 
				+                    core_motivation = mot_item.原始问题核心动机提取.简要说明核心动机
			
 
				+                    motivation_reason = mot_item.简要说明动机维度相关度理由
			
 
				+                    category_reason = cat_item.简要说明品类维度相关度理由
			
 
				+
			
 
				+                    combined_reason = (
			
 
				+                        f'【评估对象】词条"{mot_item.sug_text}" vs 原始问题"{o}"\n'
			
 
				+                        f"【核心动机】{core_motivation}\n"
			
 
				+                        f"【动机维度 {motivation_score:.2f}】{motivation_reason}\n"
			
 
				+                        f"【品类维度 {category_score:.2f}】{category_reason}\n"
			
 
				+                        f"【最终得分 {final_score:.2f}】"
			
 
				+                    )
			
 
				+
			
 
				+                    if rule_applied:
			
 
				+                        combined_reason += f"\n【规则说明】{rule_applied}"
			
 
				+
			
 
				+                    batch_results_temp.append((final_score, combined_reason))
			
 
				+
			
 
				+                    # 存入缓存(使用round0前缀)
			
 
				+                    if cache is not None:
			
 
				+                        cache_key = f"round0:{mot_item.sug_text}:{o}"
			
 
				+                        cache[cache_key] = (final_score, combined_reason)
			
 
				+
			
 
				+                # 合并缓存结果和批量评估结果
			
 
				+                final_batch_results = []
			
 
				+                uncached_idx = 0
			
 
				+                for i in range(len(batch_texts)):
			
 
				+                    if i in cached_results:
			
 
				+                        final_batch_results.append(cached_results[i])
			
 
				+                    else:
			
 
				+                        final_batch_results.append(batch_results_temp[uncached_idx])
			
 
				+                        uncached_idx += 1
			
 
				+
			
 
				+                results.extend(final_batch_results)
			
 
				+                batch_success = True
			
 
				+
			
 
				+                batch_elapsed = time.time() - batch_start_time
			
 
				+                print(f"    ✅ 批次{batch_idx//BATCH_SIZE + 1}完成: {len(uncached_texts)}个words,耗时{batch_elapsed:.2f}秒")
			
 
				+                break
			
 
				+
			
 
				+            except Exception as e:
			
 
				+                last_error = e
			
 
				+                error_msg = str(e)
			
 
				+
			
 
				+                if attempt < max_retries - 1:
			
 
				+                    print(f"    ⚠️  Round0批量评估失败 (尝试 {attempt+1}/{max_retries}): {error_msg[:150]}")
			
 
				+                    print(f"    正在重试...")
			
 
				+                    await asyncio.sleep(1)
			
 
				+                else:
			
 
				+                    print(f"    ❌ Round0批量评估失败 (已达最大重试次数): {error_msg[:150]}")
			
 
				+
			
 
				+        # 如果批量评估失败,回退到单个评估
			
 
				+        if not batch_success:
			
 
				+            print(f"    ⚠️  Round0批量评估失败,回退到单个评估模式...")
			
 
				+            batch_results_temp = []
			
 
				+            for text in uncached_texts:
			
 
				+                try:
			
 
				+                    score, reason = await evaluate_with_o_round0(text, o, cache)
			
 
				+                    batch_results_temp.append((score, reason))
			
 
				+                except Exception as e:
			
 
				+                    print(f"    ❌ Round0单个评估也失败: {text[:30]}... - {str(e)[:100]}")
			
 
				+                    batch_results_temp.append((0.0, f"Round0评估失败: {str(e)[:100]}"))
			
 
				+
			
 
				+            # 合并结果
			
 
				+            final_batch_results = []
			
 
				+            uncached_idx = 0
			
 
				+            for i in range(len(batch_texts)):
			
 
				+                if i in cached_results:
			
 
				+                    final_batch_results.append(cached_results[i])
			
 
				+                else:
			
 
				+                    final_batch_results.append(batch_results_temp[uncached_idx])
			
 
				+                    uncached_idx += 1
			
 
				+
			
 
				+            results.extend(final_batch_results)
			
 
				+
			
 
				+    return results
			
 
				+
			
 
				+
			
 
				 async def evaluate_within_scope(text: str, scope_text: str, cache: dict[str, tuple[float, str]] | None = None, context: RunContext | None = None) -> tuple[float, str]:
			
 
				     """域内/域间专用评估函数（v124新增 - 需求2&3）
			
 
				 
			
@@ -3061,21 +3756,27 @@ async def run_round(
 
				                 all_sugs.append(sug)
			
 
				                 sug_to_q_map[id(sug)] = q_text
			
 
				 
			
 
				-    # 2.2 并发评估所有sug（使用信号量限制并发数）
			
 
				-    # 每个 evaluate_sug 内部会并发调用 2 个 LLM，所以这里限制为 5，实际并发 LLM 请求为 10
			
 
				-    MAX_CONCURRENT_EVALUATIONS = 30  # 🚀 性能优化：从5提升到30，并发评估能力提升6倍
			
 
				-    semaphore = asyncio.Semaphore(MAX_CONCURRENT_EVALUATIONS)
			
 
				+    # 2.2 批量评估所有sug（每批最多10个）
			
 
				+    # 🚀 性能优化：使用批量评估替代单个并发评估，显著提升性能
			
 
				+    if all_sugs:
			
 
				+        print(f"  开始批量评估 {len(all_sugs)} 个建议词（每批最多10个）...")
			
 
				 
			
 
				-    async def evaluate_sug(sug: Sug) -> Sug:
			
 
				-        async with semaphore:  # 限制并发数
			
 
				-            # 根据轮次选择 prompt: 第一轮使用 round1 prompt，后续使用标准 prompt
			
 
				-            sug.score_with_o, sug.reason = await evaluate_with_o(sug.text, o, context.evaluation_cache, context=context, round_num=round_num)
			
 
				-            return sug
			
 
				+        # 提取所有sug的text
			
 
				+        sug_texts = [sug.text for sug in all_sugs]
			
 
				 
			
 
				-    if all_sugs:
			
 
				-        print(f"  开始评估 {len(all_sugs)} 个建议词（并发限制: {MAX_CONCURRENT_EVALUATIONS}）...")
			
 
				-        eval_tasks = [evaluate_sug(sug) for sug in all_sugs]
			
 
				-        await asyncio.gather(*eval_tasks)
			
 
				+        # 批量评估
			
 
				+        batch_results = await evaluate_batch_with_o(
			
 
				+            texts=sug_texts,
			
 
				+            o=o,
			
 
				+            cache=context.evaluation_cache,
			
 
				+            context=context,
			
 
				+            round_num=round_num
			
 
				+        )
			
 
				+
			
 
				+        # 将结果分配回sug对象
			
 
				+        for sug, (score, reason) in zip(all_sugs, batch_results):
			
 
				+            sug.score_with_o = score
			
 
				+            sug.reason = reason
			
 
				 
			
 
				     # 2.3 打印结果并组织到sug_details
			
 
				     sug_details = {}  # 保存每个Q对应的sug列表
			
@@ -3568,41 +4269,60 @@ async def initialize_v2(o: str, context: RunContext) -> list[Segment]:
 
				     # 2. 对每个segment拆词并评估
			
 
				     print(f"\n[步骤2] 对每个segment拆词并评估...")
			
 
				 
			
 
				-    MAX_CONCURRENT_EVALUATIONS = 30  # 🚀 性能优化：从5提升到30，并发评估能力提升6倍
			
 
				+    # 2.1 先对所有segment拆词（并发）
			
 
				+    MAX_CONCURRENT_EVALUATIONS = 30
			
 
				     semaphore = asyncio.Semaphore(MAX_CONCURRENT_EVALUATIONS)
			
 
				 
			
 
				-    async def process_segment(segment: Segment) -> Segment:
			
 
				-        """处理单个segment: 拆词 + 评估segment + 评估词"""
			
 
				+    async def segment_words(segment: Segment) -> Segment:
			
 
				+        """对segment进行拆词"""
			
 
				         async with semaphore:
			
 
				-            # 2.1 拆词
			
 
				             word_result = await Runner.run(word_segmenter, segment.text)
			
 
				             word_segmentation: WordSegmentation = word_result.final_output
			
 
				             segment.words = word_segmentation.words
			
 
				+            return segment
			
 
				 
			
 
				-            # 2.2 评估segment与原始问题的相关度（使用Round 0专用评估）
			
 
				-            segment.score_with_o, segment.reason = await evaluate_with_o_round0(
			
 
				-                segment.text, o, context.evaluation_cache
			
 
				-            )
			
 
				-
			
 
				-            # 2.3 评估每个词与原始问题的相关度（使用Round 0专用评估）
			
 
				-            word_eval_tasks = []
			
 
				-            for word in segment.words:
			
 
				-                async def eval_word(w: str) -> tuple[str, float, str]:
			
 
				-                    score, reason = await evaluate_with_o_round0(w, o, context.evaluation_cache)
			
 
				-                    return w, score, reason
			
 
				-                word_eval_tasks.append(eval_word(word))
			
 
				+    if segment_list:
			
 
				+        print(f"  [步骤2.1] 对 {len(segment_list)} 个segment进行拆词...")
			
 
				+        word_tasks = [segment_words(seg) for seg in segment_list]
			
 
				+        await asyncio.gather(*word_tasks)
			
 
				+
			
 
				+        # 2.2 批量评估所有segments
			
 
				+        print(f"  [步骤2.2] 批量评估 {len(segment_list)} 个segments...")
			
 
				+        segment_texts = [seg.text for seg in segment_list]
			
 
				+        segment_results = await evaluate_batch_with_o_round0(
			
 
				+            texts=segment_texts,
			
 
				+            o=o,
			
 
				+            cache=context.evaluation_cache
			
 
				+        )
			
 
				 
			
 
				-            word_results = await asyncio.gather(*word_eval_tasks)
			
 
				-            for word, score, reason in word_results:
			
 
				-                segment.word_scores[word] = score
			
 
				-                segment.word_reasons[word] = reason
			
 
				+        # 分配segment评估结果
			
 
				+        for segment, (score, reason) in zip(segment_list, segment_results):
			
 
				+            segment.score_with_o = score
			
 
				+            segment.reason = reason
			
 
				 
			
 
				-            return segment
			
 
				+        # 2.3 收集所有words并批量评估
			
 
				+        all_words = []
			
 
				+        word_to_segments = {}  # 记录每个word属于哪些segments
			
 
				+        for segment in segment_list:
			
 
				+            for word in segment.words:
			
 
				+                if word not in word_to_segments:
			
 
				+                    all_words.append(word)
			
 
				+                    word_to_segments[word] = []
			
 
				+                word_to_segments[word].append(segment)
			
 
				+
			
 
				+        if all_words:
			
 
				+            print(f"  [步骤2.3] 批量评估 {len(all_words)} 个words（去重后）...")
			
 
				+            word_results = await evaluate_batch_with_o_round0(
			
 
				+                texts=all_words,
			
 
				+                o=o,
			
 
				+                cache=context.evaluation_cache
			
 
				+            )
			
 
				 
			
 
				-    if segment_list:
			
 
				-        print(f"  开始处理 {len(segment_list)} 个segment（并发限制: {MAX_CONCURRENT_EVALUATIONS}）...")
			
 
				-        process_tasks = [process_segment(seg) for seg in segment_list]
			
 
				-        await asyncio.gather(*process_tasks)
			
 
				+            # 分配word评估结果到所有相关的segments
			
 
				+            for word, (score, reason) in zip(all_words, word_results):
			
 
				+                for segment in word_to_segments[word]:
			
 
				+                    segment.word_scores[word] = score
			
 
				+                    segment.word_reasons[word] = reason
			
 
				 
			
 
				     # 打印步骤1结果
			
 
				     print(f"\n[步骤1: 分段及拆词 结果]")
			
@@ -4093,6 +4813,7 @@ async def iterative_loop_v2(
 
				     enable_evaluation: bool = False
			
 
				 ):
			
 
				     """v121 主迭代循环"""
			
 
				+    import time
			
 
				 
			
 
				     print(f"\n{'='*60}")
			
 
				     print(f"开始v121迭代循环（语义分段跨域组词版）")
			
@@ -4101,7 +4822,13 @@ async def iterative_loop_v2(
 
				     print(f"{'='*60}")
			
 
				 
			
 
				     # Round 0: 初始化（语义分段 + 拆词）
			
 
				+    print(f"\n{'='*60}")
			
 
				+    print(f"Round 0: 初始化（语义分段 + 拆词）")
			
 
				+    print(f"{'='*60}")
			
 
				+    round0_start_time = time.time()
			
 
				     segments = await initialize_v2(context.o, context)
			
 
				+    round0_elapsed = time.time() - round0_start_time
			
 
				+    print(f"\n✅ Round 0 完成，耗时: {round0_elapsed:.2f}秒")
			
 
				 
			
 
				     # API实例
			
 
				     xiaohongshu_api = XiaohongshuSearchRecommendations()
			
@@ -4120,8 +4847,11 @@ async def iterative_loop_v2(
 
				     num_segments = len(segments)
			
 
				     actual_max_rounds = min(max_rounds, num_segments)
			
 
				     round_num = 1
			
 
				+    rounds_elapsed_times = []  # 记录每轮耗时
			
 
				 
			
 
				     while query_input and round_num <= actual_max_rounds:
			
 
				+        round_start_time = time.time()
			
 
				+
			
 
				         query_input, search_list = await run_round_v2(  # 不再接收提取结果
			
 
				             round_num=round_num,
			
 
				             query_input=query_input,  # 传递上一轮的输出
			
@@ -4135,6 +4865,10 @@ async def iterative_loop_v2(
 
				             enable_evaluation=enable_evaluation
			
 
				         )
			
 
				 
			
 
				+        round_elapsed = time.time() - round_start_time
			
 
				+        rounds_elapsed_times.append(round_elapsed)
			
 
				+        print(f"\n✅ Round {round_num} 完成，耗时: {round_elapsed:.2f}秒")
			
 
				+
			
 
				         all_search_list.extend(search_list)
			
 
				         # all_extraction_results.update(extraction_results)  # 内容提取流程已断开
			
 
				 
			
@@ -4151,6 +4885,12 @@ async def iterative_loop_v2(
 
				     print(f"  总搜索次数: {len(all_search_list)}")
			
 
				     print(f"  总帖子数: {sum(len(s.post_list) for s in all_search_list)}")
			
 
				     # print(f"  提取帖子数: {len(all_extraction_results)}")  # 内容提取流程已断开
			
 
				+    print(f"\n[耗时统计]")
			
 
				+    print(f"  Round 0 耗时: {round0_elapsed:.2f}秒")
			
 
				+    for i, elapsed in enumerate(rounds_elapsed_times, 1):
			
 
				+        print(f"  Round {i} 耗时: {elapsed:.2f}秒")
			
 
				+    total_rounds_time = round0_elapsed + sum(rounds_elapsed_times)
			
 
				+    print(f"  所有轮次总耗时: {total_rounds_time:.2f}秒 ({total_rounds_time/60:.2f}分钟)")
			
 
				     print(f"\n[统计信息]")
			
 
				     print(f"  LLM评估调用: {context.stats_llm_calls} 次")
			
 
				     print(f"  SUG请求: {context.stats_sug_requests} 次 (缓存命中: {context.stats_sug_cache_hits} 次)")
			
@@ -4166,6 +4906,9 @@ async def iterative_loop_v2(
 
				 
			
 
				 async def main(input_dir: str, max_rounds: int = 2, sug_threshold: float = 0.7, visualize: bool = False, enable_evaluation: bool = False):
			
 
				     """主函数"""
			
 
				+    import time
			
 
				+    total_start_time = time.time()  # 记录总开始时间
			
 
				+
			
 
				     current_time, log_url = set_trace()
			
 
				 
			
 
				     # 读取输入
			
@@ -4229,7 +4972,12 @@ async def main(input_dir: str, max_rounds: int = 2, sug_threshold: float = 0.7,
 
				         output += f"总搜索次数：{len(all_search_list)}\n"
			
 
				         output += f"总帖子数：{sum(len(s.post_list) for s in all_search_list)}\n"
			
 
				         # output += f"提取帖子数：{len(all_extraction_results)}\n"  # 内容提取流程已断开
			
 
				+
			
 
				+        # 计算总耗时
			
 
				+        total_elapsed_time = time.time() - total_start_time
			
 
				+
			
 
				         output += f"\n统计信息：\n"
			
 
				+        output += f"  总耗时: {total_elapsed_time:.2f}秒 ({total_elapsed_time/60:.2f}分钟)\n"
			
 
				         output += f"  LLM评估调用: {run_context.stats_llm_calls} 次\n"
			
 
				         output += f"  SUG请求: {run_context.stats_sug_requests} 次 (缓存命中: {run_context.stats_sug_cache_hits} 次)\n"
			
 
				         output += f"  搜索调用: {run_context.stats_search_calls} 次\n"