|
@@ -266,6 +266,11 @@ motivation_evaluation_instructions = """
|
|
|
}
|
|
}
|
|
|
```
|
|
```
|
|
|
|
|
|
|
|
|
|
+**输出约束(非常重要)**:
|
|
|
|
|
+1. **字符串长度限制**:\"简要说明动机维度相关度理由\"字段必须控制在**150字以内**
|
|
|
|
|
+2. **JSON格式规范**:必须生成完整的JSON格式,确保字符串用双引号包裹且正确闭合
|
|
|
|
|
+3. **引号使用**:字符串中如需表达引用,请使用《》或「」代替单引号或双引号
|
|
|
|
|
+
|
|
|
#注意事项:
|
|
#注意事项:
|
|
|
始终围绕动机维度:所有评估都基于"动机"维度,不偏离
|
|
始终围绕动机维度:所有评估都基于"动机"维度,不偏离
|
|
|
核心动机必须是动词:在评估前,必须先提取原始问题的核心动机(动词),这是整个评估的基础
|
|
核心动机必须是动词:在评估前,必须先提取原始问题的核心动机(动词),这是整个评估的基础
|
|
@@ -357,6 +362,12 @@ category_evaluation_instructions = """
|
|
|
"简要说明品类维度相关度理由": "评估该sug词条与原始问题品类匹配程度的理由"
|
|
"简要说明品类维度相关度理由": "评估该sug词条与原始问题品类匹配程度的理由"
|
|
|
}
|
|
}
|
|
|
```
|
|
```
|
|
|
|
|
+
|
|
|
|
|
+**输出约束(非常重要)**:
|
|
|
|
|
+1. **字符串长度限制**:\"简要说明品类维度相关度理由\"字段必须控制在**150字以内**
|
|
|
|
|
+2. **JSON格式规范**:必须生成完整的JSON格式,确保字符串用双引号包裹且正确闭合
|
|
|
|
|
+3. **引号使用**:字符串中如需表达引用,请使用《》或「」代替单引号或双引号
|
|
|
|
|
+
|
|
|
---
|
|
---
|
|
|
|
|
|
|
|
#注意事项:
|
|
#注意事项:
|
|
@@ -415,8 +426,7 @@ word_selection_instructions = """
|
|
|
1. **只能使用seed和word的原始文本**
|
|
1. **只能使用seed和word的原始文本**
|
|
|
2. **不能添加任何连接词**(如"的"、"和"、"与"、"在"等)
|
|
2. **不能添加任何连接词**(如"的"、"和"、"与"、"在"等)
|
|
|
3. **不能添加任何额外的词**
|
|
3. **不能添加任何额外的词**
|
|
|
-4. **组合方式**:seed+word 或 word+seed,选择更符合搜索习惯的顺序
|
|
|
|
|
-5. **简单拼接**:直接将两个词拼接,不做任何修饰
|
|
|
|
|
|
|
+4. **组合方式**:seed+word 或 word+seed,或者word插入seed中间,选择更符合搜索习惯的顺序
|
|
|
|
|
|
|
|
|
|
|
|
|
## 错误示例
|
|
## 错误示例
|
|
@@ -569,52 +579,74 @@ async def evaluate_with_o(text: str, o: str) -> tuple[float, str]:
|
|
|
请评估平台sug词条与原始问题的匹配度。
|
|
请评估平台sug词条与原始问题的匹配度。
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
- # 并发调用两个评估器
|
|
|
|
|
- motivation_task = Runner.run(motivation_evaluator, eval_input)
|
|
|
|
|
- category_task = Runner.run(category_evaluator, eval_input)
|
|
|
|
|
|
|
+ # 添加重试机制
|
|
|
|
|
+ max_retries = 2
|
|
|
|
|
+ last_error = None
|
|
|
|
|
|
|
|
- motivation_result, category_result = await asyncio.gather(
|
|
|
|
|
- motivation_task,
|
|
|
|
|
- category_task
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ for attempt in range(max_retries):
|
|
|
|
|
+ try:
|
|
|
|
|
+ # 并发调用两个评估器
|
|
|
|
|
+ motivation_task = Runner.run(motivation_evaluator, eval_input)
|
|
|
|
|
+ category_task = Runner.run(category_evaluator, eval_input)
|
|
|
|
|
|
|
|
- # 获取评估结果
|
|
|
|
|
- motivation_eval: MotivationEvaluation = motivation_result.final_output
|
|
|
|
|
- category_eval: CategoryEvaluation = category_result.final_output
|
|
|
|
|
|
|
+ motivation_result, category_result = await asyncio.gather(
|
|
|
|
|
+ motivation_task,
|
|
|
|
|
+ category_task
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
- # 提取得分
|
|
|
|
|
- motivation_score = motivation_eval.动机维度得分
|
|
|
|
|
- category_score = category_eval.品类维度得分
|
|
|
|
|
|
|
+ # 获取评估结果
|
|
|
|
|
+ motivation_eval: MotivationEvaluation = motivation_result.final_output
|
|
|
|
|
+ category_eval: CategoryEvaluation = category_result.final_output
|
|
|
|
|
|
|
|
- # 计算基础得分
|
|
|
|
|
- base_score = motivation_score * 0.7 + category_score * 0.3
|
|
|
|
|
|
|
+ # 提取得分
|
|
|
|
|
+ motivation_score = motivation_eval.动机维度得分
|
|
|
|
|
+ category_score = category_eval.品类维度得分
|
|
|
|
|
|
|
|
- # 应用规则计算最终得分
|
|
|
|
|
- final_score = calculate_final_score(motivation_score, category_score)
|
|
|
|
|
|
|
+ # 计算基础得分
|
|
|
|
|
+ base_score = motivation_score * 0.7 + category_score * 0.3
|
|
|
|
|
|
|
|
- # 组合评估理由
|
|
|
|
|
- core_motivation = motivation_eval.原始问题核心动机提取.简要说明核心动机
|
|
|
|
|
- motivation_reason = motivation_eval.简要说明动机维度相关度理由
|
|
|
|
|
- category_reason = category_eval.简要说明品类维度相关度理由
|
|
|
|
|
|
|
+ # 应用规则计算最终得分
|
|
|
|
|
+ final_score = calculate_final_score(motivation_score, category_score)
|
|
|
|
|
|
|
|
- combined_reason = (
|
|
|
|
|
- f"【核心动机】{core_motivation}\n"
|
|
|
|
|
- f"【动机维度 {motivation_score:.2f}】{motivation_reason}\n"
|
|
|
|
|
- f"【品类维度 {category_score:.2f}】{category_reason}\n"
|
|
|
|
|
- f"【基础得分 {base_score:.2f}】= 动机({motivation_score:.2f})*0.7 + 品类({category_score:.2f})*0.3\n"
|
|
|
|
|
- f"【最终得分 {final_score:.2f}】"
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ # 组合评估理由
|
|
|
|
|
+ core_motivation = motivation_eval.原始问题核心动机提取.简要说明核心动机
|
|
|
|
|
+ motivation_reason = motivation_eval.简要说明动机维度相关度理由
|
|
|
|
|
+ category_reason = category_eval.简要说明品类维度相关度理由
|
|
|
|
|
|
|
|
- # 如果应用了规则,添加规则说明
|
|
|
|
|
- if final_score != base_score:
|
|
|
|
|
- if motivation_score < 0:
|
|
|
|
|
- combined_reason += "(应用规则C:动机负向决定机制)"
|
|
|
|
|
- elif motivation_score >= 0.8:
|
|
|
|
|
- combined_reason += "(应用规则A:动机高分保护机制)"
|
|
|
|
|
- elif motivation_score <= 0.2:
|
|
|
|
|
- combined_reason += "(应用规则B:动机低分限制机制)"
|
|
|
|
|
|
|
+ combined_reason = (
|
|
|
|
|
+ f"【核心动机】{core_motivation}\n"
|
|
|
|
|
+ f"【动机维度 {motivation_score:.2f}】{motivation_reason}\n"
|
|
|
|
|
+ f"【品类维度 {category_score:.2f}】{category_reason}\n"
|
|
|
|
|
+ f"【基础得分 {base_score:.2f}】= 动机({motivation_score:.2f})*0.7 + 品类({category_score:.2f})*0.3\n"
|
|
|
|
|
+ f"【最终得分 {final_score:.2f}】"
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
- return final_score, combined_reason
|
|
|
|
|
|
|
+ # 如果应用了规则,添加规则说明
|
|
|
|
|
+ if final_score != base_score:
|
|
|
|
|
+ if motivation_score < 0:
|
|
|
|
|
+ combined_reason += "(应用规则C:动机负向决定机制)"
|
|
|
|
|
+ elif motivation_score >= 0.8:
|
|
|
|
|
+ combined_reason += "(应用规则A:动机高分保护机制)"
|
|
|
|
|
+ elif motivation_score <= 0.2:
|
|
|
|
|
+ combined_reason += "(应用规则B:动机低分限制机制)"
|
|
|
|
|
+
|
|
|
|
|
+ return final_score, combined_reason
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ last_error = e
|
|
|
|
|
+ error_msg = str(e)
|
|
|
|
|
+
|
|
|
|
|
+ if attempt < max_retries - 1:
|
|
|
|
|
+ print(f" ⚠️ 评估失败 (尝试 {attempt+1}/{max_retries}): {error_msg[:150]}")
|
|
|
|
|
+ print(f" 正在重试...")
|
|
|
|
|
+ await asyncio.sleep(1) # 等待1秒后重试
|
|
|
|
|
+ else:
|
|
|
|
|
+ print(f" ❌ 评估失败 (已达最大重试次数): {error_msg[:150]}")
|
|
|
|
|
+
|
|
|
|
|
+ # 所有重试失败后,返回默认值
|
|
|
|
|
+ fallback_reason = f"评估失败(重试{max_retries}次): {str(last_error)[:200]}"
|
|
|
|
|
+ print(f" 使用默认值: score=0.0, reason={fallback_reason[:100]}...")
|
|
|
|
|
+ return 0.0, fallback_reason
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
# ============================================================================
|