|
@@ -172,6 +172,10 @@ class RunContext(BaseModel):
|
|
|
evaluation_cache: dict[str, tuple[float, str]] = Field(default_factory=dict)
|
|
evaluation_cache: dict[str, tuple[float, str]] = Field(default_factory=dict)
|
|
|
# key: 文本, value: (score, reason)
|
|
# key: 文本, value: (score, reason)
|
|
|
|
|
|
|
|
|
|
+ # 历史词/组合得分追踪(用于Round 2+计算系数)
|
|
|
|
|
+ word_score_history: dict[str, float] = Field(default_factory=dict)
|
|
|
|
|
+ # key: 词/组合文本, value: 最终得分
|
|
|
|
|
+
|
|
|
|
|
|
|
|
# ============================================================================
|
|
# ============================================================================
|
|
|
# Agent 定义
|
|
# Agent 定义
|
|
@@ -1300,14 +1304,16 @@ round0_motivation_evaluator = Agent[None](
|
|
|
name="Round 0动机维度评估专家",
|
|
name="Round 0动机维度评估专家",
|
|
|
instructions=round0_motivation_evaluation_instructions,
|
|
instructions=round0_motivation_evaluation_instructions,
|
|
|
model=get_model(MODEL_NAME),
|
|
model=get_model(MODEL_NAME),
|
|
|
- output_type=MotivationEvaluation
|
|
|
|
|
|
|
+ output_type=MotivationEvaluation,
|
|
|
|
|
+ model_settings=ModelSettings(temperature=0.2)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
# Round 0 category-dimension evaluator agent. It produces a structured
# CategoryEvaluation and runs with a sampling temperature of 0.2.
round0_category_evaluator = Agent[None](
    model=get_model(MODEL_NAME),
    model_settings=ModelSettings(temperature=0.2),
    name="Round 0品类维度评估专家",
    instructions=round0_category_evaluation_instructions,
    output_type=CategoryEvaluation,
)
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1608,14 +1614,16 @@ scope_motivation_evaluator = Agent[None](
|
|
|
name="域内动机维度评估专家",
|
|
name="域内动机维度评估专家",
|
|
|
instructions=scope_motivation_evaluation_instructions,
|
|
instructions=scope_motivation_evaluation_instructions,
|
|
|
model=get_model(MODEL_NAME),
|
|
model=get_model(MODEL_NAME),
|
|
|
- output_type=MotivationEvaluation
|
|
|
|
|
|
|
+ output_type=MotivationEvaluation,
|
|
|
|
|
+ model_settings=ModelSettings(temperature=0.2)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
# In-scope category-dimension evaluator agent. It produces a structured
# CategoryEvaluation and runs with a sampling temperature of 0.2.
scope_category_evaluator = Agent[None](
    model=get_model(MODEL_NAME),
    model_settings=ModelSettings(temperature=0.2),
    name="域内品类维度评估专家",
    instructions=scope_category_evaluation_instructions,
    output_type=CategoryEvaluation,
)
|
|
|
|
|
|
|
|
|
|
|
|
@@ -2364,6 +2372,198 @@ async def evaluate_within_scope(text: str, scope_text: str, cache: dict[str, tup
|
|
|
return 0.0, fallback_reason
|
|
return 0.0, fallback_reason
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+# v125 新增辅助函数(用于新评分逻辑)
|
|
|
|
|
+# ============================================================================
|
|
|
|
|
+
|
|
|
|
|
def get_source_word_score(
    word_text: str,
    segment: Segment,
    context: RunContext
) -> float:
    """Look up the score recorded for a source word or combination.

    Lookup order:
      1. ``segment.word_scores``
      2. ``context.word_score_history``

    Args:
        word_text: Text of the word (or combination) to look up.
        segment: Segment whose ``word_scores`` mapping is consulted first.
        context: Run context whose ``word_score_history`` is the fallback.

    Returns:
        The recorded score. When neither mapping has ``word_text`` as a key,
        a warning is printed and ``0.0`` is returned.
    """
    # A private sentinel distinguishes "key absent" from a stored falsy
    # score such as 0.0.
    absent = object()

    found = segment.word_scores.get(word_text, absent)
    if found is absent:
        found = context.word_score_history.get(word_text, absent)

    if found is absent:
        print(f" ⚠️ 警告: 未找到来源词得分: {word_text}")
        return 0.0
    return found
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
async def evaluate_domain_combination_round1(
    comb: DomainCombination,
    segments: list[Segment],
    context: RunContext
) -> tuple[float, str]:
    """Score a Round 1 (in-domain) combination.

    The final score is the category score returned by
    ``scope_category_evaluator`` multiplied by the owning segment's
    ``score_with_o``. Only the category evaluator is run here.

    Args:
        comb: The combination to score; its first entry in ``domains``
            selects the owning segment.
        segments: All segments, indexed by domain index.
        context: Run context (not read by this function).

    Returns:
        ``(final_score, reason)``. Returns ``(0.0, <error text>)`` when the
        owning segment cannot be resolved or the evaluator call raises.
    """
    # An empty ``domains`` list falls back to index 0.
    owner_idx = comb.domains[0] if comb.domains else 0

    owner = None
    if 0 <= owner_idx < len(segments):
        owner = segments[owner_idx]
    if not owner:
        return 0.0, "错误: 无法找到所属segment"

    # The owning segment's full text is the scope the combination is
    # compared against.
    scope_text = owner.text

    prompt = f"""
<同一作用域词条>
{scope_text}
</同一作用域词条>

<词条>
{comb.text}
</词条>

请评估词条与同一作用域词条的匹配度。
"""

    try:
        run_result = await Runner.run(scope_category_evaluator, prompt)
        verdict: CategoryEvaluation = run_result.final_output
        category_score = verdict.品类维度得分
        category_reason = verdict.简要说明品类维度相关度理由
    except Exception as exc:
        # Best-effort: a failed evaluation scores 0.0 instead of aborting
        # the round; the message is truncated to 100 characters.
        print(f" ❌ Round 1品类评估失败: {exc}")
        return 0.0, f"评估失败: {str(exc)[:100]}"

    domain_score = owner.score_with_o
    final_score = category_score * domain_score

    report_lines = [
        '【Round 1 域内评估】',
        f'【评估对象】组合"{comb.text}" vs 作用域"{scope_text}"',
        f'【品类得分】{category_score:.2f} - {category_reason}',
        f'【原始域得分】{domain_score:.2f}',
        f'【计算公式】品类得分 × 域得分 = {category_score:.2f} × {domain_score:.2f}',
        f'【最终得分】{final_score:.2f}',
    ]
    return final_score, "\n".join(report_lines)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
async def evaluate_domain_combination_round2plus(
    comb: DomainCombination,
    segments: list[Segment],
    context: RunContext
) -> tuple[float, str]:
    """Score a Round 2+ (cross-domain) combination.

    Steps:
      1. Obtain ``base_score`` from ``evaluate_within_scope``, using the
         concatenation of ``comb.from_segments`` as the scope text.
      2. Compute the weighting coefficient
         ``sum(source word scores) / sum(domain scores)`` over the domains
         that contributed to the combination.
      3. ``final_score = base_score * coefficient``, clamped to
         ``[-1.0, 1.0]``.

    Args:
        comb: The cross-domain combination to score. ``comb.domains`` and
            ``comb.source_words`` are walked in lockstep.
        segments: All segments, indexed by domain index.
        context: Run context providing ``evaluation_cache`` and, through
            ``get_source_word_score``, ``word_score_history``.

    Returns:
        ``(final_score, reason)`` where ``reason`` is a multi-line
        explanation of how the score was derived.
    """
    # Step 1: base score from the existing in-scope evaluation.
    scope_text = "".join(comb.from_segments)
    base_score, base_reason = await evaluate_within_scope(
        comb.text,
        scope_text,
        context.evaluation_cache
    )

    # Step 2: accumulate source-word and domain scores per contributing domain.
    total_source_score = 0.0
    total_domain_score = 0.0
    coefficient_details = []

    for domain_idx, source_words_list in zip(comb.domains, comb.source_words):
        segment = segments[domain_idx] if 0 <= domain_idx < len(segments) else None
        if not segment:
            # Out-of-range domain index: contributes to neither total.
            continue

        domain_score = segment.score_with_o
        total_domain_score += domain_score

        # Several words from one domain are looked up as their concatenation;
        # joining a single-element list yields that element unchanged.
        source_word_text = "".join(source_words_list)

        source_score = get_source_word_score(source_word_text, segment, context)
        total_source_score += source_score

        coefficient_details.append(
            f" 域{domain_idx}[{segment.type}]: \"{source_word_text}\"得分={source_score:.2f}, 域得分={domain_score:.2f}"
        )

    # A non-positive domain total yields a zero coefficient, which also
    # avoids dividing by zero.
    if total_domain_score > 0:
        coefficient = total_source_score / total_domain_score
    else:
        coefficient = 0.0

    # Step 3: apply the coefficient (not the raw source total) and clamp.
    final_score = base_score * coefficient
    final_score = min(1.0, max(-1.0, final_score))

    coefficient_detail_str = "\n".join(coefficient_details)
    combined_reason = (
        f'【Round 2+ 域间评估】\n'
        f'【评估对象】组合"{comb.text}"\n'
        f'{base_reason}\n'
        f'【加权系数计算】\n'
        f'{coefficient_detail_str}\n'
        f' 来源词总得分: {total_source_score:.2f}\n'
        f' 域总得分: {total_domain_score:.2f}\n'
        f' 系数: {coefficient:.2f}\n'
        f'【计算公式】base_score × 系数 = {base_score:.2f} × {coefficient:.2f}\n'
        f'【最终得分(截断后)】{final_score:.2f}'
    )

    return final_score, combined_reason
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
# ============================================================================
|
|
# ============================================================================
|
|
|
# 核心流程函数
|
|
# 核心流程函数
|
|
|
# ============================================================================
|
|
# ============================================================================
|
|
@@ -3096,6 +3296,13 @@ async def initialize_v2(o: str, context: RunContext) -> list[Segment]:
|
|
|
]
|
|
]
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
|
|
+ # 🆕 存储Round 0的所有word得分到历史记录
|
|
|
|
|
+ print(f"\n[存储Round 0词得分到历史记录]")
|
|
|
|
|
+ for segment in segment_list:
|
|
|
|
|
+ for word, score in segment.word_scores.items():
|
|
|
|
|
+ context.word_score_history[word] = score
|
|
|
|
|
+ print(f" {word}: {score:.2f}")
|
|
|
|
|
+
|
|
|
print(f"\n[Round 0 完成]")
|
|
print(f"\n[Round 0 完成]")
|
|
|
print(f" 分段数: {len(segment_list)}")
|
|
print(f" 分段数: {len(segment_list)}")
|
|
|
total_words = sum(len(seg.words) for seg in segment_list)
|
|
total_words = sum(len(seg.words) for seg in segment_list)
|
|
@@ -3277,15 +3484,21 @@ async def run_round_v2(
|
|
|
|
|
|
|
|
async def evaluate_combination(comb: DomainCombination) -> DomainCombination:
    """Score one combination under the shared semaphore and record the result.

    Round 1 uses ``evaluate_domain_combination_round1``; every other round
    uses ``evaluate_domain_combination_round2plus``. The score and reason are
    written onto ``comb``, and the score is also stored in
    ``context.word_score_history`` keyed by ``comb.text``.
    """
    async with semaphore:
        # Choose the scorer for this round, then make a single call.
        scorer = (
            evaluate_domain_combination_round1
            if round_num == 1
            else evaluate_domain_combination_round2plus
        )
        score, reason = await scorer(comb, segments, context)
        comb.score_with_o = score
        comb.reason = reason

        # Record the score for later lookup by combination text.
        context.word_score_history[comb.text] = comb.score_with_o

        return comb
|
|
|
|
|
|
|
|
eval_tasks = [evaluate_combination(comb) for comb in domain_combinations]
|
|
eval_tasks = [evaluate_combination(comb) for comb in domain_combinations]
|
|
@@ -3295,7 +3508,8 @@ async def run_round_v2(
|
|
|
# domain_combinations.sort(key=lambda x: x.score_with_o, reverse=True)
|
|
# domain_combinations.sort(key=lambda x: x.score_with_o, reverse=True)
|
|
|
|
|
|
|
|
# 打印所有组合(保持原始顺序)
|
|
# 打印所有组合(保持原始顺序)
|
|
|
- print(f" 评估完成,共{len(domain_combinations)}个组合:")
|
|
|
|
|
|
|
+ evaluation_strategy = 'Round 1 域内评估(品类×域得分)' if round_num == 1 else 'Round 2+ 域间评估(加权系数调整)'
|
|
|
|
|
+ print(f" 评估完成,共{len(domain_combinations)}个组合 [策略: {evaluation_strategy}]")
|
|
|
for i, comb in enumerate(domain_combinations, 1):
|
|
for i, comb in enumerate(domain_combinations, 1):
|
|
|
print(f" {i}. {comb.text} {comb.type_label} (分数: {comb.score_with_o:.2f})")
|
|
print(f" {i}. {comb.text} {comb.type_label} (分数: {comb.score_with_o:.2f})")
|
|
|
|
|
|