|
|
@@ -88,11 +88,12 @@ async def test_evaluation_v3(run_context_path: str, max_posts: int = 10):
|
|
|
|
|
|
# 3. 处理结果
|
|
|
results = []
|
|
|
+ detailed_reports = [] # 收集详细评估报告
|
|
|
print(f"📊 处理评估结果...\n")
|
|
|
for i, ((round_idx, search_idx, post_id, post, _), eval_result) in enumerate(zip(tasks, all_eval_results), 1):
|
|
|
knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level = eval_result
|
|
|
|
|
|
- print(f" [{i}/{len(tasks)}] {post.note_id}", end="")
|
|
|
+ print(f" [{i}/{len(tasks)}] {post.note_id} - {post.title[:40]}", end="")
|
|
|
if knowledge_eval:
|
|
|
if final_score is not None:
|
|
|
print(f" → {match_level} ({final_score:.1f}分)")
|
|
|
@@ -103,6 +104,52 @@ async def test_evaluation_v3(run_context_path: str, max_posts: int = 10):
|
|
|
else:
|
|
|
print(f" → 评估未完成")
|
|
|
|
|
|
+ # 打印详细判断原因
|
|
|
+ print(f" 📝 知识评估: {knowledge_eval.conclusion if knowledge_eval.conclusion else '无'}")
|
|
|
+ if content_eval and content_eval.is_content_knowledge:
|
|
|
+ print(f" 📚 内容知识: {content_eval.summary[:80] if content_eval.summary else '无'}...")
|
|
|
+ if purpose_eval:
|
|
|
+ print(f" 🎯 目的匹配: {purpose_eval.core_basis[:80] if purpose_eval.core_basis else '无'}...")
|
|
|
+ if category_eval:
|
|
|
+ print(f" 🏷️ 品类匹配: {category_eval.core_basis[:80] if category_eval.core_basis else '无'}...")
|
|
|
+ print()
|
|
|
+
|
|
|
+ # 收集详细报告
|
|
|
+ detailed_report = {
|
|
|
+ 'post_index': i,
|
|
|
+ 'note_id': post.note_id,
|
|
|
+ 'title': post.title,
|
|
|
+ 'final_score': final_score,
|
|
|
+ 'match_level': match_level,
|
|
|
+ 'is_knowledge': knowledge_eval.is_knowledge if knowledge_eval else None,
|
|
|
+ 'is_content_knowledge': content_eval.is_content_knowledge if content_eval else None,
|
|
|
+ 'knowledge_score': content_eval.final_score if content_eval else None,
|
|
|
+ 'evaluations': {
|
|
|
+ 'knowledge': {
|
|
|
+ 'conclusion': knowledge_eval.conclusion if knowledge_eval else None,
|
|
|
+ 'core_evidence': knowledge_eval.core_evidence if knowledge_eval and hasattr(knowledge_eval, 'core_evidence') else None,
|
|
|
+ 'issues': knowledge_eval.issues if knowledge_eval and hasattr(knowledge_eval, 'issues') else None
|
|
|
+ },
|
|
|
+ 'content_knowledge': {
|
|
|
+ 'summary': content_eval.summary if content_eval else None,
|
|
|
+ 'final_score': content_eval.final_score if content_eval else None,
|
|
|
+ 'level': content_eval.level if content_eval else None
|
|
|
+ } if content_eval and content_eval.is_content_knowledge else None,
|
|
|
+ 'purpose': {
|
|
|
+ 'score': purpose_eval.purpose_score if purpose_eval else None,
|
|
|
+ 'core_motivation': purpose_eval.core_motivation if purpose_eval else None,
|
|
|
+ 'core_basis': purpose_eval.core_basis if purpose_eval else None,
|
|
|
+ 'match_level': purpose_eval.match_level if purpose_eval else None
|
|
|
+ } if purpose_eval else None,
|
|
|
+ 'category': {
|
|
|
+ 'score': category_eval.category_score if category_eval else None,
|
|
|
+ 'core_basis': category_eval.core_basis if category_eval else None,
|
|
|
+ 'match_level': category_eval.match_level if category_eval else None
|
|
|
+ } if category_eval else None
|
|
|
+ }
|
|
|
+ }
|
|
|
+ detailed_reports.append(detailed_report)
|
|
|
+
|
|
|
# 应用评估结果
|
|
|
apply_evaluation_v3_to_post(
|
|
|
post,
|
|
|
@@ -115,7 +162,7 @@ async def test_evaluation_v3(run_context_path: str, max_posts: int = 10):
|
|
|
)
|
|
|
results.append((round_idx, search_idx, post_id, post))
|
|
|
else:
|
|
|
- print(f" → ❌ 评估失败")
|
|
|
+ print(f" → ❌ 评估失败\n")
|
|
|
|
|
|
print(f"\n✅ 评估完成: {len(results)}/{len(posts)} 成功\n")
|
|
|
|
|
|
@@ -155,7 +202,22 @@ async def test_evaluation_v3(run_context_path: str, max_posts: int = 10):
|
|
|
output_path = run_context_path.replace('.json', '_v3.json')
|
|
|
with open(output_path, 'w', encoding='utf-8') as f:
|
|
|
json.dump(run_context, f, ensure_ascii=False, indent=2)
|
|
|
- print(f"✅ 已保存: {output_path}\n")
|
|
|
+ print(f"✅ 已保存: {output_path}")
|
|
|
+
|
|
|
+ # 保存详细评估报告
|
|
|
+ report_path = run_context_path.replace('.json', '_evaluation_report.json')
|
|
|
+ evaluation_report = {
|
|
|
+ 'metadata': {
|
|
|
+ 'original_query': original_query,
|
|
|
+ 'total_posts': len(results),
|
|
|
+ 'evaluation_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
|
|
+ 'evaluator_version': 'v3.0'
|
|
|
+ },
|
|
|
+ 'detailed_reports': detailed_reports
|
|
|
+ }
|
|
|
+ with open(report_path, 'w', encoding='utf-8') as f:
|
|
|
+ json.dump(evaluation_report, f, ensure_ascii=False, indent=2)
|
|
|
+ print(f"📄 已保存详细评估报告: {report_path}\n")
|
|
|
|
|
|
# 生成统计报告
|
|
|
print(f"\n{'='*80}")
|