|
|
@@ -12,7 +12,7 @@ from collections import defaultdict
|
|
|
|
|
|
# 导入必要的模块
|
|
|
from knowledge_search_traverse import Post
|
|
|
-from post_evaluator_v3 import evaluate_post_v3, apply_evaluation_v3_to_post
|
|
|
+from post_evaluator_v3 import evaluate_post_v3, apply_evaluation_v3_to_post, two_stage_batch_evaluate
|
|
|
|
|
|
|
|
|
async def test_evaluation_v3(run_context_path: str, max_posts: int = 10):
|
|
|
@@ -79,98 +79,61 @@ async def test_evaluation_v3(run_context_path: str, max_posts: int = 10):
|
|
|
)
|
|
|
posts.append((round_idx, search_idx, post_id, post))
|
|
|
|
|
|
- # 批量评估
|
|
|
- print(f"🚀 开始并行评估 (最多{len(posts)}个任务,并发限制: 5)...\n")
|
|
|
+ # 提取纯post列表用于两阶段评估
|
|
|
+ post_list = [post for _, _, _, post in posts]
|
|
|
|
|
|
- semaphore = asyncio.Semaphore(5)
|
|
|
- tasks = []
|
|
|
+ # 使用两阶段批量评估
|
|
|
+ await two_stage_batch_evaluate(post_list, original_query, quick_concurrent=15, detail_concurrent=15)
|
|
|
|
|
|
- # 1. 创建所有任务
|
|
|
- for round_idx, search_idx, post_id, post in posts:
|
|
|
- task = evaluate_post_v3(post, original_query, semaphore)
|
|
|
- tasks.append((round_idx, search_idx, post_id, post, task))
|
|
|
-
|
|
|
- # 2. 并行执行所有任务
|
|
|
- task_coroutines = [task for _, _, _, _, task in tasks]
|
|
|
- all_eval_results = await asyncio.gather(*task_coroutines)
|
|
|
-
|
|
|
- # 3. 处理结果
|
|
|
+ # 处理评估结果(两阶段评估已将结果应用到post对象)
|
|
|
results = []
|
|
|
detailed_reports = [] # 收集详细评估报告
|
|
|
- print(f"📊 处理评估结果...\n")
|
|
|
- for i, ((round_idx, search_idx, post_id, post, _), eval_result) in enumerate(zip(tasks, all_eval_results), 1):
|
|
|
- knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level = eval_result
|
|
|
|
|
|
- print(f" [{i}/{len(tasks)}] {post.note_id} - {post.title[:40]}", end="")
|
|
|
+ print(f"📊 收集评估报告...\n")
|
|
|
+ for i, (round_idx, search_idx, post_id, post) in enumerate(posts, 1):
|
|
|
+ # 从post对象的嵌套字段中提取评估结果
|
|
|
+ knowledge_eval = post.knowledge_evaluation
|
|
|
+ content_eval = post.content_knowledge_evaluation
|
|
|
+ purpose_eval = post.purpose_evaluation
|
|
|
+ category_eval = post.category_evaluation
|
|
|
+
|
|
|
+ # 收集详细报告
|
|
|
if knowledge_eval:
|
|
|
- if final_score is not None:
|
|
|
- print(f" → {match_level} ({final_score:.1f}分)")
|
|
|
- elif content_eval and not content_eval.is_content_knowledge:
|
|
|
- print(f" → 非内容知识")
|
|
|
- elif knowledge_eval and not knowledge_eval.is_knowledge:
|
|
|
- print(f" → 非知识")
|
|
|
- else:
|
|
|
- print(f" → 评估未完成")
|
|
|
-
|
|
|
- # 打印详细判断原因
|
|
|
- print(f" 📝 知识评估: {knowledge_eval.conclusion if knowledge_eval.conclusion else '无'}")
|
|
|
- if content_eval and content_eval.is_content_knowledge:
|
|
|
- print(f" 📚 内容知识: {content_eval.summary[:80] if content_eval.summary else '无'}...")
|
|
|
- if purpose_eval:
|
|
|
- print(f" 🎯 目的匹配: {purpose_eval.core_basis[:80] if purpose_eval.core_basis else '无'}...")
|
|
|
- if category_eval:
|
|
|
- print(f" 🏷️ 品类匹配: {category_eval.core_basis[:80] if category_eval.core_basis else '无'}...")
|
|
|
- print()
|
|
|
-
|
|
|
- # 收集详细报告
|
|
|
detailed_report = {
|
|
|
'post_index': i,
|
|
|
'note_id': post.note_id,
|
|
|
'title': post.title,
|
|
|
- 'final_score': final_score,
|
|
|
- 'match_level': match_level,
|
|
|
- 'is_knowledge': knowledge_eval.is_knowledge if knowledge_eval else None,
|
|
|
- 'is_content_knowledge': content_eval.is_content_knowledge if content_eval else None,
|
|
|
- 'knowledge_score': content_eval.final_score if content_eval else None,
|
|
|
+ 'final_score': post.final_score,
|
|
|
+ 'match_level': post.match_level,
|
|
|
+ 'is_knowledge': post.is_knowledge,
|
|
|
+ 'is_content_knowledge': post.is_content_knowledge,
|
|
|
+ 'knowledge_score': post.knowledge_score,
|
|
|
'evaluations': {
|
|
|
'knowledge': {
|
|
|
- 'conclusion': knowledge_eval.conclusion if knowledge_eval else None,
|
|
|
- 'core_evidence': knowledge_eval.core_evidence if knowledge_eval and hasattr(knowledge_eval, 'core_evidence') else None,
|
|
|
- 'issues': knowledge_eval.issues if knowledge_eval and hasattr(knowledge_eval, 'issues') else None
|
|
|
+ 'conclusion': knowledge_eval.get('conclusion') if isinstance(knowledge_eval, dict) else getattr(knowledge_eval, 'conclusion', None),
|
|
|
+ 'core_evidence': knowledge_eval.get('core_evidence') if isinstance(knowledge_eval, dict) else getattr(knowledge_eval, 'core_evidence', None),
|
|
|
+ 'issues': knowledge_eval.get('issues') if isinstance(knowledge_eval, dict) else getattr(knowledge_eval, 'issues', None)
|
|
|
},
|
|
|
'content_knowledge': {
|
|
|
- 'summary': content_eval.summary if content_eval else None,
|
|
|
- 'final_score': content_eval.final_score if content_eval else None,
|
|
|
- 'level': content_eval.level if content_eval else None
|
|
|
- } if content_eval and content_eval.is_content_knowledge else None,
|
|
|
+ 'summary': content_eval.get('summary') if isinstance(content_eval, dict) else getattr(content_eval, 'summary', None),
|
|
|
+ 'final_score': content_eval.get('final_score') if isinstance(content_eval, dict) else getattr(content_eval, 'final_score', None),
|
|
|
+ 'level': content_eval.get('level') if isinstance(content_eval, dict) else getattr(content_eval, 'level', None)
|
|
|
+ } if content_eval and post.is_content_knowledge else None,
|
|
|
'purpose': {
|
|
|
- 'score': purpose_eval.purpose_score if purpose_eval else None,
|
|
|
- 'core_motivation': purpose_eval.core_motivation if purpose_eval else None,
|
|
|
- 'core_basis': purpose_eval.core_basis if purpose_eval else None,
|
|
|
- 'match_level': purpose_eval.match_level if purpose_eval else None
|
|
|
+ 'score': purpose_eval.get('purpose_score') if isinstance(purpose_eval, dict) else getattr(purpose_eval, 'purpose_score', None),
|
|
|
+ 'core_motivation': purpose_eval.get('core_motivation') if isinstance(purpose_eval, dict) else getattr(purpose_eval, 'core_motivation', None),
|
|
|
+ 'core_basis': purpose_eval.get('core_basis') if isinstance(purpose_eval, dict) else getattr(purpose_eval, 'core_basis', None),
|
|
|
+ 'match_level': purpose_eval.get('match_level') if isinstance(purpose_eval, dict) else getattr(purpose_eval, 'match_level', None)
|
|
|
} if purpose_eval else None,
|
|
|
'category': {
|
|
|
- 'score': category_eval.category_score if category_eval else None,
|
|
|
- 'core_basis': category_eval.core_basis if category_eval else None,
|
|
|
- 'match_level': category_eval.match_level if category_eval else None
|
|
|
+ 'score': category_eval.get('category_score') if isinstance(category_eval, dict) else getattr(category_eval, 'category_score', None),
|
|
|
+ 'core_basis': category_eval.get('core_basis') if isinstance(category_eval, dict) else getattr(category_eval, 'core_basis', None),
|
|
|
+ 'match_level': category_eval.get('match_level') if isinstance(category_eval, dict) else getattr(category_eval, 'match_level', None)
|
|
|
} if category_eval else None
|
|
|
}
|
|
|
}
|
|
|
detailed_reports.append(detailed_report)
|
|
|
-
|
|
|
- # 应用评估结果
|
|
|
- apply_evaluation_v3_to_post(
|
|
|
- post,
|
|
|
- knowledge_eval,
|
|
|
- content_eval,
|
|
|
- purpose_eval,
|
|
|
- category_eval,
|
|
|
- final_score,
|
|
|
- match_level
|
|
|
- )
|
|
|
results.append((round_idx, search_idx, post_id, post))
|
|
|
- else:
|
|
|
- print(f" → ❌ 评估失败\n")
|
|
|
|
|
|
print(f"\n✅ 评估完成: {len(results)}/{len(posts)} 成功\n")
|
|
|
|