|
|
@@ -24,7 +24,10 @@ class RunContext(BaseModel):
|
|
|
log_url: str
|
|
|
log_dir: str
|
|
|
|
|
|
- # 探索阶段记录
|
|
|
+ # 步骤化日志
|
|
|
+ steps: list[dict] = Field(default_factory=list, description="执行步骤的详细记录")
|
|
|
+
|
|
|
+ # 探索阶段记录(保留用于向后兼容)
|
|
|
keywords: list[str] | None = Field(default=None, description="提取的关键词")
|
|
|
exploration_levels: list[dict] = Field(default_factory=list, description="每一层的探索结果")
|
|
|
level_analyses: list[dict] = Field(default_factory=list, description="每一层的主Agent分析")
|
|
|
@@ -271,24 +274,50 @@ evaluator = Agent[None](
|
|
|
)
|
|
|
|
|
|
|
|
|
+# ============================================================================
|
|
|
+# 日志辅助函数
|
|
|
+# ============================================================================
|
|
|
+
|
|
|
def add_step(context: RunContext, step_name: str, step_type: str, data: dict):
    """Append one numbered, timestamped step record to ``context.steps``.

    Args:
        context: Run context whose ``steps`` list receives the record.
        step_name: Human-readable label for the step.
        step_type: Category tag for the step (e.g. ``"keyword_extraction"``).
        data: Payload for the step; stored as passed, not copied.

    Returns:
        The record dict that was appended.
    """
    # Step numbers are 1-based and follow the current length of the log.
    next_number = len(context.steps) + 1
    recorded_at = datetime.now().isoformat()

    entry = dict(
        step_number=next_number,
        step_name=step_name,
        step_type=step_type,
        timestamp=recorded_at,
        data=data,
    )
    context.steps.append(entry)
    return entry
|
|
|
+
|
|
|
+
|
|
|
# ============================================================================
|
|
|
# 核心函数
|
|
|
# ============================================================================
|
|
|
|
|
|
async def extract_keywords(q: str, context: RunContext) -> KeywordList:
    """Extract search keywords from the question and log the step.

    Runs the ``keyword_extractor`` agent on ``q``, prints the extracted
    keywords and the agent's reasoning, and records a ``keyword_extraction``
    step on ``context``.

    Args:
        q: Question text passed to the keyword extractor agent.
        context: Run context that receives the step record via ``add_step``.

    Returns:
        The agent's final output, which exposes ``keywords`` and ``reasoning``.
    """
    # Derive the displayed step number from the step log, as the other stage
    # functions do, so the printed number always matches the step_number that
    # add_step records (previously hard-coded as 1).
    step_num = len(context.steps) + 1
    print(f"\n[步骤 {step_num}] 正在提取关键词...")

    result = await Runner.run(keyword_extractor, q)
    keyword_list: KeywordList = result.final_output
    print(f"提取的关键词:{keyword_list.keywords}")
    print(f"提取理由:{keyword_list.reasoning}")

    # Record the step together with the input that produced it.
    add_step(context, "提取关键词", "keyword_extraction", {
        "input_question": q,
        "keywords": keyword_list.keywords,
        "reasoning": keyword_list.reasoning,
    })

    return keyword_list
|
|
|
|
|
|
|
|
|
async def explore_level(queries: list[str], level_num: int, context: RunContext) -> dict:
|
|
|
"""探索一个层级(并发获取所有query的推荐词)"""
|
|
|
+ step_num = len(context.steps) + 1
|
|
|
print(f"\n{'='*60}")
|
|
|
- print(f"Level {level_num} 探索:{len(queries)} 个query")
|
|
|
+ print(f"[步骤 {step_num}] Level {level_num} 探索:{len(queries)} 个query")
|
|
|
print(f"{'='*60}")
|
|
|
|
|
|
xiaohongshu_api = XiaohongshuSearchRecommendations()
|
|
|
@@ -312,12 +341,23 @@ async def explore_level(queries: list[str], level_num: int, context: RunContext)
|
|
|
}
|
|
|
|
|
|
context.exploration_levels.append(level_data)
|
|
|
+
|
|
|
+ # 记录步骤
|
|
|
+ add_step(context, f"Level {level_num} 探索", "level_exploration", {
|
|
|
+ "level": level_num,
|
|
|
+ "input_queries": queries,
|
|
|
+ "query_count": len(queries),
|
|
|
+ "results": results,
|
|
|
+ "total_suggestions": sum(len(r['suggestions']) for r in results)
|
|
|
+ })
|
|
|
+
|
|
|
return level_data
|
|
|
|
|
|
|
|
|
async def analyze_level(level_data: dict, all_levels: list[dict], original_question: str, context: RunContext) -> LevelAnalysis:
|
|
|
"""分析当前层级,决定下一步"""
|
|
|
- print(f"\n正在分析 Level {level_data['level']}...")
|
|
|
+ step_num = len(context.steps) + 1
|
|
|
+ print(f"\n[步骤 {step_num}] 正在分析 Level {level_data['level']}...")
|
|
|
|
|
|
# 构造输入
|
|
|
analysis_input = f"""
|
|
|
@@ -357,13 +397,26 @@ Level {level_data['level']}
|
|
|
"analysis": analysis.model_dump()
|
|
|
})
|
|
|
|
|
|
+ # 记录步骤
|
|
|
+ add_step(context, f"Level {level_data['level']} 分析", "level_analysis", {
|
|
|
+ "level": level_data['level'],
|
|
|
+ "key_findings": analysis.key_findings,
|
|
|
+ "promising_signals_count": len(analysis.promising_signals),
|
|
|
+ "promising_signals": [s.model_dump() for s in analysis.promising_signals],
|
|
|
+ "should_evaluate_now": analysis.should_evaluate_now,
|
|
|
+ "candidates_to_evaluate": analysis.candidates_to_evaluate if analysis.should_evaluate_now else [],
|
|
|
+ "next_combinations": analysis.next_combinations if not analysis.should_evaluate_now else [],
|
|
|
+ "reasoning": analysis.reasoning
|
|
|
+ })
|
|
|
+
|
|
|
return analysis
|
|
|
|
|
|
|
|
|
async def evaluate_candidates(candidates: list[str], original_question: str, context: RunContext) -> list[dict]:
|
|
|
"""评估候选query"""
|
|
|
+ step_num = len(context.steps) + 1
|
|
|
print(f"\n{'='*60}")
|
|
|
- print(f"评估 {len(candidates)} 个候选query")
|
|
|
+ print(f"[步骤 {step_num}] 评估 {len(candidates)} 个候选query")
|
|
|
print(f"{'='*60}")
|
|
|
|
|
|
xiaohongshu_api = XiaohongshuSearchRecommendations()
|
|
|
@@ -418,6 +471,15 @@ async def evaluate_candidates(candidates: list[str], original_question: str, con
|
|
|
results = await asyncio.gather(*[evaluate_single_candidate(c) for c in candidates])
|
|
|
|
|
|
context.evaluation_results = results
|
|
|
+
|
|
|
+ # 记录步骤
|
|
|
+ add_step(context, "评估候选query", "candidate_evaluation", {
|
|
|
+ "candidate_count": len(candidates),
|
|
|
+ "candidates": candidates,
|
|
|
+ "results": results,
|
|
|
+ "total_evaluations": sum(len(r['evaluations']) for r in results)
|
|
|
+ })
|
|
|
+
|
|
|
return results
|
|
|
|
|
|
|
|
|
@@ -467,7 +529,7 @@ async def progressive_exploration(context: RunContext, max_levels: int = 4) -> d
|
|
|
"""
|
|
|
|
|
|
# 阶段1:提取关键词(从原始问题提取)
|
|
|
- keyword_result = await extract_keywords(context.q)
|
|
|
+ keyword_result = await extract_keywords(context.q, context)
|
|
|
context.keywords = keyword_result.keywords
|
|
|
|
|
|
# 阶段2:渐进式探索
|
|
|
@@ -629,6 +691,27 @@ async def main(input_dir: str, max_levels: int = 4):
|
|
|
run_context.optimization_result = optimization_result
|
|
|
run_context.final_output = final_output
|
|
|
|
|
|
+ # 记录最终输出步骤(保存完整的结果详情)
|
|
|
+ qualified_results = optimization_result.get("results", [])
|
|
|
+ add_step(run_context, "生成最终结果", "final_result", {
|
|
|
+ "success": optimization_result["success"],
|
|
|
+ "message": optimization_result["message"],
|
|
|
+ "qualified_query_count": len(qualified_results),
|
|
|
+ "qualified_queries": [r["query"] for r in qualified_results], # 保存所有合格query
|
|
|
+ "qualified_results_detail": [ # 保存完整的评估详情
|
|
|
+ {
|
|
|
+ "rank": idx + 1,
|
|
|
+ "query": r["query"],
|
|
|
+ "from_candidate": r["from_candidate"],
|
|
|
+ "intent_match": r["intent_match"],
|
|
|
+ "relevance_score": r["relevance_score"],
|
|
|
+ "reason": r["reason"]
|
|
|
+ }
|
|
|
+ for idx, r in enumerate(qualified_results)
|
|
|
+ ],
|
|
|
+ "final_output": final_output
|
|
|
+ })
|
|
|
+
|
|
|
# 保存 RunContext 到 log_dir
|
|
|
os.makedirs(run_context.log_dir, exist_ok=True)
|
|
|
context_file_path = os.path.join(run_context.log_dir, "run_context.json")
|
|
|
@@ -636,6 +719,78 @@ async def main(input_dir: str, max_levels: int = 4):
|
|
|
json.dump(run_context.model_dump(), f, ensure_ascii=False, indent=2)
|
|
|
print(f"\nRunContext saved to: {context_file_path}")
|
|
|
|
|
|
+ # 保存步骤化日志(更直观的格式)
|
|
|
+ steps_file_path = os.path.join(run_context.log_dir, "steps.json")
|
|
|
+ with open(steps_file_path, "w", encoding="utf-8") as f:
|
|
|
+ json.dump(run_context.steps, f, ensure_ascii=False, indent=2)
|
|
|
+ print(f"Steps log saved to: {steps_file_path}")
|
|
|
+
|
|
|
+ # 生成步骤化的可读文本日志
|
|
|
+ steps_text_path = os.path.join(run_context.log_dir, "steps.md")
|
|
|
+ with open(steps_text_path, "w", encoding="utf-8") as f:
|
|
|
+ f.write(f"# 执行步骤日志\n\n")
|
|
|
+ f.write(f"**原始问题**: {run_context.q}\n\n")
|
|
|
+ f.write(f"**执行版本**: {run_context.version}\n\n")
|
|
|
+ f.write(f"**总步骤数**: {len(run_context.steps)}\n\n")
|
|
|
+ f.write("---\n\n")
|
|
|
+
|
|
|
+ for step in run_context.steps:
|
|
|
+ f.write(f"## 步骤 {step['step_number']}: {step['step_name']}\n\n")
|
|
|
+ f.write(f"**类型**: `{step['step_type']}`\n\n")
|
|
|
+ f.write(f"**时间**: {step['timestamp']}\n\n")
|
|
|
+
|
|
|
+ # 根据不同类型格式化数据
|
|
|
+ if step['step_type'] == 'keyword_extraction':
|
|
|
+ f.write(f"**提取的关键词**: {', '.join(step['data']['keywords'])}\n\n")
|
|
|
+ f.write(f"**提取理由**: {step['data']['reasoning']}\n\n")
|
|
|
+
|
|
|
+ elif step['step_type'] == 'level_exploration':
|
|
|
+ f.write(f"**探索层级**: Level {step['data']['level']}\n\n")
|
|
|
+ f.write(f"**输入query数量**: {step['data']['query_count']}\n\n")
|
|
|
+ f.write(f"**总推荐词数**: {step['data']['total_suggestions']}\n\n")
|
|
|
+ f.write(f"**探索的query**: {', '.join(step['data']['input_queries'])}\n\n")
|
|
|
+
|
|
|
+ elif step['step_type'] == 'level_analysis':
|
|
|
+ f.write(f"**关键发现**: {step['data']['key_findings']}\n\n")
|
|
|
+ f.write(f"**有价值信号数**: {step['data']['promising_signals_count']}\n\n")
|
|
|
+ f.write(f"**是否评估**: {step['data']['should_evaluate_now']}\n\n")
|
|
|
+ if step['data']['should_evaluate_now']:
|
|
|
+ f.write(f"**候选query**: {', '.join(step['data']['candidates_to_evaluate'])}\n\n")
|
|
|
+ else:
|
|
|
+ f.write(f"**下一层探索**: {', '.join(step['data']['next_combinations'])}\n\n")
|
|
|
+
|
|
|
+ elif step['step_type'] == 'candidate_evaluation':
|
|
|
+ f.write(f"**评估候选数**: {step['data']['candidate_count']}\n\n")
|
|
|
+ f.write(f"**候选query**: {', '.join(step['data']['candidates'])}\n\n")
|
|
|
+ f.write(f"**总评估数**: {step['data']['total_evaluations']}\n\n")
|
|
|
+
|
|
|
+ elif step['step_type'] == 'final_result':
|
|
|
+ f.write(f"**执行状态**: {'✅ 成功' if step['data']['success'] else '❌ 失败'}\n\n")
|
|
|
+ f.write(f"**结果消息**: {step['data']['message']}\n\n")
|
|
|
+ f.write(f"**合格query数量**: {step['data']['qualified_query_count']}\n\n")
|
|
|
+
|
|
|
+ # 显示详细的评估结果
|
|
|
+ if step['data'].get('qualified_results_detail'):
|
|
|
+ f.write(f"### 合格的query详情\n\n")
|
|
|
+ for result in step['data']['qualified_results_detail']:
|
|
|
+ f.write(f"#### {result['rank']}. {result['query']}\n\n")
|
|
|
+ f.write(f"- **来自候选**: {result['from_candidate']}\n")
|
|
|
+ f.write(f"- **意图匹配**: {'✅ 是' if result['intent_match'] else '❌ 否'}\n")
|
|
|
+ f.write(f"- **相关性分数**: {result['relevance_score']:.2f}\n")
|
|
|
+ f.write(f"- **评估理由**: {result['reason']}\n\n")
|
|
|
+ elif step['data']['qualified_queries']:
|
|
|
+ # 兼容旧格式(如果没有详情)
|
|
|
+ f.write(f"**合格的query列表**:\n")
|
|
|
+ for idx, q in enumerate(step['data']['qualified_queries'], 1):
|
|
|
+ f.write(f" {idx}. {q}\n")
|
|
|
+ f.write("\n")
|
|
|
+
|
|
|
+ f.write(f"### 完整输出\n\n```\n{step['data']['final_output']}\n```\n\n")
|
|
|
+
|
|
|
+ f.write("---\n\n")
|
|
|
+
|
|
|
+ print(f"Steps markdown saved to: {steps_text_path}")
|
|
|
+
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
parser = argparse.ArgumentParser(description="搜索query优化工具 - v6.1 意图匹配+相关性评分版")
|