yangxiaohui преди 1 месец
родител
ревизия
4de151fbe9
променени са 1 файла, в които са добавени 162 реда и са изтрити 7 реда
  1. 162 7
      sug_v6_1_2_1.py

+ 162 - 7
sug_v6_1_2_1.py

@@ -24,7 +24,10 @@ class RunContext(BaseModel):
     log_url: str
     log_dir: str
 
-    # 探索阶段记录
+    # 步骤化日志
+    steps: list[dict] = Field(default_factory=list, description="执行步骤的详细记录")
+
+    # 探索阶段记录(保留用于向后兼容)
     keywords: list[str] | None = Field(default=None, description="提取的关键词")
     exploration_levels: list[dict] = Field(default_factory=list, description="每一层的探索结果")
     level_analyses: list[dict] = Field(default_factory=list, description="每一层的主Agent分析")
@@ -271,24 +274,50 @@ evaluator = Agent[None](
 )
 
 
+# ============================================================================
+# 日志辅助函数
+# ============================================================================
+
+def add_step(context: RunContext, step_name: str, step_type: str, data: dict) -> dict:
+    """Append one step record to ``context.steps`` and return it.
+
+    Args:
+        context: Run context whose ``steps`` list receives the new record.
+        step_name: Human-readable label stored under ``"step_name"``.
+        step_type: Category tag stored under ``"step_type"``.
+        data: Step-specific payload, stored as-is (not copied) under ``"data"``.
+
+    Returns:
+        The dict that was appended to ``context.steps``.
+    """
+    step = {
+        # 1-based position, derived from how many steps are already recorded.
+        "step_number": len(context.steps) + 1,
+        "step_name": step_name,
+        "step_type": step_type,
+        # NOTE(review): no tz argument is passed, so this is presumably a naive
+        # local-time ISO string -- confirm how `datetime` is imported in this file.
+        "timestamp": datetime.now().isoformat(),
+        "data": data
+    }
+    context.steps.append(step)
+    return step
+
+
 # ============================================================================
 # 核心函数
 # ============================================================================
 
-async def extract_keywords(q: str) -> KeywordList:
+async def extract_keywords(q: str, context: RunContext) -> KeywordList:
     """Extract keywords from the question ``q`` and log the step.
 
     Runs ``keyword_extractor`` on ``q`` via ``Runner.run``, prints the
     keywords and the reasoning, records a ``"keyword_extraction"`` step on
     ``context`` through ``add_step``, and returns the agent's final output.
     """
-    print("\n正在提取关键词...")
+    # NOTE(review): the step label is hard-coded to 1; it matches the number
+    # assigned by add_step only when context.steps is empty on entry.
+    print("\n[步骤 1] 正在提取关键词...")
     result = await Runner.run(keyword_extractor, q)
     keyword_list: KeywordList = result.final_output
     print(f"提取的关键词:{keyword_list.keywords}")
     print(f"提取理由:{keyword_list.reasoning}")
+
+    # Record this step (input question, keywords, reasoning) in the step log.
+    add_step(context, "提取关键词", "keyword_extraction", {
+        "input_question": q,
+        "keywords": keyword_list.keywords,
+        "reasoning": keyword_list.reasoning
+    })
+
     return keyword_list
 
 
 async def explore_level(queries: list[str], level_num: int, context: RunContext) -> dict:
     """探索一个层级(并发获取所有query的推荐词)"""
+    step_num = len(context.steps) + 1
     print(f"\n{'='*60}")
-    print(f"Level {level_num} 探索:{len(queries)} 个query")
+    print(f"[步骤 {step_num}] Level {level_num} 探索:{len(queries)} 个query")
     print(f"{'='*60}")
 
     xiaohongshu_api = XiaohongshuSearchRecommendations()
@@ -312,12 +341,23 @@ async def explore_level(queries: list[str], level_num: int, context: RunContext)
     }
 
     context.exploration_levels.append(level_data)
+
+    # 记录步骤
+    add_step(context, f"Level {level_num} 探索", "level_exploration", {
+        "level": level_num,
+        "input_queries": queries,
+        "query_count": len(queries),
+        "results": results,
+        "total_suggestions": sum(len(r['suggestions']) for r in results)
+    })
+
     return level_data
 
 
 async def analyze_level(level_data: dict, all_levels: list[dict], original_question: str, context: RunContext) -> LevelAnalysis:
     """分析当前层级,决定下一步"""
-    print(f"\n正在分析 Level {level_data['level']}...")
+    step_num = len(context.steps) + 1
+    print(f"\n[步骤 {step_num}] 正在分析 Level {level_data['level']}...")
 
     # 构造输入
     analysis_input = f"""
@@ -357,13 +397,26 @@ Level {level_data['level']}
         "analysis": analysis.model_dump()
     })
 
+    # 记录步骤
+    add_step(context, f"Level {level_data['level']} 分析", "level_analysis", {
+        "level": level_data['level'],
+        "key_findings": analysis.key_findings,
+        "promising_signals_count": len(analysis.promising_signals),
+        "promising_signals": [s.model_dump() for s in analysis.promising_signals],
+        "should_evaluate_now": analysis.should_evaluate_now,
+        "candidates_to_evaluate": analysis.candidates_to_evaluate if analysis.should_evaluate_now else [],
+        "next_combinations": analysis.next_combinations if not analysis.should_evaluate_now else [],
+        "reasoning": analysis.reasoning
+    })
+
     return analysis
 
 
 async def evaluate_candidates(candidates: list[str], original_question: str, context: RunContext) -> list[dict]:
     """评估候选query"""
+    step_num = len(context.steps) + 1
     print(f"\n{'='*60}")
-    print(f"评估 {len(candidates)} 个候选query")
+    print(f"[步骤 {step_num}] 评估 {len(candidates)} 个候选query")
     print(f"{'='*60}")
 
     xiaohongshu_api = XiaohongshuSearchRecommendations()
@@ -418,6 +471,15 @@ async def evaluate_candidates(candidates: list[str], original_question: str, con
     results = await asyncio.gather(*[evaluate_single_candidate(c) for c in candidates])
 
     context.evaluation_results = results
+
+    # 记录步骤
+    add_step(context, "评估候选query", "candidate_evaluation", {
+        "candidate_count": len(candidates),
+        "candidates": candidates,
+        "results": results,
+        "total_evaluations": sum(len(r['evaluations']) for r in results)
+    })
+
     return results
 
 
@@ -467,7 +529,7 @@ async def progressive_exploration(context: RunContext, max_levels: int = 4) -> d
     """
 
     # 阶段1:提取关键词(从原始问题提取)
-    keyword_result = await extract_keywords(context.q)
+    keyword_result = await extract_keywords(context.q, context)
     context.keywords = keyword_result.keywords
 
     # 阶段2:渐进式探索
@@ -629,6 +691,27 @@ async def main(input_dir: str, max_levels: int = 4):
     run_context.optimization_result = optimization_result
     run_context.final_output = final_output
 
+    # 记录最终输出步骤(保存完整的结果详情)
+    qualified_results = optimization_result.get("results", [])
+    add_step(run_context, "生成最终结果", "final_result", {
+        "success": optimization_result["success"],
+        "message": optimization_result["message"],
+        "qualified_query_count": len(qualified_results),
+        "qualified_queries": [r["query"] for r in qualified_results],  # 保存所有合格query
+        "qualified_results_detail": [  # 保存完整的评估详情
+            {
+                "rank": idx + 1,
+                "query": r["query"],
+                "from_candidate": r["from_candidate"],
+                "intent_match": r["intent_match"],
+                "relevance_score": r["relevance_score"],
+                "reason": r["reason"]
+            }
+            for idx, r in enumerate(qualified_results)
+        ],
+        "final_output": final_output
+    })
+
     # 保存 RunContext 到 log_dir
     os.makedirs(run_context.log_dir, exist_ok=True)
     context_file_path = os.path.join(run_context.log_dir, "run_context.json")
@@ -636,6 +719,78 @@ async def main(input_dir: str, max_levels: int = 4):
         json.dump(run_context.model_dump(), f, ensure_ascii=False, indent=2)
     print(f"\nRunContext saved to: {context_file_path}")
 
+    # 保存步骤化日志(更直观的格式)
+    steps_file_path = os.path.join(run_context.log_dir, "steps.json")
+    with open(steps_file_path, "w", encoding="utf-8") as f:
+        json.dump(run_context.steps, f, ensure_ascii=False, indent=2)
+    print(f"Steps log saved to: {steps_file_path}")
+
+    # 生成步骤化的可读文本日志
+    steps_text_path = os.path.join(run_context.log_dir, "steps.md")
+    with open(steps_text_path, "w", encoding="utf-8") as f:
+        f.write(f"# 执行步骤日志\n\n")
+        f.write(f"**原始问题**: {run_context.q}\n\n")
+        f.write(f"**执行版本**: {run_context.version}\n\n")
+        f.write(f"**总步骤数**: {len(run_context.steps)}\n\n")
+        f.write("---\n\n")
+
+        for step in run_context.steps:
+            f.write(f"## 步骤 {step['step_number']}: {step['step_name']}\n\n")
+            f.write(f"**类型**: `{step['step_type']}`\n\n")
+            f.write(f"**时间**: {step['timestamp']}\n\n")
+
+            # 根据不同类型格式化数据
+            if step['step_type'] == 'keyword_extraction':
+                f.write(f"**提取的关键词**: {', '.join(step['data']['keywords'])}\n\n")
+                f.write(f"**提取理由**: {step['data']['reasoning']}\n\n")
+
+            elif step['step_type'] == 'level_exploration':
+                f.write(f"**探索层级**: Level {step['data']['level']}\n\n")
+                f.write(f"**输入query数量**: {step['data']['query_count']}\n\n")
+                f.write(f"**总推荐词数**: {step['data']['total_suggestions']}\n\n")
+                f.write(f"**探索的query**: {', '.join(step['data']['input_queries'])}\n\n")
+
+            elif step['step_type'] == 'level_analysis':
+                f.write(f"**关键发现**: {step['data']['key_findings']}\n\n")
+                f.write(f"**有价值信号数**: {step['data']['promising_signals_count']}\n\n")
+                f.write(f"**是否评估**: {step['data']['should_evaluate_now']}\n\n")
+                if step['data']['should_evaluate_now']:
+                    f.write(f"**候选query**: {', '.join(step['data']['candidates_to_evaluate'])}\n\n")
+                else:
+                    f.write(f"**下一层探索**: {', '.join(step['data']['next_combinations'])}\n\n")
+
+            elif step['step_type'] == 'candidate_evaluation':
+                f.write(f"**评估候选数**: {step['data']['candidate_count']}\n\n")
+                f.write(f"**候选query**: {', '.join(step['data']['candidates'])}\n\n")
+                f.write(f"**总评估数**: {step['data']['total_evaluations']}\n\n")
+
+            elif step['step_type'] == 'final_result':
+                f.write(f"**执行状态**: {'✅ 成功' if step['data']['success'] else '❌ 失败'}\n\n")
+                f.write(f"**结果消息**: {step['data']['message']}\n\n")
+                f.write(f"**合格query数量**: {step['data']['qualified_query_count']}\n\n")
+
+                # 显示详细的评估结果
+                if step['data'].get('qualified_results_detail'):
+                    f.write(f"### 合格的query详情\n\n")
+                    for result in step['data']['qualified_results_detail']:
+                        f.write(f"#### {result['rank']}. {result['query']}\n\n")
+                        f.write(f"- **来自候选**: {result['from_candidate']}\n")
+                        f.write(f"- **意图匹配**: {'✅ 是' if result['intent_match'] else '❌ 否'}\n")
+                        f.write(f"- **相关性分数**: {result['relevance_score']:.2f}\n")
+                        f.write(f"- **评估理由**: {result['reason']}\n\n")
+                elif step['data']['qualified_queries']:
+                    # 兼容旧格式(如果没有详情)
+                    f.write(f"**合格的query列表**:\n")
+                    for idx, q in enumerate(step['data']['qualified_queries'], 1):
+                        f.write(f"  {idx}. {q}\n")
+                    f.write("\n")
+
+                f.write(f"### 完整输出\n\n```\n{step['data']['final_output']}\n```\n\n")
+
+            f.write("---\n\n")
+
+    print(f"Steps markdown saved to: {steps_text_path}")
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="搜索query优化工具 - v6.1 意图匹配+相关性评分版")