yangxiaohui 2 weken geleden
bovenliggende
commit
18d8e9e0c0
2 gewijzigde bestanden met toevoegingen van 670 en 0 verwijderingen
  1. 295 0
      run_inspiration_analysis.py
  2. 375 0
      step3_generate_inspirations.py

+ 295 - 0
run_inspiration_analysis.py

@@ -0,0 +1,295 @@
+"""
+主流程脚本:串联 Step1 和 Step2
+
+执行完整的灵感分析流程:
+1. Step1: 灵感与人设匹配(调用 step1 main,自动保存结果)
+2. Step2: 增量词在人设中的匹配(调用 step2 main,自动保存结果)
+3. 生成流程汇总文件
+"""
+import os
+import sys
+import json
+import asyncio
+
+from agents import trace
+from lib.my_trace import set_trace_smith as set_trace
+from lib.data_loader import load_inspiration_list, select_inspiration
+from lib.utils import read_json
+
+# 导入 step1 和 step2 的 main 函数
+import step1_inspiration_match
+import step2_incremental_match
+
+
def find_step1_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
    """Locate the step1 result file for one inspiration.

    The file is searched in ``<persona_dir>/how/灵感点/<inspiration>`` under the
    name ``<scope>_step1_*.json``, where ``<scope>`` is ``top<max_tasks>`` when
    a task limit is given and ``all`` otherwise.

    Args:
        persona_dir: Persona directory.
        inspiration: Inspiration name (used as the sub-directory name).
        max_tasks: Task limit of the run, or None for no limit.

    Returns:
        Path of the matching file. When several files match, the most recently
        modified one is returned (ties broken by file name).

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    from pathlib import Path

    step1_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
    scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
    step1_pattern = f"{scope_prefix}_step1_*.json"

    candidates = list(Path(step1_dir).glob(step1_pattern))
    if not candidates:
        raise FileNotFoundError(f"找不到 step1 输出文件: {step1_dir}/{step1_pattern}")

    # glob() makes no ordering promise, so taking element 0 would pick an
    # arbitrary file when older results are still around. Prefer the newest
    # file and fall back to the name so the choice is always deterministic.
    newest = max(candidates, key=lambda path: (path.stat().st_mtime, path.name))
    return str(newest)
+
+
def find_step2_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
    """Locate the step2 result file for one inspiration.

    The file is searched in ``<persona_dir>/how/灵感点/<inspiration>`` under the
    name ``<scope>_step2_*.json``, where ``<scope>`` is ``top<max_tasks>`` when
    a task limit is given and ``all`` otherwise.

    Args:
        persona_dir: Persona directory.
        inspiration: Inspiration name (used as the sub-directory name).
        max_tasks: Task limit of the run, or None for no limit.

    Returns:
        Path of the matching file. When several files match, the most recently
        modified one is returned (ties broken by file name).

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    from pathlib import Path

    step2_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
    scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
    step2_pattern = f"{scope_prefix}_step2_*.json"

    candidates = list(Path(step2_dir).glob(step2_pattern))
    if not candidates:
        raise FileNotFoundError(f"找不到 step2 输出文件: {step2_dir}/{step2_pattern}")

    # glob() makes no ordering promise, so taking element 0 would pick an
    # arbitrary file when older results are still around. Prefer the newest
    # file and fall back to the name so the choice is always deterministic.
    newest = max(candidates, key=lambda path: (path.stat().st_mtime, path.name))
    return str(newest)
+
+
async def run_full_analysis(
    persona_dir: str,
    inspiration: str,
    max_tasks: int = None,
    current_time: str = None,
    log_url: str = None
) -> dict:
    """Run Step1 then Step2 for one inspiration and write a summary file.

    Both steps are invoked through their ``main`` coroutines. Their inputs are
    handed over via ``sys.argv``, which is swapped in for the duration of each
    call and restored afterwards. After each step its result file is located on
    disk and read; the key figures end up in a summary JSON written to the same
    directory.

    Args:
        persona_dir: Persona directory path.
        inspiration: Inspiration text.
        max_tasks: Step1 task limit (None means no limit).
        current_time: Timestamp forwarded to both steps and stored in the summary.
        log_url: Log link forwarded to both steps and stored in the summary.

    Returns:
        Dict with ``step1_file``, ``step2_file``, ``summary_file`` and
        ``status``. ``status`` is ``"success"``, or ``"step1_empty"`` when
        step1 produced no matches (step2 and summary paths are then None).
    """
    heavy_rule = "=" * 80
    light_rule = "─" * 80
    saved_argv = sys.argv.copy()

    async def _call_step(step_main, argv):
        # Swap in the step's argument vector, always restoring the saved one.
        sys.argv = argv
        try:
            await step_main(current_time, log_url)
        finally:
            sys.argv = saved_argv

    print(f"\n{heavy_rule}")
    print(f"开始完整分析流程: {inspiration}")
    print(f"{heavy_rule}\n")

    # ---------- Step1: match the inspiration against the persona ----------
    print(light_rule)
    print("Step1: 灵感与人设匹配")
    print(f"{light_rule}\n")

    task_limit_arg = "all" if max_tasks is None else str(max_tasks)
    await _call_step(
        step1_inspiration_match.main,
        ["step1_inspiration_match.py", persona_dir, inspiration, task_limit_arg],
    )

    step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
    print(f"✓ Step1 完成,结果文件: {step1_file}\n")

    step1_data = read_json(step1_file)
    matches = step1_data.get("匹配结果列表", [])
    if not matches:
        # Nothing to build on: stop before step2.
        print("⚠️  Step1 结果为空,终止流程")
        return {
            "step1_file": step1_file,
            "step2_file": None,
            "summary_file": None,
            "status": "step1_empty"
        }

    best_match = matches[0]
    step1_score = best_match.get('匹配结果', {}).get('score', 0)
    step1_element = best_match.get("业务信息", {}).get("匹配要素", "")
    print(f"Top1 匹配要素: {step1_element}, score: {step1_score:.2f}")

    # ---------- Step2: match the incremental words against the persona ----------
    print(f"\n{light_rule}")
    print("Step2: 增量词在人设中的匹配")
    print(f"{light_rule}\n")

    await _call_step(
        step2_incremental_match.main,
        ["step2_incremental_match.py", persona_dir, inspiration],
    )

    # NOTE(review): step2 is given no task limit via argv, yet its output is
    # looked up with the max_tasks-derived prefix — confirm step2 names its
    # file accordingly.
    step2_file = find_step2_output(persona_dir, inspiration, max_tasks)
    print(f"✓ Step2 完成,结果文件: {step2_file}\n")

    step2_data = read_json(step2_file)
    step2_score = step2_data.get("匹配结果", {}).get("score", 0)
    step2_b_content = step2_data.get("输入信息", {}).get("B", "")
    step2_word_count = 0
    if step2_b_content:
        # One entry per line of the "B" input.
        step2_word_count = len(step2_b_content.split("\n"))

    # ---------- Summary file ----------
    scope_prefix = "all" if max_tasks is None else f"top{max_tasks}"
    # The last "_"-separated piece of the step1 file name is reused as the model tag.
    model_short = os.path.basename(step1_file).split("_")[-1].replace(".json", "")
    summary_file = os.path.join(
        persona_dir,
        "how",
        "灵感点",
        inspiration,
        f"{scope_prefix}_summary_完整流程_{model_short}.json",
    )

    summary = {
        "元数据": {
            "current_time": current_time,
            "log_url": log_url,
            "流程": "Step1 + Step2 完整分析",
            "step1_model": step1_data.get("元数据", {}).get("model", ""),
            "step2_model": step2_data.get("元数据", {}).get("model", "")
        },
        "灵感": inspiration,
        "文件路径": {
            "step1": step1_file,
            "step2": step2_file
        },
        "关键指标": {
            "step1_top1_score": step1_score,
            "step1_top1_匹配要素": step1_element,
            "step2_增量词数量": step2_word_count,
            "step2_score": step2_score
        }
    }

    with open(summary_file, 'w', encoding='utf-8') as summary_fh:
        json.dump(summary, summary_fh, ensure_ascii=False, indent=2)

    print(heavy_rule)
    print("完整流程执行完成")
    print(heavy_rule)
    print("\n结果文件:")
    print(f"  Step1: {step1_file}")
    print(f"  Step2: {step2_file}")
    print(f"  汇总:  {summary_file}\n")

    return {
        "step1_file": step1_file,
        "step2_file": step2_file,
        "summary_file": summary_file,
        "status": "success"
    }
+
+
async def main(current_time: str, log_url: str):
    """Batch entry point: run the Step1 + Step2 flow for several inspirations.

    Command-line arguments (all optional):
        1. persona directory (a hard-coded default path is used otherwise)
        2. how many inspirations to process, or "all" (default "1")
        3. step1 task limit, or "all" for no limit (default: no limit)

    Args:
        current_time: Timestamp forwarded to every analysis run.
        log_url: Trace link forwarded to every run and printed at the end.
    """
    cli_args = sys.argv[1:]
    persona_dir = cli_args[0] if cli_args else "data/阿里多多酱/out/人设_1110"
    inspiration_count_arg = cli_args[1] if len(cli_args) > 1 else "1"

    # A missing third argument and the literal "all" both mean "no limit".
    max_tasks = None
    if len(cli_args) > 2 and cli_args[2] != "all":
        max_tasks = int(cli_args[2])

    banner = "=" * 80
    print(banner)
    print("灵感分析主流程 (Step1 + Step2)")
    print(banner)
    print(f"人设目录: {persona_dir}")

    inspiration_list = load_inspiration_list(persona_dir)

    # Work out how many inspirations to take from the front of the list.
    if inspiration_count_arg == "all":
        inspiration_count = len(inspiration_list)
        print(f"处理灵感: 全部 ({inspiration_count} 个)")
    else:
        inspiration_count = int(inspiration_count_arg)
        print(f"处理灵感: 前 {inspiration_count} 个")

    if max_tasks:
        print(f"Step1 任务数限制: {max_tasks}")

    selected = inspiration_list[:inspiration_count]
    total = len(selected)

    print("\n将处理以下灵感:")
    for position, text in enumerate(selected, start=1):
        print(f"  {position}. {text}")

    # Run the inspirations one after another, collecting each result dict.
    hash_rule = "#" * 80
    results = []
    for position, text in enumerate(selected, start=1):
        print(f"\n{hash_rule}")
        print(f"处理第 {position}/{total} 个灵感")
        print(hash_rule)

        outcome = await run_full_analysis(
            persona_dir=persona_dir,
            inspiration=text,
            max_tasks=max_tasks,
            current_time=current_time,
            log_url=log_url
        )
        results.append(outcome)

    # Final report.
    print(f"\n{banner}")
    print("批量处理完成")
    print(banner)

    success_count = sum(outcome["status"] == "success" for outcome in results)
    print(f"\n成功: {success_count}/{len(results)}")

    for position, (text, outcome) in enumerate(zip(selected, results), start=1):
        if outcome["status"] == "success":
            status_icon = "✓"
        else:
            status_icon = "✗"
        print(f"  {status_icon} [{position}] {text}")

    if log_url:
        print(f"\nTrace: {log_url}")
+
+
if __name__ == "__main__":
    # Initialise tracing; yields the run timestamp and the log URL.
    current_time, log_url = set_trace()

    # Keep the whole run inside a single trace context.
    with trace("灵感分析完整流程 (Step1+Step2)"):
        asyncio.run(main(current_time=current_time, log_url=log_url))

+ 375 - 0
step3_generate_inspirations.py

@@ -0,0 +1,375 @@
+"""
+Step3: 基于匹配节点生成灵感点
+
+基于 Step1 的 Top1 匹配结果,以匹配到的人设要素作为锚点,
+让 Agent 分析可以产生哪些灵感点
+"""
+import os
+import sys
+import json
+import asyncio
+from pathlib import Path
+
+from agents import Agent, Runner, trace
+from agents.tracing.create import custom_span
+from lib.my_trace import set_trace_smith as set_trace
+from lib.client import get_model
+from lib.data_loader import load_persona_data, load_inspiration_list, select_inspiration
+
# Model configuration: identifier passed to get_model() when the agent is
# built, and used to derive the model suffix of the step1/step3 file names.
MODEL_NAME = "google/gemini-2.5-pro"


# ========== System Prompt ==========
# Instructions for the generation agent. The prompt asks for a strict-JSON
# reply holding an element analysis plus a list of 5-10 inspiration points.
GENERATE_INSPIRATIONS_PROMPT = """
# 任务
基于给定的人设要素(锚点),分析和生成可能的灵感点。

## 输入说明

- **<人设要素></人设要素>**: 作为锚点的人设要素(一级或二级分类)
- **<要素上下文></要素上下文>**: 该要素的上下文信息(所属视角、一级分类等)
- **<参考灵感></参考灵感>**: 一个已匹配到该要素的灵感点示例(可选)

## 分析方法

### 核心原则:基于要素特征发散灵感

从人设要素的核心特征出发,思考可能触发该要素的各种灵感来源。

### 分析步骤

1. **理解要素核心**
   - 分析人设要素的核心特征
   - 理解该要素代表的内容类型或表达方式
   - 结合上下文理解要素的定位

2. **参考已有灵感**(如果提供)
   - 分析参考灵感如何触发该要素
   - 识别灵感的关键特征

3. **发散思考**
   - 从不同角度思考可能的灵感来源
   - 考虑不同的场景、话题、情感、事件等
   - 保持与要素核心特征的相关性

4. **生成灵感点列表**
   - 每个灵感点应该简洁明确
   - 灵感点之间应有一定的多样性
   - 灵感点应该能够触发该人设要素

---

## 输出格式(严格JSON)

```json
{
  "要素分析": {
    "核心特征": "简要描述该要素的核心特征(1-2句话)",
    "适用场景": "该要素适用的内容场景或表达方式"
  },
  "灵感点列表": [
    {
      "灵感点": "具体的灵感点描述",
      "说明": "为什么这个灵感可能触发该要素"
    },
    {
      "灵感点": "具体的灵感点描述",
      "说明": "为什么这个灵感可能触发该要素"
    }
  ]
}
```

**输出要求**:
1. 必须严格按照上述JSON格式输出
2. 所有字段都必须填写
3. **要素分析**:包含核心特征和适用场景
4. **灵感点列表**:生成 5-10 个灵感点
5. 每个灵感点包含:
   - **灵感点**:简洁的灵感描述(一句话)
   - **说明**:解释为什么这个灵感可能触发该要素(1-2句话)
6. 灵感点应该多样化,覆盖不同角度和场景
""".strip()
+
+
def create_generate_agent(model_name: str) -> Agent:
    """Build the agent that generates inspiration points.

    Args:
        model_name: Model name; resolved to a model object via ``get_model``.

    Returns:
        An ``Agent`` set up with the generation system prompt and no tools.
    """
    return Agent(
        name="Inspiration Generator Expert",
        instructions=GENERATE_INSPIRATIONS_PROMPT,
        model=get_model(model_name),
        tools=[],
    )
+
+
def parse_generate_response(response_content: str) -> dict:
    """Extract the JSON object from an agent reply.

    The reply may be bare JSON, JSON wrapped in a markdown code fence (with or
    without a language tag, in any letter case), or JSON surrounded by prose.
    Candidates are tried in this order: ``json``-tagged fences, other fences,
    the whole reply, and finally the span from the first ``{`` to the last
    ``}``. The first candidate that decodes to a JSON object wins.

    Args:
        response_content: Response text returned by the agent.

    Returns:
        The decoded dict. If nothing decodes to a JSON object (or the input is
        not a string), a message is printed and a placeholder dict with
        "解析失败" markers and an empty "灵感点列表" is returned, so callers can
        always use ``.get`` on the result.
    """
    import re  # local import: only this function needs it

    def _fallback(reason) -> dict:
        # Same message and placeholder shape as before; a fresh dict per call.
        print(f"解析响应失败: {reason}")
        return {
            "要素分析": {
                "核心特征": "解析失败",
                "适用场景": "解析失败"
            },
            "灵感点列表": []
        }

    if not isinstance(response_content, str):
        return _fallback(f"expected str, got {type(response_content).__name__}")

    text = response_content.strip()

    # Complete fences only; group 1 is the optional language tag, group 2 the body.
    fences = re.findall(r"```[ \t]*([A-Za-z0-9_+-]*)(.*?)```", text, flags=re.DOTALL)
    candidates = [body for tag, body in fences if tag.lower() == "json"]
    candidates += [body for tag, body in fences if tag.lower() != "json"]
    candidates.append(text)

    # Last resort for unclosed fences or prose around the payload.
    first_brace, last_brace = text.find("{"), text.rfind("}")
    if 0 <= first_brace < last_brace:
        candidates.append(text[first_brace:last_brace + 1])

    last_error = "no JSON object found"
    for candidate in candidates:
        try:
            parsed = json.loads(candidate.strip())
        except (ValueError, RecursionError) as exc:
            last_error = exc
            continue
        if isinstance(parsed, dict):
            return parsed
        # Valid JSON of the wrong shape (list, string, ...) would break callers.
        last_error = f"expected a JSON object, got {type(parsed).__name__}"

    return _fallback(last_error)
+
+
def find_step1_file(persona_dir: str, inspiration: str, model_name: str) -> str:
    """Locate the step1 result file produced with a given model.

    The file is searched in ``<persona_dir>/how/灵感点/<inspiration>`` under the
    name ``*_step1_*_<model>.json``, where ``<model>`` is ``model_name`` with a
    ``google/`` prefix removed and remaining ``/`` replaced by ``_``.

    Args:
        persona_dir: Persona directory.
        inspiration: Inspiration name (used as the sub-directory name).
        model_name: Model name.

    Returns:
        Path of the matching file. When several files match (for example
        results of different scopes), the most recently modified one is
        returned (ties broken by file name).

    Raises:
        SystemExit: With status 1 when no file matches.
    """
    step1_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
    model_name_short = model_name.replace("google/", "").replace("/", "_")
    step1_file_pattern = f"*_step1_*_{model_name_short}.json"

    candidates = list(Path(step1_dir).glob(step1_file_pattern))
    if not candidates:
        print(f"❌ 找不到 step1 输出文件")
        print(f"查找路径: {step1_dir}/{step1_file_pattern}")
        sys.exit(1)

    # glob() makes no ordering promise, so taking element 0 would pick an
    # arbitrary file. Prefer the newest one and fall back to the name so the
    # choice is always deterministic.
    newest = max(candidates, key=lambda path: (path.stat().st_mtime, path.name))
    return str(newest)
+
+
async def process_step3_generate_inspirations(
    step1_top1: dict,
    reference_inspiration: str,
    current_time: str = None,
    log_url: str = None
) -> dict:
    """Generate inspiration points anchored on a step1 match (core logic).

    The matched persona element and its context are pulled out of the step1
    result, placed into a task message together with the reference
    inspiration, and sent to the generation agent. The agent's final output is
    parsed and returned with metadata and the anchor details.

    Args:
        step1_top1: The top-1 match entry from the step1 result.
        reference_inspiration: Reference inspiration (the step1 input).
        current_time: Timestamp stored in the output metadata.
        log_url: Trace URL stored in the output metadata.

    Returns:
        Dict with the keys "元数据", "锚点信息", "step1_结果" and "生成结果".
    """
    matched_element = step1_top1.get("业务信息", {}).get("匹配要素", "")
    element_context = step1_top1.get("输入信息", {}).get("A_Context", "")

    print("\n开始灵感生成分析")
    print(f"锚点要素: {matched_element}")
    print(f"参考灵感: {reference_inspiration}")
    print(f"模型: {MODEL_NAME}\n")

    # User message: the three tagged inputs the system prompt refers to.
    task_description = f"""## 本次分析任务

<人设要素>
{matched_element}
</人设要素>

<要素上下文>
{element_context}
</要素上下文>

<参考灵感>
{reference_inspiration}
</参考灵感>

请基于上述人设要素作为锚点,分析并生成可能的灵感点列表,严格按照系统提示中的 JSON 格式输出结果。"""

    text_part = {"type": "input_text", "text": task_description}
    messages = [{"role": "user", "content": [text_part]}]

    span_data = {
        "锚点要素": matched_element,
        "参考灵感": reference_inspiration,
        "模型": MODEL_NAME,
        "步骤": "基于要素生成灵感点"
    }

    # The agent is created and run inside a custom span so the call is traced.
    with custom_span(name=f"Step3: 灵感生成 - {matched_element}", data=span_data):
        generator = create_generate_agent(MODEL_NAME)
        run_result = await Runner.run(generator, input=messages)

    generated = parse_generate_response(run_result.final_output)

    metadata = {
        "current_time": current_time,
        "log_url": log_url,
        "model": MODEL_NAME,
        "步骤": "Step3: 基于匹配节点生成灵感点"
    }
    anchor_info = {
        "人设要素": matched_element,
        "要素上下文": element_context,
        "参考灵感": reference_inspiration
    }
    return {
        "元数据": metadata,
        "锚点信息": anchor_info,
        "step1_结果": step1_top1,
        "生成结果": generated
    }
+
+
async def main(current_time: str, log_url: str):
    """Command-line entry point for Step3.

    Reads ``sys.argv[1]`` (persona directory) and ``sys.argv[2]`` (an index
    into the inspiration list, or the inspiration text itself), loads the
    matching step1 result, generates inspiration points anchored on its first
    match and writes the result as JSON into the inspiration's directory.

    The process exits with status 1 when the inspiration cannot be resolved or
    the step1 result list is empty.

    Args:
        current_time: Timestamp stored in the output metadata.
        log_url: Trace link stored in the output metadata and printed at the end.
    """
    argv = sys.argv
    persona_dir = argv[1] if len(argv) > 1 else "data/阿里多多酱/out/人设_1110"
    inspiration_arg = argv[2] if len(argv) > 2 else "0"

    banner = "=" * 80
    print(banner)
    print("Step3: 基于匹配节点生成灵感点")
    print(banner)
    print(f"人设目录: {persona_dir}")
    print(f"灵感参数: {inspiration_arg}")

    # The persona data is loaded but not used further in this function.
    persona_data = load_persona_data(persona_dir)
    inspiration_list = load_inspiration_list(persona_dir)

    # Resolve the inspiration: a numeric argument is an index, anything else
    # must be the literal inspiration text.
    try:
        inspiration_index = int(inspiration_arg)
        if not (0 <= inspiration_index < len(inspiration_list)):
            print(f"❌ 灵感索引超出范围: {inspiration_index}")
            sys.exit(1)
        test_inspiration = inspiration_list[inspiration_index]
        print(f"使用灵感[{inspiration_index}]: {test_inspiration}")
    except ValueError:
        if inspiration_arg not in inspiration_list:
            print(f"❌ 找不到灵感: {inspiration_arg}")
            sys.exit(1)
        test_inspiration = inspiration_arg
        print(f"使用灵感: {test_inspiration}")

    # Locate and load the step1 result.
    step1_file = find_step1_file(persona_dir, test_inspiration, MODEL_NAME)
    step1_basename = os.path.splitext(os.path.basename(step1_file))[0]

    print(f"Step1 输入文件: {step1_file}")

    with open(step1_file, 'r', encoding='utf-8') as step1_fh:
        step1_data = json.load(step1_fh)

    actual_inspiration = step1_data.get("灵感", "")
    step1_results = step1_data.get("匹配结果列表", [])

    if not step1_results:
        print("❌ step1 结果为空")
        sys.exit(1)

    print(f"灵感: {actual_inspiration}")

    # Only the first (top-1) match is processed.
    result_index = 0
    rank = result_index + 1
    print(f"处理第 {rank} 个匹配结果(Top{rank})\n")

    output = await process_step3_generate_inspirations(
        step1_top1=step1_results[result_index],
        reference_inspiration=actual_inspiration,
        current_time=current_time,
        log_url=log_url
    )

    # Record which step1 match (1-based) the output is anchored on.
    output["元数据"]["step1_匹配索引"] = rank

    # The scope marker ("all", "top10", ...) is the first "_"-separated piece
    # of the step1 file name and is carried over into the output name.
    scope_prefix = step1_basename.split("_")[0]
    model_name_short = MODEL_NAME.replace("google/", "").replace("/", "_")
    output_dir = os.path.join(persona_dir, "how", "灵感点", test_inspiration)
    output_filename = f"{scope_prefix}_step3_top{rank}_生成灵感_{model_name_short}.json"

    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, output_filename)

    with open(output_file, 'w', encoding='utf-8') as output_fh:
        json.dump(output, output_fh, ensure_ascii=False, indent=2)

    # Preview at most five of the generated inspiration points.
    inspirations = output.get("生成结果", {}).get("灵感点列表", [])

    print(f"\n{banner}")
    print(f"生成了 {len(inspirations)} 个灵感点:")
    print(banner)
    for number, entry in enumerate(inspirations[:5], start=1):
        print(f"{number}. {entry.get('灵感点', '')}")
    if len(inspirations) > 5:
        print(f"... 还有 {len(inspirations) - 5} 个")

    print(f"\n完成!结果已保存到: {output_file}")
    if log_url:
        print(f"Trace: {log_url}\n")
+
+
if __name__ == "__main__":
    # Initialise tracing; yields the run timestamp and the log URL.
    current_time, log_url = set_trace()

    # Keep the whole run inside a single trace context.
    with trace("Step3: 生成灵感点"):
        asyncio.run(main(current_time=current_time, log_url=log_url))