"""Step 2: match incremental words against the persona system.

Takes the Top-1 match produced by Step 1 and analyses how the incremental
words of that match fit into the persona system.
"""

import os
import sys
import json
import asyncio
from pathlib import Path
from typing import Optional

from agents import trace
from agents.tracing.create import custom_span

from lib.my_trace import set_trace_smith as set_trace
from lib.match_analyzer import match_single
from lib.data_loader import load_persona_data, load_inspiration_list, select_inspiration

# Model configuration
MODEL_NAME = "google/gemini-2.5-pro"


def _short_model_name(model_name: str) -> str:
    """Return the model name in the form used inside step file names."""
    return model_name.replace("google/", "").replace("/", "_")


def format_persona_system(persona_data: dict) -> str:
    """Render the whole persona system as reference text.

    Args:
        persona_data: Persona data. The three section keys read here are
            "灵感点列表", "目的点" and "关键点列表"; each is iterated as a
            sequence of perspective dicts. Missing, empty or ``None``
            sections are skipped.

    Returns:
        The formatted persona system, one entry per line, joined with
        newlines.
    """
    lines = ["# 人设系统"]

    # The three sections, emitted in this fixed order.
    sections = [
        ("灵感点列表", "【灵感点】灵感的来源和性质"),
        ("目的点", "【目的点】创作的目的和价值导向"),
        ("关键点列表", "【关键点】内容的核心主体和表达方式"),
    ]

    for section_key, section_title in sections:
        section_data = persona_data.get(section_key) or []
        if not section_data:
            continue

        lines.append(f"\n## {section_title}\n")

        for perspective in section_data:
            perspective_name = perspective.get("视角名称", "")
            lines.append(f"\n### 视角:{perspective_name}")

            # `or []` also covers an explicit JSON null, which a plain
            # .get(key, []) default would let through as None.
            for pattern in perspective.get("模式列表") or []:
                pattern_name = pattern.get("分类名称", "")
                pattern_def = pattern.get("核心定义", "")
                lines.append(f"\n 【一级】{pattern_name}")
                if pattern_def:
                    lines.append(f" 定义:{pattern_def}")

                # Second-level subdivisions
                for sub in pattern.get("二级细分") or []:
                    sub_name = sub.get("分类名称", "")
                    sub_def = sub.get("分类定义", "")
                    lines.append(f" 【二级】{sub_name}:{sub_def}")

    return "\n".join(lines)


def find_step1_file(persona_dir: str, inspiration: str, model_name: str) -> str:
    """Locate the Step 1 output file for an inspiration.

    Searches ``<persona_dir>/how/灵感点/<inspiration>`` for files matching
    ``*_step1_*_<short model name>.json``.

    Args:
        persona_dir: Persona directory.
        inspiration: Inspiration name (used as a directory name).
        model_name: Model name; a leading "google/" is dropped and any
            remaining "/" becomes "_" before it is used in the pattern.

    Returns:
        Path of the first matching file in sorted order.

    Raises:
        SystemExit: With exit code 1 when no file matches.
    """
    step1_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
    step1_file_pattern = f"*_step1_*_{_short_model_name(model_name)}.json"

    # Path.glob yields entries in arbitrary order; sort so the same file is
    # picked on every run when several scopes match.
    step1_files = sorted(Path(step1_dir).glob(step1_file_pattern))

    if not step1_files:
        print("❌ 找不到 step1 输出文件")
        print(f"查找路径: {step1_dir}/{step1_file_pattern}")
        sys.exit(1)

    return str(step1_files[0])


def _build_output_base(
    step1_inspiration: str,
    b_content: str,
    persona_system_text: str,
    b_context: str,
    current_time: Optional[str],
    log_url: Optional[str],
) -> dict:
    """Build the metadata / inspiration / input part shared by all outputs."""
    return {
        "元数据": {
            "current_time": current_time,
            "log_url": log_url,
            "model": MODEL_NAME,
            "步骤": "Step2: 增量词在人设中的匹配",
        },
        "灵感": step1_inspiration,
        "输入信息": {
            "B": b_content,
            "A": persona_system_text,
            "B_Context": b_context,
            "A_Context": "",
        },
    }


async def process_step2_incremental_match(
    step1_top1: dict,
    persona_data: dict,
    inspiration: str,
    current_time: Optional[str] = None,
    log_url: Optional[str] = None,
) -> dict:
    """Run the incremental-word matching analysis (core business logic).

    The incremental words are the keys of ``step1_top1["匹配结果"]["增量部分"]``.
    They are joined with newlines and matched against the formatted persona
    system through a single ``match_single`` call. When there are no
    incremental words, no call is made and a zero-score result is returned.

    Args:
        step1_top1: The Top-1 match result from Step 1.
        persona_data: Persona data, passed to ``format_persona_system``.
        inspiration: Inspiration name. Not read by this function; the name
            is taken from ``step1_top1["业务信息"]["灵感"]``. Kept so existing
            callers keep working.
        current_time: Timestamp stored in the output metadata.
        log_url: Trace URL stored in the output metadata.

    Returns:
        A dict with the keys "元数据", "灵感", "输入信息", "匹配结果" and
        "step1_结果".
    """
    # Pull the relevant pieces out of the Step 1 result. `or {}` guards
    # against keys that are present but null.
    business_info = step1_top1.get("业务信息") or {}
    step1_match = step1_top1.get("匹配结果") or {}
    step1_inspiration = business_info.get("灵感", "")
    matched_element = business_info.get("匹配要素", "")
    incremental_parts = step1_match.get("增量部分") or {}
    incremental_words = list(incremental_parts.keys())

    persona_system_text = format_persona_system(persona_data)

    # Supplementary context for B, built once and reused in every output.
    b_context = (
        f"这些增量词来自灵感「{step1_inspiration}」, "
        f"在 step1 匹配中,与人设要素「{matched_element}」匹配时产生的增量部分。"
    )

    if not incremental_words:
        print("⚠️ Top1 结果没有增量词,跳过分析")
        output = _build_output_base(
            step1_inspiration, "", persona_system_text, b_context,
            current_time, log_url,
        )
        output["step1_结果"] = step1_top1
        output["匹配结果"] = {
            "score": 0.0,
            "score说明": "无增量词",
            "相同部分": {},
            "增量部分": {},
        }
        return output

    print(f"\n开始增量词匹配分析: {step1_inspiration}")
    print(f"匹配要素: {matched_element}")
    print(f"增量词数量: {len(incremental_words)}, 模型: {MODEL_NAME}\n")

    # All incremental words go into one newline-separated string.
    b_content = "\n".join(incremental_words)

    # One custom span marks the whole matching step in the trace.
    with custom_span(
        name=f"Step2: 增量词匹配 - {step1_inspiration}",
        data={
            "灵感": step1_inspiration,
            "匹配要素": matched_element,
            "增量词数量": len(incremental_words),
            "模型": MODEL_NAME,
            "步骤": "增量词在人设中的匹配分析",
        },
    ):
        # Single call into the generic matching module.
        step2_match = await match_single(
            b_content=b_content,
            a_content=persona_system_text,
            model_name=MODEL_NAME,
            b_context=b_context,
        )

    output = _build_output_base(
        step1_inspiration, b_content, persona_system_text, b_context,
        current_time, log_url,
    )
    output["匹配结果"] = step2_match
    output["step1_结果"] = step1_top1
    return output


async def main(current_time: str, log_url: str, force: bool = False) -> None:
    """Script entry point: load inputs, run Step 2 and save the result.

    Command-line arguments (all optional):
        1. persona directory (default "data/阿里多多酱/out/人设_1110")
        2. inspiration argument passed to ``select_inspiration`` (default "0")
        3. the literal "force", which turns ``force`` on

    Args:
        current_time: Timestamp stored in the output metadata.
        log_url: Trace URL stored in the output metadata and printed at the
            end when truthy.
        force: Re-run even when the output file already exists.

    Raises:
        SystemExit: With exit code 1 when the Step 1 file is missing or its
            result list is empty.
    """
    # Parse command-line arguments
    persona_dir = sys.argv[1] if len(sys.argv) > 1 else "data/阿里多多酱/out/人设_1110"
    inspiration_arg = sys.argv[2] if len(sys.argv) > 2 else "0"
    # A third argument "force" overrides the function parameter.
    if len(sys.argv) > 3 and sys.argv[3] == "force":
        force = True

    print(f"{'=' * 80}")
    print("Step2: 增量词在人设中的匹配分析(Top1)")
    print(f"{'=' * 80}")
    print(f"人设目录: {persona_dir}")
    print(f"灵感参数: {inspiration_arg}")

    # Load data
    persona_data = load_persona_data(persona_dir)
    inspiration_list = load_inspiration_list(persona_dir)
    test_inspiration = select_inspiration(inspiration_arg, inspiration_list)

    # Locate the Step 1 result
    step1_file = find_step1_file(persona_dir, test_inspiration, MODEL_NAME)
    step1_filename = os.path.basename(step1_file)
    step1_basename = os.path.splitext(step1_filename)[0]
    print(f"Step1 输入文件: {step1_file}")

    # Build the output path
    output_dir = os.path.join(persona_dir, "how", "灵感点", test_inspiration)
    model_name_short = _short_model_name(MODEL_NAME)
    # The text before the first "_" of the Step 1 file name is reused as the
    # output prefix (e.g. "all" or "top10").
    scope_prefix = step1_basename.split("_")[0]
    result_index = 0  # use the first match result (top1)
    output_filename = (
        f"{scope_prefix}_step2_top{result_index + 1}_增量词匹配_{model_name_short}.json"
    )
    output_file = os.path.join(output_dir, output_filename)

    # Skip the run when the output already exists, unless forced.
    if not force and os.path.exists(output_file):
        print(f"\n✓ 输出文件已存在,跳过执行: {output_file}")
        print("提示: 如需重新执行,请添加 'force' 参数\n")
        return

    with open(step1_file, 'r', encoding='utf-8') as f:
        step1_data = json.load(f)

    actual_inspiration = step1_data.get("灵感", "")
    step1_results = step1_data.get("匹配结果列表", [])

    if not step1_results:
        print("❌ step1 结果为空")
        sys.exit(1)

    print(f"灵感: {actual_inspiration}")

    # Only top1 is processed by default
    selected_result = step1_results[result_index]
    print(f"处理第 {result_index + 1} 个匹配结果(Top{result_index + 1})\n")

    # Run the core business logic
    output = await process_step2_incremental_match(
        step1_top1=selected_result,
        persona_data=persona_data,
        inspiration=actual_inspiration,
        current_time=current_time,
        log_url=log_url,
    )

    # Record which Step 1 match (1-based) this output was derived from.
    output["元数据"]["step1_匹配索引"] = result_index + 1

    # Save the result
    os.makedirs(output_dir, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    print(f"\n完成!结果已保存到: {output_file}")
    if log_url:
        print(f"Trace: {log_url}\n")


if __name__ == "__main__":
    # Set up tracing
    current_time, log_url = set_trace()

    # Wrap the whole run in one trace context
    with trace("Step2: 增量词匹配分析"):
        asyncio.run(main(current_time, log_url))