yangxiaohui
/
how


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
							"""
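Analyze inspiration-point matching results.

Reads the step1 and step2 results of every inspiration under the
how/灵感点 directory, sorts them by step1 score and then by step2 score,
and writes a summary report.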
"""
import os
import json
import argparse
from pathlib import Path
from typing import List, Dict


def _load_json(path: str):
    """Parse the UTF-8 JSON file at *path* and return the decoded document."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _first_matching_file(directory: str, pattern: str):
    """Return the lexicographically first file in *directory* matching *pattern*.

    Returns None when nothing matches. Sorting makes the choice deterministic,
    because glob itself yields entries in arbitrary order.
    """
    matches = sorted(str(p) for p in Path(directory).glob(pattern))
    return matches[0] if matches else None


def _extract_step1(step1_data):
    """Build the "step1" entry from a decoded step1 document.

    Returns None when the document has no match results at all.

    Raises:
        ValueError: if the document does not have the expected shape.
    """
    if not isinstance(step1_data, dict):
        raise ValueError("step1 JSON root is not an object")

    match_list = step1_data.get("匹配结果列表") or []
    if not match_list:
        return None
    if not isinstance(match_list, list) or not isinstance(match_list[0], dict):
        raise ValueError("step1 match list has an unexpected shape")

    # Only the first (top-1) match is reported.
    top1 = match_list[0]
    # `or {}` also covers an explicit JSON null, not just a missing key.
    match_result = top1.get("匹配结果") or {}
    if not isinstance(match_result, dict):
        raise ValueError("step1 match result is not an object")
    business_info = top1.get("业务信息") or {}

    return {
        "输入信息": top1.get("输入信息", {}),
        "匹配结果": match_result,
        "匹配要素": business_info.get("匹配要素", "") if isinstance(business_info, dict) else "",
    }


def _extract_step2(step2_data):
    """Build the "step2" entry from a decoded step2 document.

    Raises:
        ValueError: if the document does not have the expected shape.
    """
    if not isinstance(step2_data, dict):
        raise ValueError("step2 JSON root is not an object")

    input_info = step2_data.get("输入信息") or {}
    match_result = step2_data.get("匹配结果") or {}
    if not isinstance(input_info, dict) or not isinstance(match_result, dict):
        raise ValueError("step2 document has an unexpected shape")

    # The count is the number of newline-separated lines in the "B" input.
    b_content = input_info.get("B", "")
    word_count = len(b_content.split("\n")) if isinstance(b_content, str) and b_content else 0

    return {
        "输入信息": input_info,
        "匹配结果": match_result,
        "增量词数量": word_count,
    }


def collect_inspiration_results(persona_dir: str) -> List[Dict]:
    """Collect the matching results of every inspiration under a persona directory.

    Each sub-directory of ``<persona_dir>/how/灵感点`` is treated as one
    inspiration. A sub-directory is skipped when it has no ``*_step1_*.json``
    file, when that file cannot be read or parsed, or when it contains no match
    results. Sub-directories are visited in sorted name order so the output is
    reproducible.

    Args:
        persona_dir: Path of the persona directory.

    Returns:
        A list of dicts, one per inspiration, with the keys "灵感" (the
        sub-directory name), "step1" (input info, match result and matched
        element of the top-1 step1 match), "step2" (input info, match result
        and incremental word count, or None when step2 is absent or could not
        be read) and "文件信息" (base names of the files that were found).
        The "匹配结果" values are always dicts, never None.
    """
    inspiration_base_dir = os.path.join(persona_dir, "how", "灵感点")

    # isdir (rather than exists) so a stray regular file is reported here
    # instead of making os.listdir raise.
    if not os.path.isdir(inspiration_base_dir):
        print(f"❌ 目录不存在: {inspiration_base_dir}")
        return []

    results = []

    for inspiration_name in sorted(os.listdir(inspiration_base_dir)):
        inspiration_dir = os.path.join(inspiration_base_dir, inspiration_name)

        # Only directories represent inspirations.
        if not os.path.isdir(inspiration_dir):
            continue

        step1_file = _first_matching_file(inspiration_dir, "*_step1_*.json")
        if step1_file is None:
            continue
        step2_file = _first_matching_file(inspiration_dir, "*_step2_*.json")

        # step1 is mandatory: any failure skips the whole inspiration.
        # ValueError covers JSON and text-decoding errors as well as the
        # shape checks performed by the extractor.
        try:
            step1_entry = _extract_step1(_load_json(step1_file))
        except (OSError, ValueError) as e:
            print(f"⚠️  读取 step1 失败: {inspiration_name}, {e}")
            continue
        if step1_entry is None:
            continue

        # step2 is optional: on failure the inspiration is kept and reported
        # as not having a step2 result, so consumers never see a half-filled
        # entry with a None match result.
        step2_entry = None
        if step2_file is not None:
            try:
                step2_entry = _extract_step2(_load_json(step2_file))
            except (OSError, ValueError) as e:
                print(f"⚠️  读取 step2 失败: {inspiration_name}, {e}")

        results.append({
            "灵感": inspiration_name,
            "step1": step1_entry,
            "step2": step2_entry,
            "文件信息": {
                "step1": os.path.basename(step1_file),
                "step2": os.path.basename(step2_file) if step2_file else None
            }
        })

    return results


def _step_score(step_entry, default):
    """Return the numeric score stored in a step entry, or *default*.

    *default* is returned when the entry is missing, when its "匹配结果" is not
    a dict, or when the stored score is not a number. A match result without a
    "score" key counts as score 0.
    """
    if not isinstance(step_entry, dict):
        return default
    match_result = step_entry.get("匹配结果")
    if not isinstance(match_result, dict):
        return default
    score = match_result.get("score", 0)
    return score if isinstance(score, (int, float)) else default


def main(argv=None):
    """Command-line entry point: collect, sort, summarize and save the results.

    Args:
        argv: Optional list of command-line arguments. When None, argparse
            falls back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="分析灵感点匹配结果",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
使用示例:
  # 分析默认目录
  python analyze_inspiration_results.py

  # 指定人设目录
  python analyze_inspiration_results.py --dir data/阿里多多酱/out/人设_1110

  # 指定输出文件
  python analyze_inspiration_results.py --output analysis_report.json
        """
    )

    parser.add_argument(
        "--dir",
        default="data/阿里多多酱/out/人设_1110",
        help="人设目录路径 (默认: data/阿里多多酱/out/人设_1110)"
    )

    parser.add_argument(
        "--output",
        default=None,
        help="输出文件路径 (默认: 在人设目录下的 how/灵感匹配分析.json)"
    )

    args = parser.parse_args(argv)
    persona_dir = args.dir

    print("=" * 80)
    print("灵感点匹配结果分析")
    print("=" * 80)
    print(f"人设目录: {persona_dir}\n")

    results = collect_inspiration_results(persona_dir)

    if not results:
        print("❌ 未找到任何灵感结果")
        return

    print(f"找到 {len(results)} 个灵感的匹配结果\n")

    # Sort by step1 score descending, then by step2 score descending.
    # Inspirations without a usable step2 score get -1 so they sort after
    # those that have one.
    def sort_key(item):
        return (-_step_score(item["step1"], 0), -_step_score(item["step2"], -1))

    results.sort(key=sort_key)

    output_data = {
        "元数据": {
            "人设目录": persona_dir,
            "灵感总数": len(results),
            "排序规则": "先按 step1 score 降序，再按 step2 score 降序"
        },
        "排序结果": results
    }

    # Summary statistics.
    has_step2 = sum(1 for r in results if r["step2"] is not None)
    print("统计信息:")
    print(f"  总灵感数: {len(results)}")
    print(f"  完成 step2: {has_step2}")
    print(f"  仅 step1: {len(results) - has_step2}")

    # Preview of the five best-ranked inspirations.
    print("\nTop 5 灵感 (按排序规则):")
    for i, item in enumerate(results[:5], 1):
        step1_score = _step_score(item["step1"], 0)
        step2_score = _step_score(item["step2"], None)
        step2_info = f", step2: {step2_score:.2f}" if step2_score is not None else ""
        print(f"  {i}. {item['灵感']}")
        print(f"     step1: {step1_score:.2f} → {item['step1']['匹配要素']}{step2_info}")

    if args.output:
        output_file = args.output
    else:
        output_file = os.path.join(persona_dir, "how", "灵感匹配分析.json")

    # A bare filename such as "analysis_report.json" has an empty dirname,
    # and os.makedirs("") raises FileNotFoundError, so only create the
    # parent directory when there is one.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, ensure_ascii=False, indent=2)

    print(f"\n完成！分析结果已保存到: {output_file}\n")


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()