#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Collect, for every original feature in a "how" deconstruction file, the
persona classifications/tags it matched (similarity >= 0.8) and their paths.
"""

import json
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set

# Matches scoring below this similarity are discarded.
HIGH_SIMILARITY_THRESHOLD = 0.8

# Keys of the per-feature result entry (they are also written to the output JSON).
MATCHES_KEY = "匹配的分类标签"
STATS_KEY = "统计"
HIGH_COUNT_KEY = "高相似度匹配数(>=0.8)"

# The three point lists under 'how解构结果' that are scanned for features.
LEVEL_LIST_KEYS = ('灵感点列表', '目的点列表', '关键点列表')

# Default locations used when main() is called without arguments.
DEFAULT_INPUT_FILE = "/Users/liulidong/project/pattern相关文件/optimization/690d977d0000000007036331_how.json"
DEFAULT_OUTPUT_FILE = "/Users/liulidong/project/pattern相关文件/optimization/feature_matches_analysis.json"


def build_classification_path(classification_list: List[str]) -> str:
    """Join the classification names with '/'; return '' for an empty list."""
    if not classification_list:
        return ""
    return "/".join(classification_list)


def _new_feature_entry() -> Dict[str, Any]:
    """Return a fresh result entry: no matches yet and a zeroed counter."""
    return {MATCHES_KEY: [], STATS_KEY: {HIGH_COUNT_KEY: 0}}


def _build_match_info(match: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Convert one raw match record into an output record.

    Returns None when the similarity is missing, not a number, or below
    HIGH_SIMILARITY_THRESHOLD, so the caller can simply skip the match.
    """
    # `or {}` guards against an explicit JSON null for the nested result.
    similarity = (match.get('匹配结果') or {}).get('相似度', 0)
    if not isinstance(similarity, (int, float)):
        return None
    if similarity < HIGH_SIMILARITY_THRESHOLD:
        return None

    return {
        "名称": match.get('人设特征名称', ''),
        "类型": match.get('特征类型', ''),
        "路径": build_classification_path(match.get('特征分类', [])),
        "层级": match.get('人设特征层级', ''),
        "相似度": round(similarity, 3),
    }


def analyze_feature_matches(json_file_path: str) -> Dict[str, Any]:
    """Analyze the matches of every original feature in a "how" JSON file.

    The file is expected to hold a 'how解构结果' object whose point lists
    (see LEVEL_LIST_KEYS) contain items -> 'how步骤列表' -> '特征列表' ->
    '匹配结果'. Only matches with similarity >= 0.8 are kept.

    Every feature with a non-empty name gets an entry, including features
    that have no match above the threshold (empty list, counter 0), so the
    summary can report how many features went unmatched.

    Returned structure::

        {
            "<feature name>": {
                "匹配的分类标签": [
                    {"名称": ..., "类型": ..., "路径": ..., "层级": ..., "相似度": 0.xxx},
                    ...  # sorted by 相似度, descending
                ],
                "统计": {"高相似度匹配数(>=0.8)": <int>}
            }
        }

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    feature_matches: Dict[str, Dict[str, Any]] = defaultdict(_new_feature_entry)

    # `or` fallbacks tolerate keys that are present but null in the JSON.
    how_result = data.get('how解构结果') or {}

    for level_name in LEVEL_LIST_KEYS:
        for item in how_result.get(level_name) or []:
            for step in item.get('how步骤列表') or []:
                for feature in step.get('特征列表') or []:
                    feature_name = feature.get('特征名称', '')
                    if not feature_name:
                        continue

                    # Register the feature before filtering its matches so
                    # that features without any high-similarity match still
                    # appear in the results.
                    entry = feature_matches[feature_name]

                    for match in feature.get('匹配结果') or []:
                        match_info = _build_match_info(match)
                        if match_info is None:
                            continue
                        entry[MATCHES_KEY].append(match_info)
                        entry[STATS_KEY][HIGH_COUNT_KEY] += 1

    # Best matches first for every feature.
    for entry in feature_matches.values():
        entry[MATCHES_KEY].sort(key=lambda m: m["相似度"], reverse=True)

    return dict(feature_matches)


def print_summary(results: Dict[str, Any]):
    """Print overall statistics followed by the header of the detail section."""
    print("=" * 80)
    print("原始特征匹配统计摘要(仅相似度>=0.8)")
    print("=" * 80)

    counts = [data[STATS_KEY][HIGH_COUNT_KEY] for data in results.values()]
    total_features = len(results)
    features_with_matches = sum(1 for count in counts if count > 0)

    print(f"\n总原始特征数: {total_features}")
    print(f"有高相似度匹配的特征数: {features_with_matches}")
    print(f"无匹配的特征数: {total_features - features_with_matches}")

    print(f"\n总高相似度匹配数(>=0.8): {sum(counts)}")

    print("\n" + "=" * 80)
    print("各原始特征详细匹配情况")
    print("=" * 80)


def print_detailed_results(results: Dict[str, Any], top_n: Optional[int] = None):
    """Print the match list of every feature that has at least one match.

    Args:
        results: mapping produced by analyze_feature_matches().
        top_n: show only the first N matches per feature; None (or 0) shows
            all of them.
    """
    # The index counts every feature, so skipped features leave gaps.
    for idx, (feature_name, data) in enumerate(results.items(), 1):
        matches = data[MATCHES_KEY]
        match_count = data[STATS_KEY][HIGH_COUNT_KEY]

        # Features without matches are only reflected in the summary.
        if match_count == 0:
            continue

        print(f"\n[{idx}] 原始特征: {feature_name}")
        print(f" 高相似度匹配数(>=0.8): {match_count}")

        display_matches = matches[:top_n] if top_n else matches
        print(f" 匹配列表(共{len(display_matches)}个):")

        for i, match in enumerate(display_matches, 1):
            print(f" {i}. {match['名称']} ({match['相似度']:.3f})")
            print(f" 类型: {match['类型']}, 层级: {match['层级']}")
            if match['路径']:
                print(f" 路径: {match['路径']}")
            else:
                # An empty path is reported as a top-level classification.
                print(" 路径: (顶级分类)")


def save_results(results: Dict[str, Any], output_file: str):
    """Write the results to `output_file` as indented, non-ASCII-escaped JSON."""
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n详细结果已保存到: {output_file}")


def main(input_file: str = DEFAULT_INPUT_FILE, output_file: str = DEFAULT_OUTPUT_FILE):
    """Run the analysis: load, print summary and details, then save.

    Args:
        input_file: path of the "how" JSON file to analyze.
        output_file: path the resulting JSON is written to.
    """
    print("开始分析特征匹配...")

    results = analyze_feature_matches(input_file)

    print_summary(results)

    # Show every match; no per-feature limit.
    print_detailed_results(results, top_n=None)

    save_results(results, output_file)

    print("\n分析完成!")


if __name__ == "__main__":
    main()