| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 统计 how 解构文件中所有原始特征匹配到的分类/标签及其路径
- """
import json
from collections import defaultdict
from typing import Any, Dict, List, Optional, Set
def build_classification_path(classification_list: List[str]) -> str:
    """Join classification names into one "/"-separated path string.

    A falsy argument (an empty list or ``None``) produces an empty string.
    """
    return "/".join(classification_list) if classification_list else ""
def _iter_how_features(data: Dict[str, Any]):
    """Yield every feature dict nested under the three point lists of the how result."""
    how_result = data.get('how解构结果') or {}
    # The how result holds three parallel lists: inspiration, purpose and key points.
    for level_name in ('灵感点列表', '目的点列表', '关键点列表'):
        for item in how_result.get(level_name) or []:
            for step in item.get('how步骤列表') or []:
                yield from step.get('特征列表') or []


def analyze_feature_matches(json_file_path: str) -> Dict[str, Any]:
    """Collect, for every original feature in the file, its matches with similarity >= 0.8.

    Args:
        json_file_path: Path to a UTF-8 JSON file whose top-level
            ``'how解构结果'`` object is scanned.

    Returns:
        A plain dict keyed by original feature name (``'特征名称'``). Every
        named feature gets an entry, including those without any match at or
        above the threshold::

            {
                "<feature name>": {
                    "匹配的分类标签": [
                        {"名称": ..., "类型": ..., "路径": ..., "层级": ..., "相似度": 0.xxx}
                    ],
                    "统计": {"高相似度匹配数(>=0.8)": <int>}
                }
            }

        Each match list is sorted by the rounded similarity, highest first.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    high_key = "高相似度匹配数(>=0.8)"
    feature_matches: Dict[str, Any] = {}

    for feature in _iter_how_features(data):
        feature_name = feature.get('特征名称', '')
        if not feature_name:
            continue

        # Register the feature before inspecting its matches; otherwise a
        # feature with no high-similarity match never reaches the result and
        # downstream "features without matches" counts are always zero.
        entry = feature_matches.setdefault(
            feature_name,
            {"匹配的分类标签": [], "统计": {high_key: 0}},
        )

        for match in feature.get('匹配结果') or []:
            similarity = (match.get('匹配结果') or {}).get('相似度')
            # A missing, null or non-numeric similarity counts as "no match"
            # instead of raising on the comparison below.
            if not isinstance(similarity, (int, float)) or similarity < 0.8:
                continue

            entry["匹配的分类标签"].append({
                "名称": match.get('人设特征名称', ''),
                "类型": match.get('特征类型', ''),
                "路径": build_classification_path(match.get('特征分类', [])),
                "层级": match.get('人设特征层级', ''),
                "相似度": round(similarity, 3),
            })
            entry["统计"][high_key] += 1

    # The same feature name can occur in several steps, so sort once at the end.
    for entry in feature_matches.values():
        entry["匹配的分类标签"].sort(key=lambda m: m["相似度"], reverse=True)

    return feature_matches
def print_summary(results: Dict[str, Any]):
    """Print the headline statistics of an analysis result to stdout.

    Reports the number of features, how many of them have at least one
    high-similarity match, the total number of such matches, and finally the
    banner that introduces the per-feature listing.
    """
    stat_key = "高相似度匹配数(>=0.8)"
    banner = "=" * 80

    print(banner, "原始特征匹配统计摘要(仅相似度>=0.8)", banner, sep="\n")

    # One counter per feature, in the mapping's iteration order.
    per_feature = [entry["统计"][stat_key] for entry in results.values()]
    total_features = len(results)
    features_with_matches = len([count for count in per_feature if count > 0])

    print(f"\n总原始特征数: {total_features}")
    print(f"有高相似度匹配的特征数: {features_with_matches}")
    print(f"无匹配的特征数: {total_features - features_with_matches}")

    total_matches = sum(per_feature)
    print(f"\n总高相似度匹配数(>=0.8): {total_matches}")

    print("\n" + banner)
    print("各原始特征详细匹配情况")
    print(banner)
def print_detailed_results(results: Dict[str, Any], top_n: Optional[int] = None) -> None:
    """Print the match list of every feature that has high-similarity matches.

    Args:
        results: Mapping of feature name to an entry with the keys
            ``"统计"`` and ``"匹配的分类标签"``.
        top_n: Maximum number of matches to print per feature. ``None``
            (the default) prints all of them; ``0`` prints none.
    """
    for idx, (feature_name, data) in enumerate(results.items(), 1):
        matches = data["匹配的分类标签"]
        match_count = data["统计"]['高相似度匹配数(>=0.8)']

        # Features without matches are skipped but still consume an index,
        # so the printed numbers reflect positions in ``results``.
        if match_count == 0:
            continue

        print(f"\n[{idx}] 原始特征: {feature_name}")
        print(f" 高相似度匹配数(>=0.8): {match_count}")

        # Compare against None rather than truthiness so that an explicit
        # top_n=0 limits the output instead of meaning "no limit".
        display_matches = matches if top_n is None else matches[:top_n]
        print(f" 匹配列表(共{len(display_matches)}个):")

        for i, match in enumerate(display_matches, 1):
            print(f" {i}. {match['名称']} ({match['相似度']:.3f})")
            print(f" 类型: {match['类型']}, 层级: {match['层级']}")
            if match['路径']:
                print(f" 路径: {match['路径']}")
            else:
                # An empty path is reported as a top-level classification.
                print(" 路径: (顶级分类)")
def save_results(results: Dict[str, Any], output_file: str):
    """Write ``results`` to ``output_file`` as indented UTF-8 JSON and report the path.

    Non-ASCII characters are written as-is rather than as ``\\u`` escapes.
    """
    dump_options = {"ensure_ascii": False, "indent": 2}
    with open(output_file, mode='w', encoding='utf-8') as out_fh:
        json.dump(results, out_fh, **dump_options)
    print(f"\n详细结果已保存到: {output_file}")
def main(
    input_file: str = "/Users/liulidong/project/pattern相关文件/optimization/690d977d0000000007036331_how.json",
    output_file: str = "/Users/liulidong/project/pattern相关文件/optimization/feature_matches_analysis.json",
) -> None:
    """Run the whole analysis: load, summarise, list details, and save.

    Args:
        input_file: Path of the how-deconstruction JSON file to analyse.
            Defaults to the location the script was originally written for.
        output_file: Path the analysis result is written to as JSON.
            Defaults to the location the script was originally written for.

    The paths are parameters so the script can be reused on other files or
    machines; calling ``main()`` with no arguments behaves as before.
    """
    print("开始分析特征匹配...")

    results = analyze_feature_matches(input_file)

    print_summary(results)

    # Show every match of every feature; no per-feature limit.
    print_detailed_results(results, top_n=None)

    save_results(results, output_file)

    print("\n分析完成!")
# Run the analysis only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|