analyze_feature_matches.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Collect, for every original feature in a "how" deconstruction file, the
classifications/tags it matched and their classification paths.
"""
import json
from collections import defaultdict
from typing import Any, Dict, List, Optional


def build_classification_path(classification_list: List[str]) -> str:
    """Join a classification list into a path string, e.g. ["A", "B"] -> "A/B"."""
    if not classification_list:
        return ""
    return "/".join(classification_list)


def analyze_feature_matches(json_file_path: str) -> Dict[str, Any]:
    """
    Analyze the match results of every original feature in the file.

    Only matches with similarity >= 0.8 are kept. Returned structure:
    {
        "原始特征1": {
            "匹配的分类标签": [
                {
                    "名称": "...",
                    "类型": "标签/分类",
                    "路径": "...",
                    "层级": "...",
                    "相似度": 0.xxx
                }
            ],
            "统计": {
                "高相似度匹配数(>=0.8)": xxx
            }
        }
    }
    """
    # Read the JSON file
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Result container: one entry per original feature
    feature_matches = defaultdict(lambda: {
        "匹配的分类标签": [],
        "统计": {
            "高相似度匹配数(>=0.8)": 0
        }
    })

    # Walk the "how" deconstruction result
    how_result = data.get('how解构结果', {})

    # Process the three lists: inspiration points, purpose points, key points
    for level_name in ['灵感点列表', '目的点列表', '关键点列表']:
        level_list = how_result.get(level_name, [])

        for item in level_list:
            # Walk the list of "how" steps
            for step in item.get('how步骤列表', []):
                # Walk the features of each step
                for feature in step.get('特征列表', []):
                    feature_name = feature.get('特征名称', '')
                    matches = feature.get('匹配结果', [])

                    if not feature_name:
                        continue

                    # Touch the entry so every named feature shows up in the
                    # results, even when none of its matches clears the threshold
                    entry = feature_matches[feature_name]

                    # Process each match result
                    for match in matches:
                        persona_feature_name = match.get('人设特征名称', '')
                        feature_type = match.get('特征类型', '')
                        classification_list = match.get('特征分类', [])
                        feature_level = match.get('人设特征层级', '')
                        similarity = match.get('匹配结果', {}).get('相似度', 0)

                        # Keep only matches with similarity >= 0.8
                        if similarity < 0.8:
                            continue

                        # Build the classification path
                        path = build_classification_path(classification_list)

                        # Add to the results
                        match_info = {
                            "名称": persona_feature_name,
                            "类型": feature_type,
                            "路径": path,
                            "层级": feature_level,
                            "相似度": round(similarity, 3)
                        }
                        entry["匹配的分类标签"].append(match_info)

                        # Update the statistics
                        entry["统计"]["高相似度匹配数(>=0.8)"] += 1

    # Sort each feature's matches by similarity, descending
    for feature_name in feature_matches:
        feature_matches[feature_name]["匹配的分类标签"].sort(
            key=lambda x: x["相似度"],
            reverse=True
        )

    return dict(feature_matches)


def print_summary(results: Dict[str, Any]):
    """Print a summary of the match statistics."""
    print("=" * 80)
    print("原始特征匹配统计摘要(仅相似度>=0.8)")
    print("=" * 80)

    total_features = len(results)
    # Count the features that have at least one high-similarity match
    features_with_matches = sum(
        1 for data in results.values() if data["统计"]["高相似度匹配数(>=0.8)"] > 0
    )

    print(f"\n总原始特征数: {total_features}")
    print(f"有高相似度匹配的特征数: {features_with_matches}")
    print(f"无匹配的特征数: {total_features - features_with_matches}")

    # Overall totals
    total_matches = 0
    for feature_name, data in results.items():
        stats = data["统计"]
        total_matches += stats["高相似度匹配数(>=0.8)"]

    print(f"\n总高相似度匹配数(>=0.8): {total_matches}")

    print("\n" + "=" * 80)
    print("各原始特征详细匹配情况")
    print("=" * 80)


def print_detailed_results(results: Dict[str, Any], top_n: Optional[int] = None):
    """Print the detailed match list for each original feature."""
    for idx, (feature_name, data) in enumerate(results.items(), 1):
        stats = data["统计"]
        matches = data["匹配的分类标签"]
        match_count = stats['高相似度匹配数(>=0.8)']

        # Skip features without any qualifying match
        if match_count == 0:
            continue

        print(f"\n[{idx}] 原始特征: {feature_name}")
        print(f"  高相似度匹配数(>=0.8): {match_count}")

        # Show all matches, or only the first top_n when a limit is given
        display_matches = matches[:top_n] if top_n else matches
        print(f"  匹配列表(共{len(display_matches)}个):")
        for i, match in enumerate(display_matches, 1):
            print(f"    {i}. {match['名称']} ({match['相似度']:.3f})")
            print(f"       类型: {match['类型']}, 层级: {match['层级']}")
            if match['路径']:
                print(f"       路径: {match['路径']}")
            else:
                print("       路径: (顶级分类)")


def save_results(results: Dict[str, Any], output_file: str):
    """Save the results to a JSON file."""
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n详细结果已保存到: {output_file}")


def main():
    # Input file path
    input_file = "/Users/liulidong/project/pattern相关文件/optimization/690d977d0000000007036331_how.json"
    # Output file path
    output_file = "/Users/liulidong/project/pattern相关文件/optimization/feature_matches_analysis.json"

    print("开始分析特征匹配...")

    # Run the analysis
    results = analyze_feature_matches(input_file)

    # Print the summary
    print_summary(results)

    # Print detailed results (show all matches, no limit)
    print_detailed_results(results, top_n=None)

    # Save the results
    save_results(results, output_file)

    print("\n分析完成!")


if __name__ == "__main__":
    main()
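

# Usage note: run this script directly (python analyze_feature_matches.py); the input
# and output paths are hard-coded in main() above, so adjust them before running elsewhere.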