#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Analyze the feature classification tree: count the features under the
inspiration points, purpose points and key points, including every
sub-category beneath them.
"""
  6. import json
  7. import os
  8. from typing import Dict, Any, Tuple, List
  9. def count_features(node: Dict[str, Any]) -> Tuple[int, int]:
  10. """
  11. 递归统计节点的特征点数量
  12. Args:
  13. node: 树节点字典
  14. Returns:
  15. (直接特征数, 总特征数) 元组
  16. """
  17. # 统计直接特征点数量
  18. direct_count = 0
  19. if "特征列表" in node and isinstance(node["特征列表"], list):
  20. direct_count = len(node["特征列表"])
  21. # 统计所有子节点的特征点数量
  22. total_count = direct_count
  23. # 遍历所有子节点(排除特殊键)
  24. special_keys = {"_meta", "特征列表", "帖子数", "特征数", "帖子列表"}
  25. for key, value in node.items():
  26. if key not in special_keys and isinstance(value, dict):
  27. _, child_total = count_features(value)
  28. total_count += child_total
  29. return direct_count, total_count
  30. def collect_all_categories(node: Dict[str, Any], path: str, results: List[Dict[str, Any]]):
  31. """
  32. 递归收集所有子分类及其特征统计信息
  33. Args:
  34. node: 当前节点字典
  35. path: 当前分类的路径(如:灵感点列表 > 实质 > 拟人化穿搭)
  36. results: 结果列表,用于存储所有分类的统计信息
  37. """
  38. # 统计当前节点的特征
  39. direct_count, total_count = count_features(node)
  40. # 记录当前分类的统计信息
  41. results.append({
  42. "分类路径": path,
  43. "直接特征数": direct_count,
  44. "总特征数": total_count
  45. })
  46. # 遍历所有子节点(排除特殊键)
  47. special_keys = {"_meta", "特征列表", "帖子数", "特征数", "帖子列表"}
  48. for key, value in node.items():
  49. if key not in special_keys and isinstance(value, dict):
  50. # 构建子分类的路径
  51. child_path = f"{path} > {key}" if path else key
  52. # 递归处理子分类
  53. collect_all_categories(value, child_path, results)
  54. def analyze_top_category(data: Dict[str, Any], category_name: str) -> List[Dict[str, Any]]:
  55. """
  56. 分析顶层分类及其所有子分类
  57. Args:
  58. data: 整个JSON数据
  59. category_name: 顶层分类名称
  60. Returns:
  61. 包含所有子分类统计信息的列表
  62. """
  63. if category_name not in data:
  64. return []
  65. category_node = data[category_name]
  66. results = []
  67. # 收集顶层分类本身(如果有直接特征)
  68. direct_count, total_count = count_features(category_node)
  69. results.append({
  70. "分类路径": category_name,
  71. "直接特征数": direct_count,
  72. "总特征数": total_count
  73. })
  74. # 递归收集所有子分类
  75. special_keys = {"_meta", "特征列表", "帖子数", "特征数", "帖子列表"}
  76. for key, value in category_node.items():
  77. if key not in special_keys and isinstance(value, dict):
  78. path = f"{category_name} > {key}"
  79. collect_all_categories(value, path, results)
  80. return results
  81. def main():
  82. # 获取脚本所在目录
  83. script_dir = os.path.dirname(os.path.abspath(__file__))
  84. # 读取JSON文件(与脚本在同一目录)
  85. json_file = os.path.join(script_dir, "detail_tree.json")
  86. if not os.path.exists(json_file):
  87. print(f"错误: 找不到文件 {json_file}")
  88. return
  89. print(f"正在读取文件: {json_file}")
  90. with open(json_file, 'r', encoding='utf-8') as f:
  91. data = json.load(f)
  92. # 定义要分析的三个顶层分类
  93. top_categories = ["灵感点列表", "目的点", "关键点列表"]
  94. # 按顶层分类分组统计
  95. print("\n" + "="*100)
  96. print("按顶层分类分组统计:")
  97. print("="*100)
  98. for category in top_categories:
  99. print(f"\n正在分析: {category}")
  100. category_results = analyze_top_category(data, category)
  101. print(f" 找到 {len(category_results)} 个子分类")
  102. if category_results:
  103. # 按总特征数排序
  104. category_sorted = sorted(category_results, key=lambda x: x['总特征数'], reverse=True)
  105. print(f"\n【{category}】共 {len(category_results)} 个子分类")
  106. print(f"{'排名':<6} {'分类路径':<80} {'直接特征数':<12} {'总特征数':<12}")
  107. print("-" * 110)
  108. for i, result in enumerate(category_sorted, 1):
  109. path = result['分类路径']
  110. print(f"{i:<6} {path:<80} {result['直接特征数']:<12} {result['总特征数']:<12}")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()