| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 分析特征分类树,统计灵感点、目的点、关键点及其所有子分类的特征数量
- """
- import json
- import os
- from typing import Dict, Any, Tuple, List
def count_features(node: Dict[str, Any]) -> Tuple[int, int]:
    """
    Recursively count the features attached to a tree node.

    A node's own features are the items of its "特征列表" entry when that
    entry is a list. Every other dict-valued entry whose key is not one of
    the bookkeeping keys is treated as a child category and counted
    recursively.

    Args:
        node: Tree node dictionary. Any non-dict value (for example a JSON
            null or list) is treated as an empty node.

    Returns:
        A (direct_count, total_count) tuple: the number of features listed
        on this node itself, and that number plus the totals of all
        descendant categories.
    """
    # Callers may hand over raw JSON values; a non-dict carries no features.
    if not isinstance(node, dict):
        return 0, 0

    # Features listed directly on this node (ignored unless it is a list).
    features = node.get("特征列表")
    direct_count = len(features) if isinstance(features, list) else 0

    # Keys that hold metadata / payload rather than child categories.
    special_keys = {"_meta", "特征列表", "帖子数", "特征数", "帖子列表"}

    total_count = direct_count
    for key, value in node.items():
        if key not in special_keys and isinstance(value, dict):
            # Only the child's subtree total contributes to this node.
            _, child_total = count_features(value)
            total_count += child_total

    return direct_count, total_count
def collect_all_categories(node: Dict[str, Any], path: str, results: List[Dict[str, Any]]):
    """
    Recursively record feature statistics for a category and all of its
    sub-categories.

    One row is appended to ``results`` per category, parent before
    children, with the keys "分类路径", "直接特征数" and "总特征数". The
    whole subtree is walked once: each category's total is accumulated from
    the totals its children return, instead of recounting the subtree at
    every level.

    Args:
        node: Current category node dictionary.
        path: Path of the current category
            (e.g. "灵感点列表 > 实质 > 拟人化穿搭").
        results: Output list that receives one statistics dict per
            category; it is mutated in place.
    """
    # Keys that hold metadata / payload rather than child categories.
    special_keys = {"_meta", "特征列表", "帖子数", "特征数", "帖子列表"}

    def visit(current: Dict[str, Any], current_path: str) -> int:
        """Append the row for ``current`` and return its total feature count."""
        features = current.get("特征列表")
        direct_count = len(features) if isinstance(features, list) else 0

        # Append before descending so the parent row precedes its children;
        # the total is completed in place as each child reports back.
        entry = {
            "分类路径": current_path,
            "直接特征数": direct_count,
            "总特征数": direct_count,
        }
        results.append(entry)

        for key, value in current.items():
            if key not in special_keys and isinstance(value, dict):
                # An empty parent path means the child key is the whole path.
                child_path = f"{current_path} > {key}" if current_path else key
                entry["总特征数"] += visit(value, child_path)

        return entry["总特征数"]

    visit(node, path)
def analyze_top_category(data: Dict[str, Any], category_name: str) -> List[Dict[str, Any]]:
    """
    Analyze one top-level category together with all of its sub-categories.

    Args:
        data: The whole parsed JSON document.
        category_name: Key of the top-level category to analyze.

    Returns:
        A list of statistics dicts (keys "分类路径", "直接特征数",
        "总特征数"): the top-level category first, followed by every
        sub-category in depth-first order. The list is empty when the
        category is missing or its value is not a dictionary.
    """
    if category_name not in data:
        return []

    category_node = data[category_name]
    # A null / list / scalar value has no categories to report; without this
    # check the traversal below would raise on it.
    if not isinstance(category_node, dict):
        return []

    results: List[Dict[str, Any]] = []

    # Row for the top-level category itself (it may carry direct features).
    direct_count, total_count = count_features(category_node)
    results.append({
        "分类路径": category_name,
        "直接特征数": direct_count,
        "总特征数": total_count
    })

    # Recurse into every child category, skipping the bookkeeping keys.
    special_keys = {"_meta", "特征列表", "帖子数", "特征数", "帖子列表"}
    for key, value in category_node.items():
        if key not in special_keys and isinstance(value, dict):
            path = f"{category_name} > {key}"
            collect_all_categories(value, path, results)

    return results
def main():
    """
    Load "detail_tree.json" from the script's own directory and print, for
    each of the three top-level categories, a table of every category path
    ranked by total feature count (highest first).
    """
    # The data file is expected to sit next to this script.
    here = os.path.dirname(os.path.abspath(__file__))
    json_file = os.path.join(here, "detail_tree.json")

    if not os.path.exists(json_file):
        print(f"错误: 找不到文件 {json_file}")
        return

    print(f"正在读取文件: {json_file}")
    with open(json_file, 'r', encoding='utf-8') as handle:
        data = json.load(handle)

    # Section banner.
    banner = "=" * 100
    print("\n" + banner)
    print("按顶层分类分组统计:")
    print(banner)

    # The three top-level categories to report on, in output order.
    for category in ("灵感点列表", "目的点", "关键点列表"):
        print(f"\n正在分析: {category}")
        rows = analyze_top_category(data, category)
        row_count = len(rows)
        print(f" 找到 {row_count} 个子分类")

        if not rows:
            continue

        # Rank by total feature count, descending; ties keep input order.
        ranked = list(rows)
        ranked.sort(key=lambda row: row['总特征数'], reverse=True)

        print(f"\n【{category}】共 {row_count} 个子分类")
        print(f"{'排名':<6} {'分类路径':<80} {'直接特征数':<12} {'总特征数':<12}")
        print("-" * 110)
        rank = 0
        for row in ranked:
            rank += 1
            category_path = row['分类路径']
            direct = row['直接特征数']
            total = row['总特征数']
            print(f"{rank:<6} {category_path:<80} {direct:<12} {total:<12}")


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|