#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Analyze a feature classification tree.

For each of the top-level categories (inspiration points, purpose points,
key points) and every nested sub-category, count the features attached
directly to the category and the features in its whole subtree, then print
a ranked table per top-level category.
"""

import json
import os
from typing import Any, Dict, Iterator, List, Tuple

# Keys of a tree node that hold metadata/payload rather than child categories.
_SPECIAL_KEYS = frozenset({"_meta", "特征列表", "帖子数", "特征数", "帖子列表"})


def _iter_children(node: Dict[str, Any]) -> Iterator[Tuple[str, Dict[str, Any]]]:
    """Yield ``(name, child)`` for every child category of *node*.

    A child category is any dict-valued entry whose key is not one of the
    reserved metadata keys.
    """
    for key, value in node.items():
        if key not in _SPECIAL_KEYS and isinstance(value, dict):
            yield key, value


def _direct_feature_count(node: Dict[str, Any]) -> int:
    """Return the length of the node's own feature list (0 if absent or not a list)."""
    features = node.get("特征列表")
    return len(features) if isinstance(features, list) else 0


def _collect(node: Dict[str, Any], path: str, results: List[Dict[str, Any]]) -> int:
    """Append stats for *node* and its descendants to *results*; return the subtree total.

    Entries are appended in pre-order (a category before its sub-categories).
    The subtree total is accumulated from the recursive calls, so every node
    is visited exactly once instead of being re-counted for each ancestor.
    """
    direct = _direct_feature_count(node)
    entry = {
        "分类路径": path,
        "直接特征数": direct,
        "总特征数": direct,
    }
    # Append before recursing so the parent precedes its children in the output.
    results.append(entry)

    for key, child in _iter_children(node):
        child_path = f"{path} > {key}" if path else key
        entry["总特征数"] += _collect(child, child_path, results)

    return entry["总特征数"]


def count_features(node: Dict[str, Any]) -> Tuple[int, int]:
    """
    Recursively count the features of a tree node.

    Args:
        node: Tree node dictionary.

    Returns:
        A ``(direct_count, total_count)`` tuple: the number of features listed
        on the node itself, and that number plus the totals of all child
        categories.
    """
    direct_count = _direct_feature_count(node)
    total_count = direct_count + sum(
        count_features(child)[1] for _, child in _iter_children(node)
    )
    return direct_count, total_count


def collect_all_categories(node: Dict[str, Any], path: str, results: List[Dict[str, Any]]):
    """
    Recursively collect feature statistics for a category and all sub-categories.

    Args:
        node: Current node dictionary.
        path: Path of the current category, segments joined by " > ".
        results: List that receives one statistics dict per category, in
            pre-order. It is mutated in place; nothing is returned.
    """
    _collect(node, path, results)


def analyze_top_category(data: Dict[str, Any], category_name: str) -> List[Dict[str, Any]]:
    """
    Analyze one top-level category and all of its sub-categories.

    Args:
        data: The whole parsed JSON document.
        category_name: Name of the top-level category.

    Returns:
        A list of statistics dicts, the top-level category first followed by
        its descendants in pre-order. An empty list is returned when the
        document is not a JSON object, the category is missing, or the
        category's value is not an object (there is nothing to walk).
    """
    if not isinstance(data, dict):
        return []

    category_node = data.get(category_name)
    if not isinstance(category_node, dict):
        return []

    results: List[Dict[str, Any]] = []

    # The top-level category itself is always reported, even with no direct features.
    direct_count = _direct_feature_count(category_node)
    top_entry = {
        "分类路径": category_name,
        "直接特征数": direct_count,
        "总特征数": direct_count,
    }
    results.append(top_entry)

    # Child paths are always prefixed with the top-level category name.
    for key, child in _iter_children(category_node):
        top_entry["总特征数"] += _collect(child, f"{category_name} > {key}", results)

    return results


def main() -> None:
    """Load ``detail_tree.json`` next to this script and print per-category tables."""
    # The JSON file is expected in the same directory as the script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    json_file = os.path.join(script_dir, "detail_tree.json")

    if not os.path.exists(json_file):
        print(f"错误: 找不到文件 {json_file}")
        return

    print(f"正在读取文件: {json_file}")
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The three top-level categories to analyze.
    top_categories = ["灵感点列表", "目的点", "关键点列表"]

    print("\n" + "="*100)
    print("按顶层分类分组统计:")
    print("="*100)

    for category in top_categories:
        print(f"\n正在分析: {category}")
        category_results = analyze_top_category(data, category)
        print(f" 找到 {len(category_results)} 个子分类")

        if category_results:
            # Rank categories by subtree feature total, largest first.
            category_sorted = sorted(category_results, key=lambda x: x['总特征数'], reverse=True)

            print(f"\n【{category}】共 {len(category_results)} 个子分类")
            print(f"{'排名':<6} {'分类路径':<80} {'直接特征数':<12} {'总特征数':<12}")
            print("-" * 110)

            for i, result in enumerate(category_sorted, 1):
                path = result['分类路径']
                print(f"{i:<6} {path:<80} {result['直接特征数']:<12} {result['总特征数']:<12}")


if __name__ == "__main__":
    main()