#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Source repository: yangxiaohui/kg_agent
"""
分析特征分类树，统计灵感点、目的点、关键点及其所有子分类的特征数量
"""

import json
import os
from typing import Dict, Any, Tuple, List


def count_features(node: Dict[str, Any]) -> Tuple[int, int]:
    """
    Recursively count the features attached to a tree node.

    Args:
        node: A tree node represented as a dict.

    Returns:
        A ``(direct, total)`` tuple: ``direct`` is the length of this node's
        own "特征列表" list (0 when the key is absent or its value is not a
        list); ``total`` is ``direct`` plus the totals of every dict-valued
        child that is not stored under one of the reserved bookkeeping keys.
    """
    own_features = node.get("特征列表")
    direct_count = len(own_features) if isinstance(own_features, list) else 0

    # Keys that hold metadata rather than child categories.
    reserved = {"_meta", "特征列表", "帖子数", "特征数", "帖子列表"}

    # Only dict values under non-reserved keys are treated as sub-categories.
    nested_total = sum(
        count_features(child)[1]
        for name, child in node.items()
        if isinstance(child, dict) and name not in reserved
    )

    return direct_count, direct_count + nested_total


def collect_all_categories(node: Dict[str, Any], path: str, results: List[Dict[str, Any]]):
    """
    Append feature statistics for ``node`` and every nested sub-category.

    The entry for ``node`` is appended first, followed by the entries of its
    sub-categories in depth-first order.

    Args:
        node: The current node dict.
        path: Path label of the current category, with levels joined by
            " > " (e.g. "灵感点列表 > 实质 > 拟人化穿搭").
        results: Output list; one dict per category is appended to it in place.
    """
    own_count, subtree_count = count_features(node)
    entry = {
        "分类路径": path,
        "直接特征数": own_count,
        "总特征数": subtree_count,
    }
    results.append(entry)

    # Keys that hold metadata rather than child categories.
    reserved = {"_meta", "特征列表", "帖子数", "特征数", "帖子列表"}

    for name, child in node.items():
        if name in reserved or not isinstance(child, dict):
            continue
        # An empty parent path means the child name is the whole path.
        child_path = name if not path else f"{path} > {name}"
        collect_all_categories(child, child_path, results)


def analyze_top_category(data: Dict[str, Any], category_name: str) -> List[Dict[str, Any]]:
    """
    Collect feature statistics for a top-level category and all its descendants.

    Args:
        data: The whole parsed JSON document.
        category_name: Key of the top-level category to analyze.

    Returns:
        A list of dicts with the keys "分类路径", "直接特征数" and "总特征数".
        The top-level category comes first, followed by its sub-categories in
        depth-first order. The list is empty when the category is missing or
        when its value is not a dict (e.g. JSON ``null``, a list or a string),
        since such a value cannot be traversed as a category tree.
    """
    category_node = data.get(category_name) if isinstance(data, dict) else None
    if not isinstance(category_node, dict):
        # Missing or malformed category: nothing to report instead of crashing.
        return []

    # The top-level category itself is always reported, even with no direct features.
    direct_count, total_count = count_features(category_node)
    results: List[Dict[str, Any]] = [{
        "分类路径": category_name,
        "直接特征数": direct_count,
        "总特征数": total_count,
    }]

    # Keys that hold metadata rather than child categories.
    special_keys = {"_meta", "特征列表", "帖子数", "特征数", "帖子列表"}
    for key, value in category_node.items():
        if key not in special_keys and isinstance(value, dict):
            collect_all_categories(value, f"{category_name} > {key}", results)

    return results


def main():
    """
    Load ``detail_tree.json`` from the script's directory and print, for each
    of the three top-level categories, a table of every category path with its
    direct and total feature counts, sorted by total count in descending order.
    """
    # The JSON file is expected to sit next to this script.
    here = os.path.dirname(os.path.abspath(__file__))
    json_file = os.path.join(here, "detail_tree.json")

    if not os.path.exists(json_file):
        print(f"错误: 找不到文件 {json_file}")
        return

    print(f"正在读取文件: {json_file}")
    with open(json_file, 'r', encoding='utf-8') as handle:
        tree = json.load(handle)

    banner = "=" * 100
    print("\n" + banner)
    print("按顶层分类分组统计:")
    print(banner)

    # The three top-level categories to analyze, in report order.
    for category in ("灵感点列表", "目的点", "关键点列表"):
        print(f"\n正在分析: {category}")
        rows = analyze_top_category(tree, category)
        row_count = len(rows)
        print(f"  找到 {row_count} 个子分类")

        if not rows:
            continue

        # Stable descending sort by total feature count.
        ranked = list(rows)
        ranked.sort(key=lambda row: row['总特征数'], reverse=True)

        print(f"\n【{category}】共 {row_count} 个子分类")
        print(f"{'排名':<6} {'分类路径':<80} {'直接特征数':<12} {'总特征数':<12}")
        print("-" * 110)
        for rank, row in enumerate(ranked, start=1):
            print(f"{rank:<6} {row['分类路径']:<80} {row['直接特征数']:<12} {row['总特征数']:<12}")


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()