#!/usr/bin/env python3
"""
Check whether ``三点解构.关键点.key_points`` is an empty array in the JSON
files found under each account directory, and print a report.
"""

import json
import os
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# One scan entry: (file path, is_empty, error message).
# ``is_empty`` is None when the file could not be evaluated.
ResultEntry = Tuple[str, Optional[bool], str]


def check_key_points_empty(file_path: str) -> Tuple[Optional[bool], str]:
    """
    Check whether ``key_points`` in a single JSON file is an empty list.

    Args:
        file_path: Path of the JSON file to inspect.

    Returns:
        ``(is_empty, error_msg)``. ``is_empty`` is True/False when
        ``三点解构.关键点.key_points`` is a list, and None when the file
        cannot be read or parsed, a field is missing, or the value is not
        a list; in that case ``error_msg`` describes the problem.
        ``error_msg`` is the empty string on success.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Walk down to 三点解构 -> 关键点 -> key_points.
        san_dian = data.get('三点解构', None)
        if san_dian is None:
            return None, "缺少'三点解构'字段"

        guan_jian_dian = san_dian.get('关键点', None)
        if guan_jian_dian is None:
            return None, "缺少'关键点'字段"

        key_points = guan_jian_dian.get('key_points', None)
        if key_points is None:
            return None, "缺少'key_points'字段"

        if not isinstance(key_points, list):
            return None, f"key_points不是数组类型,而是{type(key_points).__name__}"

        return len(key_points) == 0, ""

    except json.JSONDecodeError as e:
        return None, f"JSON解析错误: {e}"
    except Exception as e:
        # Deliberate per-file boundary: one unreadable or oddly shaped file
        # (e.g. a top-level JSON array, which has no .get) must not abort
        # the whole scan, so it is reported as an error entry instead.
        return None, f"读取文件错误: {e}"


def scan_directory(directory: str) -> Dict[str, List[ResultEntry]]:
    """
    Check every ``*.json`` file directly inside *directory*.

    Args:
        directory: Directory to scan (not recursive).

    Returns:
        A dict with the keys ``'empty'``, ``'not_empty'`` and ``'error'``,
        each holding a list of ``(file path, is_empty, error_msg)`` tuples.
    """
    results: Dict[str, List[ResultEntry]] = {
        'empty': [],
        'not_empty': [],
        'error': [],
    }

    # Sorted so the report order does not depend on filesystem order.
    for json_file in sorted(Path(directory).glob('*.json')):
        path_str = str(json_file)
        is_empty, error_msg = check_key_points_empty(path_str)

        if is_empty is None:
            bucket = 'error'
        elif is_empty:
            bucket = 'empty'
        else:
            bucket = 'not_empty'
        results[bucket].append((path_str, is_empty, error_msg))

    return results


def _count_key_points(file_path: str) -> Optional[int]:
    """Return the number of key_points in *file_path*, or None if unreadable."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return len(data.get('三点解构', {}).get('关键点', {}).get('key_points', []))
    except (OSError, ValueError, AttributeError, TypeError):
        # OSError: file vanished or is unreadable; ValueError: bad JSON or
        # bad encoding; AttributeError/TypeError: unexpected JSON shape.
        return None


def print_results(dir_name: str, results: Dict[str, List[ResultEntry]], base_dir: str):
    """
    Print the check results for one directory.

    Args:
        dir_name: Label printed as the directory name.
        results: Result dict as returned by ``scan_directory``.
        base_dir: Directory that printed file paths are made relative to.
    """
    empty = results['empty']
    not_empty = results['not_empty']
    errors = results['error']
    total = len(empty) + len(not_empty) + len(errors)

    print(f"\n{'='*80}")
    print(f"目录: {dir_name}")
    print(f"{'='*80}")
    print(f"总文件数: {total}")
    print(f" - key_points为空的文件: {len(empty)}")
    print(f" - key_points不为空的文件: {len(not_empty)}")
    print(f" - 错误/缺失字段的文件: {len(errors)}")

    # Files whose key_points list is empty.
    if empty:
        print(f"\n【key_points为空的文件】({len(empty)}个):")
        for file_path, _, _ in empty:
            rel_path = os.path.relpath(file_path, base_dir)
            print(f" - {rel_path}")

    # Files that could not be evaluated, with the reason.
    if errors:
        print(f"\n【错误/缺失字段的文件】({len(errors)}个):")
        for file_path, _, error_msg in errors:
            rel_path = os.path.relpath(file_path, base_dir)
            print(f" - {rel_path}: {error_msg}")

    # Files with a non-empty key_points list, with the item count if the
    # file can still be read.
    if not_empty:
        print(f"\n【key_points不为空的文件】({len(not_empty)}个)")
        for file_path, _, _ in not_empty:
            rel_path = os.path.relpath(file_path, base_dir)
            count = _count_key_points(file_path)
            if count is None:
                print(f" - {rel_path}")
            else:
                print(f" - {rel_path} (key_points数量: {count})")


def _percent(part: int, total: int) -> float:
    """Return *part* as a percentage of *total*; 0.0 when *total* is 0."""
    return part / total * 100 if total else 0.0


def _tally(results_list: List[Dict[str, List[ResultEntry]]]) -> Dict[str, int]:
    """Sum the per-category file counts over several scan results."""
    empty = sum(len(r['empty']) for r in results_list)
    not_empty = sum(len(r['not_empty']) for r in results_list)
    error = sum(len(r['error']) for r in results_list)
    return {
        'total': empty + not_empty + error,
        'empty': empty,
        'not_empty': not_empty,
        'error': error,
    }


def main(base_path: Optional[str] = None):
    """
    Scan every account directory under *base_path* and print a report.

    For each non-hidden sub-directory (an "account"), the sub-directories
    ``用于pattern聚类`` and ``what单独解构`` are scanned if they exist.

    Args:
        base_path: Root directory holding one sub-directory per account.
            Defaults to the original hard-coded location when omitted.
    """
    if base_path is None:
        base_path = "/Users/semsevens/Desktop/workspace/daily/1113/how_1121_v2/data/账号"

    # Printed file paths are relative to the current working directory.
    base_dir = os.getcwd()

    print("开始检查 三点解构.关键点.key_points 是否为空...")

    # isdir rather than exists: os.listdir would raise on a plain file.
    if not os.path.isdir(base_path):
        print(f"基础路径不存在: {base_path}")
        return

    all_results = []
    account_stats = {}  # account name -> tally dict

    account_dirs = [
        d for d in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, d)) and not d.startswith('.')
    ]

    for account_name in sorted(account_dirs):
        account_path = os.path.join(base_path, account_name)

        print(f"\n{'#'*80}")
        print(f"账号: {account_name}")
        print(f"{'#'*80}")

        account_results = []

        # The two sub-directories checked for every account.
        for subdir_name in ("用于pattern聚类", "what单独解构"):
            subdir_path = os.path.join(account_path, subdir_name)
            if os.path.exists(subdir_path):
                results = scan_directory(subdir_path)
                print_results(subdir_name, results, base_dir)
                all_results.append(results)
                account_results.append(results)
            else:
                print(f"\n目录不存在: {subdir_name}")

        if not account_results:
            continue

        stats = _tally(account_results)
        account_stats[account_name] = stats
        account_total = stats['total']

        # Per-account summary. _percent guards the case where the
        # sub-directories exist but contain no JSON files (total == 0).
        print(f"\n{'-'*80}")
        print(f"【{account_name} 账号小结】")
        print(f"{'-'*80}")
        print(f"文件总数: {account_total}")
        print(f" - key_points为空: {stats['empty']} ({_percent(stats['empty'], account_total):.1f}%)")
        print(f" - key_points不为空: {stats['not_empty']} ({_percent(stats['not_empty'], account_total):.1f}%)")
        if stats['error'] > 0:
            print(f" - 错误/缺失字段: {stats['error']} ({_percent(stats['error'], account_total):.1f}%)")

    # Overall statistics across all accounts.
    print(f"\n{'='*80}")
    print("【总体统计】")
    print(f"{'='*80}")

    overall = _tally(all_results)
    total_all = overall['total']

    if total_all <= 0:
        print("未找到任何文件")
        return

    print(f"所有账号共计文件数: {total_all}")
    print(f" - key_points为空: {overall['empty']} ({_percent(overall['empty'], total_all):.1f}%)")
    print(f" - key_points不为空: {overall['not_empty']} ({_percent(overall['not_empty'], total_all):.1f}%)")
    if overall['error'] > 0:
        print(f" - 错误/缺失字段: {overall['error']} ({_percent(overall['error'], total_all):.1f}%)")

    # Per-account recap; an individual account may still have zero files.
    if account_stats:
        print(f"\n{'='*80}")
        print("【各账号汇总】")
        print(f"{'='*80}")
        for account_name, stats in sorted(account_stats.items()):
            print(f"\n{account_name}:")
            print(f" 文件总数: {stats['total']}")
            print(f" key_points为空: {stats['empty']} ({_percent(stats['empty'], stats['total']):.1f}%)")
            print(f" key_points不为空: {stats['not_empty']} ({_percent(stats['not_empty'], stats['total']):.1f}%)")


if __name__ == "__main__":
    main()