| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227 |
- #!/usr/bin/env python3
- """
- 检查指定目录下JSON文件中的 三点解构.关键点.key_points 是否为空数组
- """
import json
import os
from pathlib import Path
from typing import Dict, List, Optional, Tuple
def check_key_points_empty(file_path: str) -> Tuple[Optional[bool], str]:
    """Check whether ``三点解构.关键点.key_points`` in one JSON file is an empty list.

    Args:
        file_path: Path of the JSON file to inspect.

    Returns:
        A ``(is_empty, error_msg)`` tuple. ``is_empty`` is ``True`` when the
        list is empty, ``False`` when it has at least one element, and
        ``None`` when the file could not be checked (unreadable, invalid JSON,
        missing field, or unexpected type). ``error_msg`` is ``""`` on success
        and a description of the problem otherwise.
    """
    # Keep the try body limited to I/O and parsing so that structural problems
    # in the document are reported by the explicit checks below rather than
    # being mislabelled as read errors.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        return None, f"JSON解析错误: {e}"
    except (OSError, UnicodeDecodeError, RecursionError) as e:
        return None, f"读取文件错误: {e}"

    # Walk down 三点解构 -> 关键点 -> key_points one level at a time.
    node = data
    for key in ('三点解构', '关键点', 'key_points'):
        if not isinstance(node, dict):
            # A list/string/number here has no fields to look up.
            return None, f"'{key}'的父节点不是对象类型,而是{type(node).__name__}"
        node = node.get(key)
        # A JSON null is treated the same as an absent field.
        if node is None:
            return None, f"缺少'{key}'字段"

    if not isinstance(node, list):
        return None, f"key_points不是数组类型,而是{type(node).__name__}"
    return len(node) == 0, ""
def scan_directory(directory: str) -> Dict[str, List[Tuple[str, Optional[bool], str]]]:
    """Check every ``*.json`` file directly inside ``directory``.

    Only the directory itself is searched; sub-directories are not visited.
    Files are processed in sorted path order so repeated runs produce the
    same report ordering regardless of filesystem enumeration order.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A dict with the keys ``'empty'``, ``'not_empty'`` and ``'error'``.
        Each value is a list of ``(file_path, is_empty, error_msg)`` tuples
        as produced by ``check_key_points_empty``; entries under ``'error'``
        have ``is_empty`` set to ``None``.
    """
    results: Dict[str, List[Tuple[str, Optional[bool], str]]] = {
        'empty': [],
        'not_empty': [],
        'error': [],
    }

    for json_file in sorted(Path(directory).glob('*.json')):
        path_str = str(json_file)
        is_empty, error_msg = check_key_points_empty(path_str)

        # None means "could not be checked"; it must be tested before the
        # truthiness branch because None is also falsy.
        if is_empty is None:
            bucket = 'error'
        elif is_empty:
            bucket = 'empty'
        else:
            bucket = 'not_empty'
        results[bucket].append((path_str, is_empty, error_msg))

    return results
def print_results(dir_name: str, results: Dict[str, List[Tuple[str, bool, str]]], base_dir: str):
    """Print the check report for one scanned directory to stdout.

    Args:
        dir_name: Label printed in the report header.
        results: Mapping with the keys ``'empty'``, ``'not_empty'`` and
            ``'error'``, each holding ``(file_path, is_empty, error_msg)``
            tuples.
        base_dir: Directory that file paths are shown relative to.
    """
    empty_items = results['empty']
    filled_items = results['not_empty']
    error_items = results['error']
    total = len(empty_items) + len(filled_items) + len(error_items)

    print(f"\n{'='*80}")
    print(f"目录: {dir_name}")
    print(f"{'='*80}")
    print(f"总文件数: {total}")
    print(f" - key_points为空的文件: {len(empty_items)}")
    print(f" - key_points不为空的文件: {len(filled_items)}")
    print(f" - 错误/缺失字段的文件: {len(error_items)}")

    # Files whose key_points list is empty.
    if empty_items:
        print(f"\n【key_points为空的文件】({len(empty_items)}个):")
        for file_path, _, _ in empty_items:
            rel_path = os.path.relpath(file_path, base_dir)
            print(f" - {rel_path}")

    # Files that could not be checked, with the reason.
    if error_items:
        print(f"\n【错误/缺失字段的文件】({len(error_items)}个):")
        for file_path, _, error_msg in error_items:
            rel_path = os.path.relpath(file_path, base_dir)
            print(f" - {rel_path}: {error_msg}")

    # Files with a non-empty key_points list, plus the element count.
    if filled_items:
        print(f"\n【key_points不为空的文件】({len(filled_items)}个)")
        for file_path, _, _ in filled_items:
            rel_path = os.path.relpath(file_path, base_dir)
            # The result tuples do not carry the count, so the file is read
            # again here to obtain it.
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                count = len(data.get('三点解构', {}).get('关键点', {}).get('key_points', []))
                print(f" - {rel_path} (key_points数量: {count})")
            except Exception:
                # Best effort: if the file cannot be re-read, list it without
                # a count. Catching Exception (not a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                print(f" - {rel_path}")
# Default root of the per-account data directories. This is an absolute path
# on the original author's machine; pass another path to main() or on the
# command line to scan elsewhere.
_DEFAULT_BASE_PATH = "/Users/semsevens/Desktop/workspace/daily/1113/how_1121_v2/data/账号"

# Sub-directories inspected inside every account directory.
_ACCOUNT_SUBDIRS = ("用于pattern聚类", "what单独解构")


def _percent(part, total):
    """Return ``part`` as a percentage of ``total``, or 0.0 when ``total`` is 0."""
    return part / total * 100 if total else 0.0


def _tally(results_list):
    """Sum the bucket sizes of several scan results into one stats dict."""
    counts = {
        key: sum(len(r[key]) for r in results_list)
        for key in ('empty', 'not_empty', 'error')
    }
    counts['total'] = counts['empty'] + counts['not_empty'] + counts['error']
    return counts


def main(base_path: str = _DEFAULT_BASE_PATH) -> None:
    """Scan every account under ``base_path`` and print a key_points report.

    Each non-hidden sub-directory of ``base_path`` is treated as an account.
    Inside each account, the two fixed sub-directories are scanned, a
    per-directory report is printed, then a per-account summary, and finally
    an overall summary across all accounts.

    Args:
        base_path: Directory containing one sub-directory per account.
            Defaults to the original hard-coded location.
    """
    # File paths in the report are shown relative to the current directory.
    base_dir = os.getcwd()
    print("开始检查 三点解构.关键点.key_points 是否为空...")

    # isdir (rather than exists) so that a regular file is rejected here
    # instead of making os.listdir raise below.
    if not os.path.isdir(base_path):
        print(f"基础路径不存在: {base_path}")
        return

    all_results = []
    account_stats = {}  # account name -> stats dict from _tally

    account_dirs = sorted(
        d for d in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, d)) and not d.startswith('.')
    )
    for account_name in account_dirs:
        account_path = os.path.join(base_path, account_name)
        print(f"\n{'#'*80}")
        print(f"账号: {account_name}")
        print(f"{'#'*80}")

        account_results = []
        for subdir_name in _ACCOUNT_SUBDIRS:
            subdir_path = os.path.join(account_path, subdir_name)
            if not os.path.exists(subdir_path):
                print(f"\n目录不存在: {subdir_name}")
                continue
            results = scan_directory(subdir_path)
            print_results(subdir_name, results, base_dir)
            all_results.append(results)
            account_results.append(results)

        # No per-account summary when neither sub-directory exists.
        if not account_results:
            continue

        stats = _tally(account_results)
        account_stats[account_name] = stats

        # The sub-directories may exist yet contain no JSON files, so the
        # total can be 0; _percent guards the division.
        print(f"\n{'-'*80}")
        print(f"【{account_name} 账号小结】")
        print(f"{'-'*80}")
        print(f"文件总数: {stats['total']}")
        print(f" - key_points为空: {stats['empty']} ({_percent(stats['empty'], stats['total']):.1f}%)")
        print(f" - key_points不为空: {stats['not_empty']} ({_percent(stats['not_empty'], stats['total']):.1f}%)")
        if stats['error'] > 0:
            print(f" - 错误/缺失字段: {stats['error']} ({_percent(stats['error'], stats['total']):.1f}%)")

    # Overall statistics across every scanned directory.
    print(f"\n{'='*80}")
    print("【总体统计】")
    print(f"{'='*80}")
    overall = _tally(all_results)
    total_all = overall['total']

    if total_all > 0:
        print(f"所有账号共计文件数: {total_all}")
        print(f" - key_points为空: {overall['empty']} ({_percent(overall['empty'], total_all):.1f}%)")
        print(f" - key_points不为空: {overall['not_empty']} ({_percent(overall['not_empty'], total_all):.1f}%)")
        if overall['error'] > 0:
            print(f" - 错误/缺失字段: {overall['error']} ({_percent(overall['error'], total_all):.1f}%)")

        # Per-account recap.
        if account_stats:
            print(f"\n{'='*80}")
            print("【各账号汇总】")
            print(f"{'='*80}")
            for account_name, stats in sorted(account_stats.items()):
                print(f"\n{account_name}:")
                print(f" 文件总数: {stats['total']}")
                print(f" key_points为空: {stats['empty']} ({_percent(stats['empty'], stats['total']):.1f}%)")
                print(f" key_points不为空: {stats['not_empty']} ({_percent(stats['not_empty'], stats['total']):.1f}%)")
    else:
        print("未找到任何文件")


if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the default base
    # path; with no argument the original hard-coded path is used.
    main(*sys.argv[1:2])
|