|
@@ -0,0 +1,227 @@
|
|
|
|
|
+#!/usr/bin/env python3
|
|
|
|
|
+"""
|
|
|
|
|
+检查指定目录下JSON文件中的 三点解构.关键点.key_points 是否为空数组
|
|
|
|
|
+"""
|
|
|
|
|
+
|
|
|
|
|
+import json
|
|
|
|
|
+import os
|
|
|
|
|
+from pathlib import Path
|
|
|
|
|
+from typing import Dict, List, Tuple
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def check_key_points_empty(file_path: str) -> Tuple[bool, str]:
    """Check whether ``三点解构.关键点.key_points`` in one JSON file is an empty list.

    Args:
        file_path: Path of the JSON file to inspect.

    Returns:
        A ``(is_empty, error_msg)`` pair. ``is_empty`` is True when
        ``key_points`` is an empty list, False when it is a non-empty list,
        and None when the check could not be carried out (unreadable file,
        invalid JSON, missing field, or a field of the wrong type). In the
        None case ``error_msg`` describes the problem; otherwise it is "".
    """
    # Only the I/O + parse step lives inside the try, so that structural
    # problems in the document are never reported as "read" errors.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        return None, f"JSON解析错误: {e}"
    except Exception as e:
        # Best-effort boundary: the caller scans many files and must never
        # crash because a single one is unreadable.
        return None, f"读取文件错误: {e}"

    # Walk down 三点解构 -> 关键点 -> key_points, validating each level.
    node = data
    parent_label = "JSON顶层"
    for field in ('三点解构', '关键点', 'key_points'):
        if not isinstance(node, dict):
            # A list/str/number here would otherwise blow up on .get().
            return None, f"{parent_label}不是对象类型,而是{type(node).__name__}"
        node = node.get(field)
        if node is None:
            # An explicit JSON null is treated the same as a missing key.
            return None, f"缺少'{field}'字段"
        parent_label = f"'{field}'"

    if not isinstance(node, list):
        return None, f"key_points不是数组类型,而是{type(node).__name__}"

    return len(node) == 0, ""
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def scan_directory(directory: str) -> Dict[str, List[Tuple[str, bool, str]]]:
    """Check every ``*.json`` file directly inside *directory*.

    Files are visited in sorted path order so that repeated runs produce the
    same report ordering regardless of how the filesystem enumerates entries.
    Sub-directories are not searched.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A dict with the keys ``'empty'``, ``'not_empty'`` and ``'error'``.
        Each value is a list of ``(file_path, is_empty, error_msg)`` tuples
        as produced by ``check_key_points_empty``; ``is_empty`` is None for
        entries in the ``'error'`` list.
    """
    results = {
        'empty': [],
        'not_empty': [],
        'error': [],
    }

    for json_file in sorted(Path(directory).glob('*.json')):
        path_str = str(json_file)
        is_empty, error_msg = check_key_points_empty(path_str)

        # None means the check itself failed; True/False is the real verdict.
        if is_empty is None:
            bucket = 'error'
        elif is_empty:
            bucket = 'empty'
        else:
            bucket = 'not_empty'
        results[bucket].append((path_str, is_empty, error_msg))

    return results
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def _count_key_points(file_path: str):
    """Return the number of ``key_points`` entries in *file_path*, or None.

    None is returned when the file cannot be read or parsed, or when the
    document does not have the expected shape.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return len(data.get('三点解构', {}).get('关键点', {}).get('key_points', []))
    except Exception:
        # Best-effort lookup for display only. Unlike a bare ``except:`` this
        # lets KeyboardInterrupt / SystemExit propagate.
        return None


def print_results(dir_name: str, results: Dict[str, List[Tuple[str, bool, str]]], base_dir: str):
    """Print the report for one scanned directory to stdout.

    Args:
        dir_name: Label printed in the section header.
        results: Mapping with the keys ``'empty'``, ``'not_empty'`` and
            ``'error'``, each a list of ``(file_path, is_empty, error_msg)``
            tuples.
        base_dir: Directory that the printed file paths are made relative to.
    """
    empty_files = results['empty']
    filled_files = results['not_empty']
    failed_files = results['error']
    total = len(empty_files) + len(filled_files) + len(failed_files)

    print(f"\n{'='*80}")
    print(f"目录: {dir_name}")
    print(f"{'='*80}")
    print(f"总文件数: {total}")
    print(f" - key_points为空的文件: {len(empty_files)}")
    print(f" - key_points不为空的文件: {len(filled_files)}")
    print(f" - 错误/缺失字段的文件: {len(failed_files)}")

    # Files whose key_points list is empty.
    if empty_files:
        print(f"\n【key_points为空的文件】({len(empty_files)}个):")
        for file_path, _, _ in empty_files:
            rel_path = os.path.relpath(file_path, base_dir)
            print(f" - {rel_path}")

    # Files that could not be checked, with the reason.
    if failed_files:
        print(f"\n【错误/缺失字段的文件】({len(failed_files)}个):")
        for file_path, _, error_msg in failed_files:
            rel_path = os.path.relpath(file_path, base_dir)
            print(f" - {rel_path}: {error_msg}")

    # Files with a non-empty key_points list, with the item count if available.
    if filled_files:
        print(f"\n【key_points不为空的文件】({len(filled_files)}个)")
        for file_path, _, _ in filled_files:
            rel_path = os.path.relpath(file_path, base_dir)
            # The result tuples do not carry the count, so re-read the file.
            count = _count_key_points(file_path)
            if count is None:
                print(f" - {rel_path}")
            else:
                print(f" - {rel_path} (key_points数量: {count})")
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def _tally(results_list):
    """Sum the per-category file counts over several scan results."""
    stats = {
        key: sum(len(r[key]) for r in results_list)
        for key in ('empty', 'not_empty', 'error')
    }
    stats['total'] = stats['empty'] + stats['not_empty'] + stats['error']
    return stats


def _percent(part, total):
    """Return *part* as a percentage of *total*, or 0.0 when *total* is 0."""
    return part / total * 100 if total else 0.0


def main(base_path: str = "/Users/semsevens/Desktop/workspace/daily/1113/how_1121_v2/data/账号"):
    """Scan every account directory under *base_path* and print a report.

    For each non-hidden sub-directory of *base_path* (one per account), the
    two sub-directories ``用于pattern聚类`` and ``what单独解构`` are scanned,
    a per-directory report and a per-account summary are printed, followed
    by overall totals and a per-account recap.

    Args:
        base_path: Directory that contains one sub-directory per account.
            Defaults to the location originally hard-coded in this script.
    """
    # Printed file paths are shown relative to the current working directory.
    base_dir = os.getcwd()

    print("开始检查 三点解构.关键点.key_points 是否为空...")

    # isdir (not exists) so that a regular file at base_path is reported
    # instead of crashing os.listdir below.
    if not os.path.isdir(base_path):
        print(f"基础路径不存在: {base_path}")
        return

    all_results = []
    account_stats = {}  # account name -> counts dict from _tally

    account_dirs = [
        d for d in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, d)) and not d.startswith('.')
    ]

    # The two sub-directories inspected for every account.
    subdirs = ["用于pattern聚类", "what单独解构"]

    for account_name in sorted(account_dirs):
        account_path = os.path.join(base_path, account_name)

        print(f"\n{'#'*80}")
        print(f"账号: {account_name}")
        print(f"{'#'*80}")

        account_results = []

        for subdir_name in subdirs:
            subdir_path = os.path.join(account_path, subdir_name)

            if os.path.exists(subdir_path):
                results = scan_directory(subdir_path)
                print_results(subdir_name, results, base_dir)
                all_results.append(results)
                account_results.append(results)
            else:
                print(f"\n目录不存在: {subdir_name}")

        # Accounts with neither sub-directory get no summary at all.
        if not account_results:
            continue

        stats = _tally(account_results)
        account_stats[account_name] = stats
        account_total = stats['total']

        # _percent guards against account_total == 0, which happens when the
        # sub-directories exist but contain no JSON files.
        print(f"\n{'-'*80}")
        print(f"【{account_name} 账号小结】")
        print(f"{'-'*80}")
        print(f"文件总数: {account_total}")
        print(f" - key_points为空: {stats['empty']} ({_percent(stats['empty'], account_total):.1f}%)")
        print(f" - key_points不为空: {stats['not_empty']} ({_percent(stats['not_empty'], account_total):.1f}%)")
        if stats['error'] > 0:
            print(f" - 错误/缺失字段: {stats['error']} ({_percent(stats['error'], account_total):.1f}%)")

    # Overall totals across all accounts.
    print(f"\n{'='*80}")
    print("【总体统计】")
    print(f"{'='*80}")

    totals = _tally(all_results)
    total_all = totals['total']

    if total_all > 0:
        print(f"所有账号共计文件数: {total_all}")
        print(f" - key_points为空: {totals['empty']} ({_percent(totals['empty'], total_all):.1f}%)")
        print(f" - key_points不为空: {totals['not_empty']} ({_percent(totals['not_empty'], total_all):.1f}%)")
        if totals['error'] > 0:
            print(f" - 错误/缺失字段: {totals['error']} ({_percent(totals['error'], total_all):.1f}%)")

        # Per-account recap.
        if account_stats:
            print(f"\n{'='*80}")
            print("【各账号汇总】")
            print(f"{'='*80}")
            for account_name, stats in sorted(account_stats.items()):
                print(f"\n{account_name}:")
                print(f" 文件总数: {stats['total']}")
                print(f" key_points为空: {stats['empty']} ({_percent(stats['empty'], stats['total']):.1f}%)")
                print(f" key_points不为空: {stats['not_empty']} ({_percent(stats['not_empty'], stats['total']):.1f}%)")
                if stats['error'] > 0:
                    print(f" 错误/缺失字段: {stats['error']} ({_percent(stats['error'], stats['total']):.1f}%)")
    else:
        print("未找到任何文件")


if __name__ == "__main__":
    main()
|