check_empty_key_points.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. #!/usr/bin/env python3
  2. """
  3. 检查指定目录下JSON文件中的 三点解构.关键点.key_points 是否为空数组
  4. """
import json
import os
from pathlib import Path
from typing import Dict, List, Optional, Tuple
  9. def check_key_points_empty(file_path: str) -> Tuple[bool, str]:
  10. """
  11. 检查单个文件中的 key_points 是否为空
  12. Args:
  13. file_path: JSON文件路径
  14. Returns:
  15. (is_empty, error_msg): is_empty表示是否为空数组,error_msg表示错误信息
  16. """
  17. try:
  18. with open(file_path, 'r', encoding='utf-8') as f:
  19. data = json.load(f)
  20. # 导航到 三点解构.关键点.key_points
  21. san_dian = data.get('三点解构', None)
  22. if san_dian is None:
  23. return None, "缺少'三点解构'字段"
  24. guan_jian_dian = san_dian.get('关键点', None)
  25. if guan_jian_dian is None:
  26. return None, "缺少'关键点'字段"
  27. key_points = guan_jian_dian.get('key_points', None)
  28. if key_points is None:
  29. return None, "缺少'key_points'字段"
  30. # 检查是否为空数组
  31. if not isinstance(key_points, list):
  32. return None, f"key_points不是数组类型,而是{type(key_points).__name__}"
  33. is_empty = len(key_points) == 0
  34. return is_empty, ""
  35. except json.JSONDecodeError as e:
  36. return None, f"JSON解析错误: {e}"
  37. except Exception as e:
  38. return None, f"读取文件错误: {e}"
  39. def scan_directory(directory: str) -> Dict[str, List[Tuple[str, bool, str]]]:
  40. """
  41. 扫描目录下所有JSON文件并检查key_points
  42. Args:
  43. directory: 目录路径
  44. Returns:
  45. 结果字典,包含统计信息
  46. """
  47. directory_path = Path(directory)
  48. results = {
  49. 'empty': [], # (文件路径, is_empty, error_msg)
  50. 'not_empty': [],
  51. 'error': []
  52. }
  53. # 遍历目录下所有JSON文件
  54. json_files = list(directory_path.glob('*.json'))
  55. for json_file in json_files:
  56. is_empty, error_msg = check_key_points_empty(str(json_file))
  57. if is_empty is None:
  58. results['error'].append((str(json_file), is_empty, error_msg))
  59. elif is_empty:
  60. results['empty'].append((str(json_file), is_empty, error_msg))
  61. else:
  62. results['not_empty'].append((str(json_file), is_empty, error_msg))
  63. return results
  64. def print_results(dir_name: str, results: Dict[str, List[Tuple[str, bool, str]]], base_dir: str):
  65. """打印检查结果"""
  66. total = len(results['empty']) + len(results['not_empty']) + len(results['error'])
  67. print(f"\n{'='*80}")
  68. print(f"目录: {dir_name}")
  69. print(f"{'='*80}")
  70. print(f"总文件数: {total}")
  71. print(f" - key_points为空的文件: {len(results['empty'])}")
  72. print(f" - key_points不为空的文件: {len(results['not_empty'])}")
  73. print(f" - 错误/缺失字段的文件: {len(results['error'])}")
  74. # 显示key_points为空的文件
  75. if results['empty']:
  76. print(f"\n【key_points为空的文件】({len(results['empty'])}个):")
  77. for file_path, _, _ in results['empty']:
  78. rel_path = os.path.relpath(file_path, base_dir)
  79. print(f" - {rel_path}")
  80. # 显示错误的文件
  81. if results['error']:
  82. print(f"\n【错误/缺失字段的文件】({len(results['error'])}个):")
  83. for file_path, _, error_msg in results['error']:
  84. rel_path = os.path.relpath(file_path, base_dir)
  85. print(f" - {rel_path}: {error_msg}")
  86. # 显示key_points不为空的文件摘要
  87. if results['not_empty']:
  88. print(f"\n【key_points不为空的文件】({len(results['not_empty'])}个)")
  89. for file_path, _, _ in results['not_empty']:
  90. rel_path = os.path.relpath(file_path, base_dir)
  91. # 读取key_points数量
  92. try:
  93. with open(file_path, 'r', encoding='utf-8') as f:
  94. data = json.load(f)
  95. count = len(data.get('三点解构', {}).get('关键点', {}).get('key_points', []))
  96. print(f" - {rel_path} (key_points数量: {count})")
  97. except:
  98. print(f" - {rel_path}")
  99. def main():
  100. """主函数"""
  101. # 自动扫描所有账号目录
  102. base_path = "/Users/semsevens/Desktop/workspace/daily/1113/how_1121_v2/data/账号"
  103. # 获取当前工作目录作为基准目录
  104. base_dir = os.getcwd()
  105. print("开始检查 三点解构.关键点.key_points 是否为空...")
  106. all_results = []
  107. account_stats = {} # 存储每个账号的统计信息
  108. # 遍历所有账号
  109. if os.path.exists(base_path):
  110. account_dirs = [d for d in os.listdir(base_path)
  111. if os.path.isdir(os.path.join(base_path, d)) and not d.startswith('.')]
  112. for account_name in sorted(account_dirs):
  113. account_path = os.path.join(base_path, account_name)
  114. print(f"\n{'#'*80}")
  115. print(f"账号: {account_name}")
  116. print(f"{'#'*80}")
  117. account_results = []
  118. # 检查两个子目录
  119. subdirs = ["用于pattern聚类", "what单独解构"]
  120. for subdir_name in subdirs:
  121. subdir_path = os.path.join(account_path, subdir_name)
  122. if os.path.exists(subdir_path):
  123. results = scan_directory(subdir_path)
  124. print_results(subdir_name, results, base_dir)
  125. all_results.append(results)
  126. account_results.append(results)
  127. else:
  128. print(f"\n目录不存在: {subdir_name}")
  129. # 计算该账号的统计信息
  130. if account_results:
  131. account_empty = sum(len(r['empty']) for r in account_results)
  132. account_not_empty = sum(len(r['not_empty']) for r in account_results)
  133. account_error = sum(len(r['error']) for r in account_results)
  134. account_total = account_empty + account_not_empty + account_error
  135. account_stats[account_name] = {
  136. 'total': account_total,
  137. 'empty': account_empty,
  138. 'not_empty': account_not_empty,
  139. 'error': account_error
  140. }
  141. # 打印该账号的小结
  142. print(f"\n{'-'*80}")
  143. print(f"【{account_name} 账号小结】")
  144. print(f"{'-'*80}")
  145. print(f"文件总数: {account_total}")
  146. print(f" - key_points为空: {account_empty} ({account_empty/account_total*100:.1f}%)")
  147. print(f" - key_points不为空: {account_not_empty} ({account_not_empty/account_total*100:.1f}%)")
  148. if account_error > 0:
  149. print(f" - 错误/缺失字段: {account_error} ({account_error/account_total*100:.1f}%)")
  150. else:
  151. print(f"基础路径不存在: {base_path}")
  152. return
  153. # 总体统计
  154. print(f"\n{'='*80}")
  155. print("【总体统计】")
  156. print(f"{'='*80}")
  157. total_empty = sum(len(r['empty']) for r in all_results)
  158. total_not_empty = sum(len(r['not_empty']) for r in all_results)
  159. total_error = sum(len(r['error']) for r in all_results)
  160. total_all = total_empty + total_not_empty + total_error
  161. if total_all > 0:
  162. print(f"所有账号共计文件数: {total_all}")
  163. print(f" - key_points为空: {total_empty} ({total_empty/total_all*100:.1f}%)")
  164. print(f" - key_points不为空: {total_not_empty} ({total_not_empty/total_all*100:.1f}%)")
  165. if total_error > 0:
  166. print(f" - 错误/缺失字段: {total_error} ({total_error/total_all*100:.1f}%)")
  167. # 按账号显示汇总
  168. if account_stats:
  169. print(f"\n{'='*80}")
  170. print("【各账号汇总】")
  171. print(f"{'='*80}")
  172. for account_name, stats in sorted(account_stats.items()):
  173. print(f"\n{account_name}:")
  174. print(f" 文件总数: {stats['total']}")
  175. print(f" key_points为空: {stats['empty']} ({stats['empty']/stats['total']*100:.1f}%)")
  176. print(f" key_points不为空: {stats['not_empty']} ({stats['not_empty']/stats['total']*100:.1f}%)")
  177. else:
  178. print("未找到任何文件")
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()