# analyze_tables.py
  1. import json, os
  2. files = [f for f in os.listdir('input') if f.endswith('_制作表.json')]
  3. files.sort()
  4. for fname in files:
  5. with open(f'input/{fname}', 'r') as f:
  6. data = json.load(f)
  7. img_id = fname.split('__')[1].split('_')[0]
  8. print(f'\n=== {img_id} ({fname}) ===')
  9. def extract_key_info(items, depth=0):
  10. if isinstance(items, list):
  11. for item in items:
  12. extract_key_info(item, depth)
  13. elif isinstance(items, dict):
  14. pid = items.get('段落ID', '')
  15. name = items.get('名称', '')
  16. desc = items.get('描述', '')
  17. # 实质信息
  18. shizhi = items.get('实质', {})
  19. if shizhi:
  20. print(f' [{pid}] {name}: {desc[:80]}')
  21. for k, v in shizhi.items():
  22. if isinstance(v, dict):
  23. vdesc = v.get('描述', '')
  24. print(f' 实质.{k}: {vdesc[:100]}')
  25. elif pid:
  26. print(f' [{pid}] {name}: {desc[:80]}')
  27. children = items.get('子段落', [])
  28. if children:
  29. extract_key_info(children, depth+1)
  30. extract_key_info(data)