""" 修复 case JSON 文件中的引号错误 常见问题: 1. 中文引号("")误用为英文引号 2. 字符串值中包含未转义的英文双引号(LLM 生成时常见) """ import json import re from pathlib import Path from typing import Any, Dict, Optional, Tuple def fix_chinese_quotes(text: str) -> str: """将中文引号替换为英文引号""" return text.replace('“', '"').replace('”', '"').replace('‘', "'").replace('’', "'") def fix_unescaped_quotes_in_values(text: str) -> str: """ 修复 JSON 字符串值中未转义的双引号。 策略:找到 JSON 键值对中的字符串值,将值内部的未转义引号替换为中文引号。 """ # 匹配 "key": "value" 中的 value 部分(value 可能包含未转义的引号) # 使用逐字符解析来精确处理 result = [] i = 0 in_string = False is_value = False # 是否在值的字符串中(而非键) colon_seen = False while i < len(text): c = text[i] if not in_string: if c == '"': in_string = True # 判断这是键还是值 is_value = colon_seen if colon_seen: colon_seen = False result.append(c) elif c == ':': colon_seen = True result.append(c) elif c in ('{', '[', ',', '\n', ' ', '\t', '\r'): if c in ('{', '[', ','): colon_seen = False result.append(c) else: result.append(c) else: if c == '\\': # 转义序列,直接保留 result.append(c) i += 1 if i < len(text): result.append(text[i]) elif c == '"': # 检查是否是字符串结束 # 向后看:如果后面跟着 ,/}/]/: 或空白+这些字符,则是结束引号 j = i + 1 while j < len(text) and text[j] in (' ', '\t', '\r', '\n'): j += 1 next_char = text[j] if j < len(text) else '' if next_char in (',', '}', ']', ':') or j >= len(text): # 这是字符串结束引号 in_string = False result.append(c) else: # 这是值内部的未转义引号,替换为中文引号 if is_value: result.append('“') # " else: result.append(c) else: result.append(c) i += 1 return ''.join(result) def try_fix_and_parse(content: str) -> Tuple[bool, Any, str]: """ 尝试多种修复策略解析 JSON Returns: (success, data, fix_description) """ # 策略 1:直接解析 try: return True, json.loads(content), "valid" except json.JSONDecodeError: pass # 策略 2:替换中文引号 fixed = fix_chinese_quotes(content) try: return True, json.loads(fixed), "fixed_chinese_quotes" except json.JSONDecodeError: pass # 策略 3:修复值中未转义的引号 fixed2 = fix_unescaped_quotes_in_values(fixed) try: return True, json.loads(fixed2), "fixed_unescaped_quotes" except json.JSONDecodeError: pass # 策略 4:尝试 json_repair 库(如果可用) try: import json_repair data = json_repair.repair_json(content, return_objects=True) if data: return True, data, "fixed_by_json_repair" except ImportError: pass except Exception: pass return False, None, "unfixable" def fix_json_file(file_path: Path, backup: bool = True) -> Dict[str, Any]: """修复 JSON 文件""" result = {"success": False, "message": "", "fixed": False, "file": str(file_path)} try: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() except Exception as e: result["message"] = f"File read error: {e}" return result success, data, fix_desc = try_fix_and_parse(content) if success: if fix_desc != "valid": if backup: backup_path = file_path.with_suffix('.json.bak') backup_path.write_text(content, encoding='utf-8') result["backup"] = str(backup_path) with open(file_path, 'w', encoding='utf-8') as f: json.dump(data, f, ensure_ascii=False, indent=2) result["fixed"] = True result["message"] = fix_desc else: result["message"] = "Already valid JSON" result["success"] = True else: result["message"] = "unfixable" return result def fix_directory(dir_path: Path, pattern: str = "case_*.json") -> Dict[str, Any]: """修复目录下所有匹配的 JSON 文件""" results = [] total = fixed = failed = 0 for file_path in sorted(dir_path.glob(pattern)): total += 1 result = fix_json_file(file_path, backup=True) results.append(result) if result["success"]: if result["fixed"]: fixed += 1 print(f"[FIXED] {file_path.name}: {result['message']}") else: print(f"[OK] {file_path.name}") else: failed += 1 print(f"[FAILED] {file_path.name}: {result['message']}") return {"total": total, "fixed": fixed, "failed": failed, "results": results} if __name__ == "__main__": import sys if len(sys.argv) < 2: print("Usage: python fix_json_quotes.py ") sys.exit(1) dir_path = Path(sys.argv[1]) print(f"Fixing JSON files in: {dir_path}") print("=" * 60) summary = fix_directory(dir_path) print("=" * 60) print(f"Total: {summary['total']}, Fixed: {summary['fixed']}, Failed: {summary['failed']}")