extractors.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. from typing import Dict
  2. from jsonpath_ng import parse
  3. def safe_extract(json_obj, path, default=None):
  4. """
  5. 安全提取单个字段值,返回匹配到的第一个,否则返回默认值。
  6. :param json_obj: 输入的 JSON 对象
  7. :param path: JSONPath 表达式
  8. :param default: 提取失败时返回的默认值
  9. :return: 提取结果或默认值
  10. """
  11. try:
  12. jsonpath_expr = parse(path)
  13. match = jsonpath_expr.find(json_obj)
  14. if match:
  15. return match[0].value
  16. except Exception as e:
  17. print(f"[extractor] Error extracting {path}: {e}")
  18. return default
  19. def extract_multiple(json_obj, fields: dict) -> dict:
  20. """
  21. 根据字段配置提取多个字段。
  22. :param json_obj: 输入的 JSON 对象
  23. :param fields: 字段配置,如 {"title": "$.title", "id": "$.id"}
  24. :return: 字段名 -> 提取值的字典
  25. """
  26. return {key: safe_extract(json_obj, path) for key, path in fields.items()}
  27. def extract_fields(video: Dict, field_map: Dict, logger=None, trace_id=None,aliyun_log=None) -> Dict:
  28. result = {}
  29. for field, path in field_map.items():
  30. if not isinstance(path, str) or not path.startswith("$"):
  31. result[field] = path
  32. continue
  33. value = safe_extract(video, path)
  34. if value is None and logger:
  35. logger.warning(f"字段提取失败: {field} 路径: {path}")
  36. aliyun_log.logging(
  37. code="9024",
  38. message=f"字段提取失败: {field} 路径: {path}"
  39. )
  40. result[field] = value
  41. return result