1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- from typing import Dict
- from jsonpath_ng import parse
def safe_extract(json_obj, path, default=None):
    """
    Safely extract a single field value from a JSON object.

    The JSONPath expression is parsed and evaluated against ``json_obj``;
    the value of the first match is returned. If there is no match, or if
    anything raises while parsing or evaluating, ``default`` is returned
    (errors are printed, never propagated).

    :param json_obj: input JSON object
    :param path: JSONPath expression
    :param default: value returned when extraction fails
    :return: the first matched value, or ``default``
    """
    result = default
    try:
        found = parse(path).find(json_obj)
        if found:
            # Only the first match is of interest.
            result = found[0].value
    except Exception as err:
        # Best-effort helper: report the problem and fall back to the default.
        print(f"[extractor] Error extracting {path}: {err}")
    return result
def extract_multiple(json_obj, fields: dict) -> dict:
    """
    Extract several fields according to a field configuration.

    Each configured JSONPath is resolved with ``safe_extract`` using its
    default fallback.

    :param json_obj: input JSON object
    :param fields: field configuration, e.g. {"title": "$.title", "id": "$.id"}
    :return: dict mapping each field name to its extracted value
    """
    extracted = {}
    for name, expr in fields.items():
        extracted[name] = safe_extract(json_obj, expr)
    return extracted
def extract_fields(video: Dict, field_map: Dict, logger=None, trace_id=None, aliyun_log=None) -> Dict:
    """
    Build a result dict from ``video`` according to ``field_map``.

    A mapping value that is a string starting with ``$`` is treated as a
    JSONPath and resolved through ``safe_extract``; any other value is
    copied into the result unchanged as a literal.

    :param video: source object the JSONPath expressions are evaluated against
    :param field_map: field name -> JSONPath expression or literal value
    :param logger: optional logger; when truthy, a warning is emitted for
        every JSONPath field whose extracted value is ``None``
    :param trace_id: identifier prefixed to the warning message
    :param aliyun_log: accepted but not used by this function
    :return: field name -> extracted (or literal) value
    """
    extracted = {}
    for name, spec in field_map.items():
        if isinstance(spec, str) and spec.startswith("$"):
            found = safe_extract(video, spec)
            if found is None:
                if logger:
                    logger.warning(f"{trace_id} 字段提取失败: {name} 路径: {spec}")
            extracted[name] = found
        else:
            # Not a JSONPath expression: pass the configured value through.
            extracted[name] = spec
    return extracted
|