|
@@ -129,7 +129,28 @@ class QueryDataTool:
|
|
|
continue
|
|
|
try:
|
|
|
parsed = json.loads(data_cell) if isinstance(data_cell, (str, bytes)) else data_cell
|
|
|
- if isinstance(parsed, list):
|
|
|
+ logger.info(f"parsed: {parsed}")
|
|
|
+
|
|
|
+ # 处理元组类型(数据库查询结果)
|
|
|
+ if isinstance(parsed, tuple) and len(parsed) > 4:
|
|
|
+ # Assume the element at index 4 (i.e. the fifth element) is the JSON string
|
|
|
+ json_str = parsed[4]
|
|
|
+ if isinstance(json_str, str):
|
|
|
+ try:
|
|
|
+ json_data = json.loads(json_str)
|
|
|
+ if isinstance(json_data, dict):
|
|
|
+ results.append({"crawl_data": json_data, "raw": parsed})
|
|
|
+ elif isinstance(json_data, list):
|
|
|
+ for item in json_data:
|
|
|
+ if isinstance(item, dict):
|
|
|
+ results.append({"crawl_data": item, "raw": parsed})
|
|
|
+ except json.JSONDecodeError:
|
|
|
+ logger.warning(f"元组中第4个元素不是有效的JSON: {json_str}")
|
|
|
+ else:
|
|
|
+ logger.warning(f"元组中第4个元素不是字符串: {type(json_str)}")
|
|
|
+
|
|
|
+ # 处理列表类型
|
|
|
+ elif isinstance(parsed, list):
|
|
|
for item in parsed:
|
|
|
if isinstance(item, dict):
|
|
|
crawl_data = item.get('crawl_data')
|
|
@@ -137,14 +158,17 @@ class QueryDataTool:
|
|
|
results.append({"crawl_data": crawl_data, "raw": item})
|
|
|
else:
|
|
|
results.append({"crawl_data": item, "raw": item})
|
|
|
+
|
|
|
+ # 处理字典类型
|
|
|
elif isinstance(parsed, dict):
|
|
|
crawl_data = parsed.get('crawl_data')
|
|
|
if isinstance(crawl_data, (dict, list)):
|
|
|
results.append({"crawl_data": crawl_data, "raw": parsed})
|
|
|
else:
|
|
|
results.append({"crawl_data": parsed, "raw": parsed})
|
|
|
+
|
|
|
else:
|
|
|
- logger.warning("data 字段非期望的 JSON 结构,已跳过一行")
|
|
|
+ logger.warning(f"data 字段非期望的数据结构: {type(parsed)}, 已跳过一行")
|
|
|
except Exception as e:
|
|
|
logger.error(f"解析 data JSON 失败: {e}")
|
|
|
logger.info(f"request_id={request_id} 提取 crawl_data 数量: {len(results)}")
|