|
@@ -254,10 +254,16 @@ class UpdateDataTool:
|
|
|
插入的行ID,失败返回None
|
|
|
"""
|
|
|
try:
|
|
|
- logger.info(f"存储识别结果: request_id={request_id}, crawl_raw={crawl_raw}, identify_result={identify_result}")
|
|
|
# 从原始数据中提取必要字段
|
|
|
content_id = crawl_raw.get('content_id') or ''
|
|
|
task_id = crawl_raw.get('task_id') or '' # 默认任务ID,可根据需要调整
|
|
|
+ # 仅输出 identify_result 的前100个字符,避免日志过长
|
|
|
+ try:
|
|
|
+ identify_str = identify_result if isinstance(identify_result, str) else json.dumps(identify_result, ensure_ascii=False)
|
|
|
+ except Exception:
|
|
|
+ identify_str = str(identify_result)
|
|
|
+ identify_preview = identify_str[:100]
|
|
|
+ logger.info(f"存储识别结果: request_id={request_id}, content_id={content_id}, task_id={task_id}, crawl_raw={crawl_raw}, identify_result_preview={identify_preview}")
|
|
|
|
|
|
# 先查询是否存在相同 request_id + content_id 的记录
|
|
|
check_sql = "SELECT id, status FROM knowledge_parsing_content WHERE request_id = %s AND content_id = %s LIMIT 1"
|