jihuaqiang 5 дней назад
Родитель
Коммит
fba1f94045
2 изменённых файла: 12 добавлений и 6 удалений
  1. 2 0
      agent.py
  2. 10 6
      tools/agent_tools.py

+ 2 - 0
agent.py

@@ -427,6 +427,8 @@ async def process_request_background(request_id: str):
                     "recursion_limit": 100  # 增加递归限制
                 }
             )
+            # 所有数据处理完毕,更新状态为2
+            update_request_status(request_id, 2)
             logger.info(f"LangGraph 后台处理完成: requestId={request_id}, processed={final_state.get('processed', 0)}, success={final_state.get('success', 0)}")
         
     except Exception as e:

+ 10 - 6
tools/agent_tools.py

@@ -135,15 +135,16 @@ class QueryDataTool:
                 if isinstance(parsed, tuple) and len(parsed) > 4:
                     # 假设第4个元素是JSON字符串
                     json_str = parsed[4]
+                    content_id = parsed[1]
                     if isinstance(json_str, str):
                         try:
                             json_data = json.loads(json_str)
                             if isinstance(json_data, dict):
-                                results.append({"crawl_data": json_data, "raw": parsed})
+                                results.append({"crawl_data": json_data, "content_id": content_id, "raw": parsed})
                             elif isinstance(json_data, list):
                                 for item in json_data:
                                     if isinstance(item, dict):
-                                        results.append({"crawl_data": item, "raw": parsed})
+                                        results.append({"crawl_data": item, "content_id": content_id, "raw": parsed})
                         except json.JSONDecodeError:
                             logger.warning(f"元组中第4个元素不是有效的JSON: {json_str}")
                     else:
@@ -154,18 +155,20 @@ class QueryDataTool:
                     for item in parsed:
                         if isinstance(item, dict):
                             crawl_data = item.get('crawl_data')
+                            content_id = item.get('content_id')
                             if isinstance(crawl_data, (dict, list)):
-                                results.append({"crawl_data": crawl_data, "raw": item})
+                                results.append({"crawl_data": crawl_data, "content_id": content_id, "raw": item})
                             else:
-                                results.append({"crawl_data": item, "raw": item})
+                                results.append({"crawl_data": item, "content_id": content_id, "raw": item})
                 
                 # 处理字典类型
                 elif isinstance(parsed, dict):
                     crawl_data = parsed.get('crawl_data')
+                    content_id = parsed.get('content_id')
                     if isinstance(crawl_data, (dict, list)):
-                        results.append({"crawl_data": crawl_data, "raw": parsed})
+                        results.append({"crawl_data": crawl_data, "content_id": content_id, "raw": parsed})
                     else:
-                        results.append({"crawl_data": parsed, "raw": parsed})
+                        results.append({"crawl_data": parsed, "content_id": content_id, "raw": parsed})
                 
                 else:
                     logger.warning(f"data 字段非期望的数据结构: {type(parsed)}, 已跳过一行")
@@ -251,6 +254,7 @@ class UpdateDataTool:
             插入的行ID,失败返回None
         """
         try:
+            logger.info(f"存储识别结果: request_id={request_id}, crawl_raw={crawl_raw}, identify_result={identify_result}")
             # 从原始数据中提取必要字段
             content_id = crawl_raw.get('content_id') or ''
             task_id = crawl_raw.get('task_id') or ''  # 默认任务ID,可根据需要调整