jihuaqiang committed 5a1cb2eaf0 · 1 week ago
2 changed files with 26 additions and 14 deletions
  1. agent.py (+15 -3)
  2. agent_tools.py (+11 -11)

+ 15 - 3
agent.py

@@ -130,6 +130,8 @@ def create_langgraph_workflow():
             
             item = items[current_index]
             state["current_item"] = item
+            state["content_id"] = item.get('content_id') or ''
+            state["task_id"] = item.get('task_id') or ''
             state["current_index"] = current_index + 1
             
             # Process the current item
@@ -144,7 +146,10 @@ def create_langgraph_workflow():
             # Step 2: structure the result and persist it to the DB
             affected = StructureTool.store_parsing_result(
                 state["request_id"], 
-                item.get('raw') or {}, 
+                {
+                    "content_id": state["content_id"],
+                    "task_id": state["task_id"]
+                }, 
                 identify_result
             )
             
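Taken together, the additions in this hunk thread each item's identifiers through the workflow state before the tools run. A minimal, runnable sketch of that pattern, assuming item is a plain dict from the upstream queue (the helper name thread_item_ids is hypothetical):

def thread_item_ids(state: dict, item: dict) -> dict:
    # Missing or null identifiers fall back to '' so downstream
    # storage never receives None.
    state["content_id"] = item.get("content_id") or ""
    state["task_id"] = item.get("task_id") or ""
    return state

state = thread_item_ids({}, {"content_id": "684a789b000000002202a61b"})
print(state)  # {'content_id': '684a789b000000002202a61b', 'task_id': ''}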
@@ -319,7 +324,10 @@ async def parse_processing(request: TriggerRequest, background_tasks: Background
                     # Step 2: structure the result and persist it to the DB
                     affected = StructureTool.store_parsing_result(
                         request.requestId, 
-                        item.get('raw') or {}, 
+                        {
+                            "content_id": item.get('content_id') or '',
+                            "task_id": item.get('task_id') or ''
+                        }, 
                         identify_result
                     )
                     
@@ -418,6 +426,7 @@ async def process_request_background(request_id: str):
             for idx, item in enumerate(items, start=1):
                 try:
                     crawl_data = item.get('crawl_data') or {}
+                    content_id = item.get('content_id') or ''
                     
                     identify_result = identify_tool.run(
                         crawl_data if isinstance(crawl_data, dict) else {}
@@ -425,7 +434,10 @@ async def process_request_background(request_id: str):
                     
                     affected = StructureTool.store_parsing_result(
                         request_id, 
-                        item.get('raw') or {}, 
+                        {
+                            "content_id": content_id,
+                            "task_id": item.get('task_id') or ''
+                        },
                         identify_result
                     )
                     
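All three call sites in this file now share one contract: StructureTool.store_parsing_result(request_id, meta, identify_result), where meta carries only content_id and task_id. A hedged sketch of that contract with a stand-in for the real tool (the stub below is hypothetical; it only echoes what the real method persists):

import json

def store_parsing_result_stub(request_id: str, meta: dict, identify_result: dict) -> int:
    # Stand-in for StructureTool.store_parsing_result: prints the values
    # the real method would write and returns a fake affected-row count.
    print(meta.get("content_id") or "", request_id, meta.get("task_id") or "",
          json.dumps(identify_result, ensure_ascii=False))
    return 1

affected = store_parsing_result_stub(
    "req-001",
    {"content_id": "684a789b000000002202a61b", "task_id": 1},
    {"channel": 1},
)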

+ 11 - 11
agent_tools.py

@@ -23,6 +23,10 @@ class QueryDataTool:
             logger.info(f"request_id={request_id} no data found; using default values")
             # Return default data
             default_data = {
+                "request_id": request_id,
+                "content_id": "684a789b000000002202a61b",
+                "id": 1,
+                "task_id": 1,
                 "crawl_data": {
                     "channel": 1,
                     "channel_content_id": "684a789b000000002202a61b",
@@ -42,10 +46,10 @@ class QueryDataTool:
                         }
                     ],
                     "video_url_list": [
-                        {
-                            "video_url": "http://rescdn.yishihui.com/pipeline/video/9e38400e-21dc-4063-bab5-47c1667bb59d.mp4",
-                            "video_duration": 615
-                        }
+                        # {
+                        #     "video_url": "http://rescdn.yishihui.com/pipeline/video/9e38400e-21dc-4063-bab5-47c1667bb59d.mp4",
+                        #     "video_duration": 615
+                        # }
                     ],
                     "bgm_data": None,
                     "ad_info": None,
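With the identifiers promoted to the top level of the fallback record, default rows and real rows can be read the same way downstream. A small illustration under that assumption (the values are the sample ones above; "req-001" is a placeholder):

default_data = {
    "request_id": "req-001",
    "content_id": "684a789b000000002202a61b",
    "id": 1,
    "task_id": 1,
    "crawl_data": {"channel": 1},
}
# The same extraction works whether the row came from the DB or the fallback.
meta = {"content_id": default_data.get("content_id") or "",
        "task_id": default_data.get("task_id") or ""}
print(meta)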
@@ -175,14 +179,10 @@ class StructureTool:
         """
         try:
             # Extract the required fields from the raw data
-            content_id = crawl_raw.get('channel_content_id') or ''
-            task_id = 0  # Default task ID; adjust as needed
+            content_id = crawl_raw.get('content_id') or ''
+            task_id = crawl_raw.get('task_id') or ''  # empty string when the caller omits it
             
             # Build the data for storage
-            parsing_data = {
-                'crawl_raw': crawl_raw,
-                'identify_result': identify_result,
-            }
             
             sql = (
                 "INSERT INTO knowledge_parsing_content "
@@ -196,7 +196,7 @@ class StructureTool:
                 content_id, 
                 request_id, 
                 task_id, 
-                json.dumps(parsing_data, ensure_ascii=False),
+                json.dumps(identify_result, ensure_ascii=False),
                 status
             )
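For reference, a sketch of how that parameter tuple maps onto a parameterized INSERT through the standard DB-API. sqlite3 is a stand-in engine, and the column names after the table name are assumptions inferred from the parameter order, since the diff truncates the real column list:

import json
import sqlite3  # stand-in engine; the real code presumably targets MySQL

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE knowledge_parsing_content "
    "(content_id TEXT, request_id TEXT, task_id TEXT, parsing_data TEXT, status INTEGER)"
)
identify_result = {"channel": 1}
conn.execute(
    "INSERT INTO knowledge_parsing_content "
    "(content_id, request_id, task_id, parsing_data, status) "
    "VALUES (?, ?, ?, ?, ?)",
    ("684a789b000000002202a61b", "req-001", "1",
     json.dumps(identify_result, ensure_ascii=False), 1),
)
print(conn.execute("SELECT * FROM knowledge_parsing_content").fetchone())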