jihuaqiang committed 5a1cb2eaf0 · 1 week ago
2 changed files with 26 additions and 14 deletions
  1. agent.py (+15 -3)
  2. agent_tools.py (+11 -11)

+ 15 - 3
agent.py

@@ -130,6 +130,8 @@ def create_langgraph_workflow():
             
             item = items[current_index]
             state["current_item"] = item
+            state["content_id"] = item.get('content_id') or ''
+            state["task_id"] = item.get('task_id') or ''
             state["current_index"] = current_index + 1
             
             # Process the current item
@@ -144,7 +146,10 @@ def create_langgraph_workflow():
             # Step 2: structure the result and persist it to the DB
             affected = StructureTool.store_parsing_result(
                 state["request_id"], 
-                item.get('raw') or {}, 
+                {
+                    "content_id": state["content_id"],
+                    "task_id": state["task_id"]
+                }, 
                 identify_result
             )
             
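Taken together, the additions in this hunk thread each item's identifiers through the workflow state before the tools run. A minimal, runnable sketch of that pattern, assuming item is a plain dict from the upstream queue (the helper name thread_item_ids is hypothetical):

def thread_item_ids(state: dict, item: dict) -> dict:
    # Missing or null identifiers fall back to '' so downstream
    # storage never receives None.
    state["content_id"] = item.get("content_id") or ""
    state["task_id"] = item.get("task_id") or ""
    return state

state = thread_item_ids({}, {"content_id": "684a789b000000002202a61b"})
print(state)  # {'content_id': '684a789b000000002202a61b', 'task_id': ''}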
@@ -319,7 +324,10 @@ async def parse_processing(request: TriggerRequest, background_tasks: Background
                     # Step 2: structure the result and persist it to the DB
                     affected = StructureTool.store_parsing_result(
                         request.requestId, 
-                        item.get('raw') or {}, 
+                        {
+                            "content_id": item.get('content_id') or '',
+                            "task_id": item.get('task_id') or ''
+                        }, 
                         identify_result
                     )
                     
@@ -418,6 +426,7 @@ async def process_request_background(request_id: str):
             for idx, item in enumerate(items, start=1):
                 try:
                     crawl_data = item.get('crawl_data') or {}
+                    content_id = item.get('content_id') or ''
                     
                     identify_result = identify_tool.run(
                         crawl_data if isinstance(crawl_data, dict) else {}
@@ -425,7 +434,10 @@ async def process_request_background(request_id: str):
                     
                     affected = StructureTool.store_parsing_result(
                         request_id, 
-                        item.get('raw') or {}, 
+                        {
+                            "content_id": content_id,
+                            "task_id": item.get('task_id') or ''
+                        },
                         identify_result
                     )
                     
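All three call sites in this file now share one contract: StructureTool.store_parsing_result(request_id, meta, identify_result), where meta carries only content_id and task_id. A hedged sketch of that contract with a stand-in for the real tool (the stub below is hypothetical; it only echoes what the real method persists):

import json

def store_parsing_result_stub(request_id: str, meta: dict, identify_result: dict) -> int:
    # Stand-in for StructureTool.store_parsing_result: prints the values
    # the real method would write and returns a fake affected-row count.
    print(meta.get("content_id") or "", request_id, meta.get("task_id") or "",
          json.dumps(identify_result, ensure_ascii=False))
    return 1

affected = store_parsing_result_stub(
    "req-001",
    {"content_id": "684a789b000000002202a61b", "task_id": 1},
    {"channel": 1},
)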

+ 11 - 11
agent_tools.py

@@ -23,6 +23,10 @@ class QueryDataTool:
             logger.info(f"request_id={request_id} no data found; using default values")
             # Return default data
             default_data = {
+                "request_id": request_id,
+                "content_id": "684a789b000000002202a61b",
+                "id": 1,
+                "task_id": 1,
                 "crawl_data": {
                     "channel": 1,
                     "channel_content_id": "684a789b000000002202a61b",
@@ -42,10 +46,10 @@ class QueryDataTool:
                         }
                     ],
                     "video_url_list": [
-                        {
-                            "video_url": "http://rescdn.yishihui.com/pipeline/video/9e38400e-21dc-4063-bab5-47c1667bb59d.mp4",
-                            "video_duration": 615
-                        }
+                        # {
+                        #     "video_url": "http://rescdn.yishihui.com/pipeline/video/9e38400e-21dc-4063-bab5-47c1667bb59d.mp4",
+                        #     "video_duration": 615
+                        # }
                     ],
                     "bgm_data": None,
                     "ad_info": None,
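With the identifiers promoted to the top level of the fallback record, default rows and real rows can be read the same way downstream. A small illustration under that assumption (the values are the sample ones above; "req-001" is a placeholder):

default_data = {
    "request_id": "req-001",
    "content_id": "684a789b000000002202a61b",
    "id": 1,
    "task_id": 1,
    "crawl_data": {"channel": 1},
}
# The same extraction works whether the row came from the DB or the fallback.
meta = {"content_id": default_data.get("content_id") or "",
        "task_id": default_data.get("task_id") or ""}
print(meta)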
@@ -175,14 +179,10 @@ class StructureTool:
         """
         try:
             # Extract the required fields from the raw data
-            content_id = crawl_raw.get('channel_content_id') or ''
-            task_id = 0  # Default task ID; adjust as needed
+            content_id = crawl_raw.get('content_id') or ''
+            task_id = crawl_raw.get('task_id') or ''  # empty string when the caller omits it
             
             # Build the data for storage
-            parsing_data = {
-                'crawl_raw': crawl_raw,
-                'identify_result': identify_result,
-            }
             
             sql = (
                 "INSERT INTO knowledge_parsing_content "
@@ -196,7 +196,7 @@ class StructureTool:
                 content_id, 
                 request_id, 
                 task_id, 
-                json.dumps(parsing_data, ensure_ascii=False),
+                json.dumps(identify_result, ensure_ascii=False),
                 status
             )
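For reference, a sketch of how that parameter tuple maps onto a parameterized INSERT through the standard DB-API. sqlite3 is a stand-in engine, and the column names after the table name are assumptions inferred from the parameter order, since the diff truncates the real column list:

import json
import sqlite3  # stand-in engine; the real code presumably targets MySQL

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE knowledge_parsing_content "
    "(content_id TEXT, request_id TEXT, task_id TEXT, parsing_data TEXT, status INTEGER)"
)
identify_result = {"channel": 1}
conn.execute(
    "INSERT INTO knowledge_parsing_content "
    "(content_id, request_id, task_id, parsing_data, status) "
    "VALUES (?, ?, ?, ?, ?)",
    ("684a789b000000002202a61b", "req-001", "1",
     json.dumps(identify_result, ensure_ascii=False), 1),
)
print(conn.execute("SELECT * FROM knowledge_parsing_content").fetchone())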