jihuaqiang 18 ساعت پیش
والد
کامیت
0d912c3474
2 فایلهای تغییر یافته به همراه 53 افزوده شده و 1 حذف شده
  1. 1 1
      agent.py
  2. 52 0
      agents/expand_agent/agent.py

+ 1 - 1
agent.py

@@ -412,7 +412,7 @@ def create_langgraph_workflow():
                     result_indentify_data = {}
                     if check_result:
                         id, status, indentify_data = check_result[0]
-                        logger.info(f"查询到待结构化处理的条目,id: {id}, status: {status}, indentify_data: {indentify_data}")
+                        logger.info(f"查询到待结构化处理的条目,id: {id}, status: {status}, indentify_data: {str(indentify_data)[:100]}")
                         result_status = status
                         result_id = id
                         result_indentify_data = indentify_data

+ 52 - 0
agents/expand_agent/agent.py

@@ -119,6 +119,58 @@ def _run_llm(prompt: str) -> List[str]:
                 return []
         except json.JSONDecodeError as e:
             logger.error(f"JSON 解析失败: {e}, 原始文本: {text}")
+            
+            # 尝试修复常见的JSON格式错误
+            try:
+                # 检查是否是对象格式的数组(缺少方括号)
+                if text.strip().startswith('{') and text.strip().endswith('}'):
+                    # 尝试将对象转换为数组
+                    # 如果内容是逗号分隔的字符串,尝试解析
+                    content = text.strip()[1:-1]  # 去掉首尾的大括号
+                    
+                    # 按逗号分割,但要注意字符串内的逗号
+                    items = []
+                    current_item = ""
+                    in_quotes = False
+                    quote_char = None
+                    
+                    for char in content:
+                        if char in ['"', "'"] and not in_quotes:
+                            in_quotes = True
+                            quote_char = char
+                            current_item += char
+                        elif char == quote_char and in_quotes:
+                            in_quotes = False
+                            quote_char = None
+                            current_item += char
+                        elif char == ',' and not in_quotes:
+                            if current_item.strip():
+                                items.append(current_item.strip())
+                            current_item = ""
+                        else:
+                            current_item += char
+                    
+                    # 添加最后一个项目
+                    if current_item.strip():
+                        items.append(current_item.strip())
+                    
+                    # 清理项目(去掉引号)
+                    cleaned_items = []
+                    for item in items:
+                        item = item.strip()
+                        if item.startswith('"') and item.endswith('"'):
+                            item = item[1:-1]
+                        elif item.startswith("'") and item.endswith("'"):
+                            item = item[1:-1]
+                        cleaned_items.append(item)
+                    
+                    if cleaned_items:
+                        logger.info(f"成功修复JSON格式,提取到 {len(cleaned_items)} 个项目")
+                        return cleaned_items
+                        
+            except Exception as fix_error:
+                logger.error(f"JSON修复尝试失败: {fix_error}")
+            
             return []
             
     except Exception as e: