Browse Source

clean_agent

丁云鹏 5 days ago
parent
commit
45ba6c7b4b
1 changed file with 8 additions and 2 deletions
  1. 8 2
      agents/clean_agent/tools.py

+ 8 - 2
agents/clean_agent/tools.py

@@ -250,7 +250,12 @@ def batch_call_llm_for_evaluation(contents: list, query_word: str) -> list:
         return evaluation_results
         
     except Exception as e:
-        logger.error(f"批量评估过程异常: {str(e)}")
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        # 提取错误的行号
+        tb = traceback.extract_tb(exc_traceback)[-1]  # 获取最后一个 traceback(即错误发生的位置)
+        line_number = tb.lineno  # 行号
+        line_content = tb.line   # 错误行的代码内容
+        logger.error(f"批量评估过程异常: {line_number} 行: {line_content}")
         # 返回默认结果
         return [(content.id, 0, "评估过程异常", content.data if hasattr(content, 'data') else (content.parsing_data or "")) for content in contents]
 
@@ -271,7 +276,8 @@ def batch_call_llm_for_extraction(evaluation_results: list, query_word: str) ->
         # 处理返回结果
         extraction_results = []
         for i, result in enumerate(results):
-            result = re.sub(r'^\s*```json|\s*```\s*$', '', result, flags=re.MULTILINE).strip()
+            # 只处理大括号外面的内容,保留JSON内部格式
+            result = re.sub(r'(^\s*```json)|(\s*```\s*$)', '', result, flags=re.MULTILINE).strip()
             result = json.loads(result)
             extracted_data = result.get("extracted_content", "未提取到内容")
             clean_reason = result.get("analysis_reason", "未返回原因")