Browse Source

clean_agent

丁云鹏 3 days ago
parent
commit
c268093f7c
1 changed file with 7 additions and 10 deletions
  1. 7 10
      agents/clean_agent/tools.py

+ 7 - 10
agents/clean_agent/tools.py

@@ -60,12 +60,11 @@ def execute_continuous_evaluation_extraction(request_id: str, db: Session, query
     logger.info(f"开始处理,request_id: {request_id}, query_word: {query_word}")
     
     total_processed = 0
-    offset = 0
     
     try:
         while True:
-            # 分批获取待评估的内容,使用offset实现分页
-            contents = get_batch_contents_for_evaluation(request_id, db, BATCH_SIZE, offset)
+            # 分批获取待评估的内容
+            contents = get_batch_contents_for_evaluation(request_id, db, BATCH_SIZE)
             
             logger.info(f"获取到 {len(contents)} 条待评估内容")
 
@@ -89,14 +88,12 @@ def execute_continuous_evaluation_extraction(request_id: str, db: Session, query
                     batch_extract_and_save_content(high_score_results, db, request_id, query_word)
                 
                 total_processed += len(contents)
-                offset += len(contents)  # 更新offset值,以便下次获取下一批数据
                 db.commit()  # 每批次处理完成后提交事务
             except Exception as e:
                 # 当前批次处理失败时回滚事务
                 db.rollback()
                 logger.error(f"处理批次数据时出错: {e}")
-                # 继续处理下一批数据
-                offset += len(contents)
+                # 继续处理下一批数据,不需要offset变量,while循环会自动获取下一批数据
     except Exception as e:
         # 发生严重异常时回滚事务并抛出异常
         db.rollback()
@@ -104,7 +101,7 @@ def execute_continuous_evaluation_extraction(request_id: str, db: Session, query
         raise
     # 这里的代码永远不会被执行到,因为在while循环中,当contents为空时会返回
 
-def get_batch_contents_for_evaluation(request_id: str, db: Session, batch_size: int, offset: int = 0) -> list:
+def get_batch_contents_for_evaluation(request_id: str, db: Session, batch_size: int) -> list:
     query = db.query(KnowledgeParsingContent).outerjoin(
         KnowledgeExtractionContent,
         KnowledgeParsingContent.id == KnowledgeExtractionContent.parsing_id
@@ -114,7 +111,7 @@ def get_batch_contents_for_evaluation(request_id: str, db: Session, batch_size:
         KnowledgeExtractionContent.parsing_id == None
     )
     
-    return query.offset(offset).limit(batch_size).all()
+    return query.limit(batch_size).all()
 
 def batch_evaluate_content(contents: list, db: Session, request_id: str, query_word: str) -> list:
     if not contents:
@@ -292,5 +289,5 @@ def batch_call_llm_for_extraction(evaluation_results: list, query_word: str) ->
         
     except Exception as e:
         logger.error(f"批量抽取过程异常: {str(e)}")
-        # 返回空结果
-        return ["{}"] * len(evaluation_results)
+        # 返回空结果,确保返回类型为元组列表
+        return [("未提取到内容", "抽取过程异常") for _ in range(len(evaluation_results))]