hace 3 meses · c268093f7c
--- a/agents/clean_agent/tools.py
+++ b/agents/clean_agent/tools.py
@@ -60,12 +60,11 @@ def execute_continuous_evaluation_extraction(request_id: str, db: Session, query
 
				     logger.info(f"开始处理，request_id: {request_id}, query_word: {query_word}")
			
 
				     
			
 
				     total_processed = 0
			
 
				-    offset = 0
			
 
				     
			
 
				     try:
			
 
				         while True:
			
 
				-            # 分批获取待评估的内容，使用offset实现分页
			
 
				-            contents = get_batch_contents_for_evaluation(request_id, db, BATCH_SIZE, offset)
			
 
				+            # 分批获取待评估的内容
			
 
				+            contents = get_batch_contents_for_evaluation(request_id, db, BATCH_SIZE)
			
 
				             
			
 
				             logger.info(f"获取到 {len(contents)} 条待评估内容")
			
 
				 
			
@@ -89,14 +88,12 @@ def execute_continuous_evaluation_extraction(request_id: str, db: Session, query
 
				                     batch_extract_and_save_content(high_score_results, db, request_id, query_word)
			
 
				                 
			
 
				                 total_processed += len(contents)
			
 
				-                offset += len(contents)  # 更新offset值，以便下次获取下一批数据
			
 
				                 db.commit()  # 每批次处理完成后提交事务
			
 
				             except Exception as e:
			
 
				                 # 当前批次处理失败时回滚事务
			
 
				                 db.rollback()
			
 
				                 logger.error(f"处理批次数据时出错: {e}")
			
 
				-                # 继续处理下一批数据
			
 
				-                offset += len(contents)
			
 
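				+                # Continue with the next iteration; no offset is kept, the loop simply re-runs the query. NOTE(review): after a rollback the failed rows may still match the query and be fetched again — confirm this cannot retry the same batch forever.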
			
 
				     except Exception as e:
			
 
				         # 发生严重异常时回滚事务并抛出异常
			
 
				         db.rollback()
			
@@ -104,7 +101,7 @@ def execute_continuous_evaluation_extraction(request_id: str, db: Session, query
 
				         raise
			
 
				     # 这里的代码永远不会被执行到，因为在while循环中，当contents为空时会返回
			
 
				 
			
 
				-def get_batch_contents_for_evaluation(request_id: str, db: Session, batch_size: int, offset: int = 0) -> list:
			
 
				+def get_batch_contents_for_evaluation(request_id: str, db: Session, batch_size: int) -> list:
			
 
				     query = db.query(KnowledgeParsingContent).outerjoin(
			
 
				         KnowledgeExtractionContent,
			
 
				         KnowledgeParsingContent.id == KnowledgeExtractionContent.parsing_id
			
@@ -114,7 +111,7 @@ def get_batch_contents_for_evaluation(request_id: str, db: Session, batch_size:
 
				         KnowledgeExtractionContent.parsing_id == None
			
 
				     )
			
 
				     
			
 
				-    return query.offset(offset).limit(batch_size).all()
			
 
				+    return query.limit(batch_size).all()
			
 
				 
			
 
				 def batch_evaluate_content(contents: list, db: Session, request_id: str, query_word: str) -> list:
			
 
				     if not contents:
			
@@ -292,5 +289,5 @@ def batch_call_llm_for_extraction(evaluation_results: list, query_word: str) ->
 
				         
			
 
				     except Exception as e:
			
 
				         logger.error(f"批量抽取过程异常: {str(e)}")
			
 
				-        # 返回空结果
			
 
				-        return ["{}"] * len(evaluation_results)
			
 
				+        # 返回空结果，确保返回类型为元组列表
			
 
				+        return [("未提取到内容", "抽取过程异常") for _ in range(len(evaluation_results))]