|
@@ -60,12 +60,11 @@ def execute_continuous_evaluation_extraction(request_id: str, db: Session, query
|
|
|
logger.info(f"开始处理,request_id: {request_id}, query_word: {query_word}")
|
|
|
|
|
|
total_processed = 0
|
|
|
- offset = 0
|
|
|
|
|
|
try:
|
|
|
while True:
|
|
|
- # 分批获取待评估的内容,使用offset实现分页
|
|
|
- contents = get_batch_contents_for_evaluation(request_id, db, BATCH_SIZE, offset)
|
|
|
+ # 分批获取待评估的内容
|
|
|
+ contents = get_batch_contents_for_evaluation(request_id, db, BATCH_SIZE)
|
|
|
|
|
|
logger.info(f"获取到 {len(contents)} 条待评估内容")
|
|
|
|
|
@@ -89,14 +88,12 @@ def execute_continuous_evaluation_extraction(request_id: str, db: Session, query
|
|
|
batch_extract_and_save_content(high_score_results, db, request_id, query_word)
|
|
|
|
|
|
total_processed += len(contents)
|
|
|
- offset += len(contents) # 更新offset值,以便下次获取下一批数据
|
|
|
db.commit() # 每批次处理完成后提交事务
|
|
|
except Exception as e:
|
|
|
# 当前批次处理失败时回滚事务
|
|
|
db.rollback()
|
|
|
logger.error(f"处理批次数据时出错: {e}")
|
|
|
- # 继续处理下一批数据
|
|
|
- offset += len(contents)
|
|
|
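+ # Continue with the next loop iteration. NOTE(review): no offset is kept any more, so after this rollback the query may return the same rows again -- confirm that a persistently failing batch cannot be retried forever.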
|
|
|
except Exception as e:
|
|
|
# 发生严重异常时回滚事务并抛出异常
|
|
|
db.rollback()
|
|
@@ -104,7 +101,7 @@ def execute_continuous_evaluation_extraction(request_id: str, db: Session, query
|
|
|
raise
|
|
|
# 这里的代码永远不会被执行到,因为在while循环中,当contents为空时会返回
|
|
|
|
|
|
-def get_batch_contents_for_evaluation(request_id: str, db: Session, batch_size: int, offset: int = 0) -> list:
|
|
|
+def get_batch_contents_for_evaluation(request_id: str, db: Session, batch_size: int) -> list:
|
|
|
query = db.query(KnowledgeParsingContent).outerjoin(
|
|
|
KnowledgeExtractionContent,
|
|
|
KnowledgeParsingContent.id == KnowledgeExtractionContent.parsing_id
|
|
@@ -114,7 +111,7 @@ def get_batch_contents_for_evaluation(request_id: str, db: Session, batch_size:
|
|
|
KnowledgeExtractionContent.parsing_id == None
|
|
|
)
|
|
|
|
|
|
- return query.offset(offset).limit(batch_size).all()
|
|
|
+ return query.limit(batch_size).all()
|
|
|
|
|
|
def batch_evaluate_content(contents: list, db: Session, request_id: str, query_word: str) -> list:
|
|
|
if not contents:
|
|
@@ -292,5 +289,5 @@ def batch_call_llm_for_extraction(evaluation_results: list, query_word: str) ->
|
|
|
|
|
|
except Exception as e:
|
|
|
logger.error(f"批量抽取过程异常: {str(e)}")
|
|
|
- # 返回空结果
|
|
|
- return ["{}"] * len(evaluation_results)
|
|
|
+ # 返回空结果,确保返回类型为元组列表
|
|
|
+ return [("未提取到内容", "抽取过程异常") for _ in range(len(evaluation_results))]
|