|
@@ -79,6 +79,13 @@ identify_tool = None
|
|
# 全局线程池
|
|
# 全局线程池
|
|
THREAD_POOL = concurrent.futures.ThreadPoolExecutor(max_workers=20)
|
|
THREAD_POOL = concurrent.futures.ThreadPoolExecutor(max_workers=20)
|
|
|
|
|
|
|
|
import threading

# Serialises first-time construction of the shared IdentifyTool so that
# concurrent callers (for example THREAD_POOL workers) cannot each build
# their own instance.
_IDENTIFY_TOOL_LOCK = threading.Lock()


def get_identify_tool():
    """Return the shared IdentifyTool, creating it on first use.

    The module-level ``identify_tool`` global starts out as ``None``; it is
    built lazily here instead of at import time so that the tool is also
    available when this module is used from a child process.

    Returns:
        The ``IdentifyTool`` instance stored in the module-level
        ``identify_tool`` global.
    """
    global identify_tool
    # Fast path: once the instance exists no locking is needed.
    if identify_tool is None:
        with _IDENTIFY_TOOL_LOCK:
            # Re-check while holding the lock: another thread may have
            # finished construction while this one was waiting.
            if identify_tool is None:
                identify_tool = IdentifyTool()
    return identify_tool
|
|
|
|
+
|
|
def update_request_status(request_id: str, status: int):
|
|
def update_request_status(request_id: str, status: int):
|
|
"""
|
|
"""
|
|
更新 knowledge_request 表中的 parsing_status
|
|
更新 knowledge_request 表中的 parsing_status
|
|
@@ -393,7 +400,7 @@ def process_single_item(args):
|
|
# 0 未识别 3识别失败,需要重新进行识别
|
|
# 0 未识别 3识别失败,需要重新进行识别
|
|
if result_status == 0 or result_status == 3:
|
|
if result_status == 0 or result_status == 3:
|
|
# Step 1: 识别
|
|
# Step 1: 识别
|
|
- identify_result = identify_tool.run(
|
|
|
|
|
|
+ identify_result = get_identify_tool().run(
|
|
crawl_data if isinstance(crawl_data, dict) else {}
|
|
crawl_data if isinstance(crawl_data, dict) else {}
|
|
)
|
|
)
|
|
|
|
|