|
|
@@ -5,6 +5,8 @@ from tqdm import tqdm
|
|
|
from app.core.database import DatabaseManager
|
|
|
from app.core.observability import LogService
|
|
|
|
|
|
+from app.infra.shared import run_tasks_with_asyncio_task_group
|
|
|
+
|
|
|
from ._const import AdPlatformArticlesDecodeConst
|
|
|
from ._mapper import AdPlatformArticlesDecodeMapper
|
|
|
from ._util import AdPlatformArticlesDecodeUtil
|
|
|
@@ -52,7 +54,9 @@ class AdPlatformArticlesDecodeTask(AdPlatformArticlesDecodeConst):
|
|
|
)
|
|
|
return
|
|
|
|
|
|
- task_id = response.get("data", {}).get("task_id") or response.get("data", {}).get("taskId")
|
|
|
+ task_id = response.get("data", {}).get("task_id") or response.get(
|
|
|
+ "data", {}
|
|
|
+ ).get("taskId")
|
|
|
if not task_id:
|
|
|
# 解构任务创建失败
|
|
|
await self.mapper.update_article_decode_status(
|
|
|
@@ -195,11 +199,87 @@ class AdPlatformArticlesDecodeTask(AdPlatformArticlesDecodeConst):
|
|
|
}
|
|
|
)
|
|
|
|
|
|
async def extract_single_result(self, task):
    """Parse one decode result and persist its extracted detail.

    The row's extract status is used as a lock: the task is only processed
    when the INIT -> PROCESSING status update reports success. Every exit
    path after that point moves the row out of PROCESSING, to either
    FAILED or SUCCESS.

    Args:
        task: Mapping with an ``"id"`` key and a ``"result"`` key. The
            latter is expected to hold a JSON document with a top-level
            ``"result"`` entry.

    Returns:
        None. The outcome is recorded through the mapper and log service.

    Raises:
        Exception: Anything raised by the extraction tool or by the detail
            insert is re-raised after the row has been marked FAILED.
    """
    task_id = task["id"]

    async def mark_failed(message, **extra):
        # Single failure exit: release the PROCESSING lock as FAILED, then log.
        await self.mapper.update_extract_status(
            task_id,
            self.PROCESSING_STATUS,
            self.FAILED_STATUS,
        )
        await self.log_service.log(
            contents={
                "task": "extract_single_result",
                "task_id": task_id,
                "status": "fail",
                "message": message,
                **extra,
            }
        )

    # Acquire the lock via extract_status; a falsy return means the row was
    # not in INIT (presumably claimed by another worker - verify in mapper).
    acquired = await self.mapper.update_extract_status(
        task_id, self.INIT_STATUS, self.PROCESSING_STATUS
    )
    if not acquired:
        return

    try:
        result = json.loads(task["result"])["result"]
    except (TypeError, KeyError, json.JSONDecodeError) as e:
        await mark_failed(
            f"parse decode result error: {e}",
            raw=task.get("result"),
        )
        return

    try:
        detail = self.tool.extract_decode_result(result)
        tool_error = detail.get("error")
        # Only write the detail table when the tool reported no error.
        saved = (
            False
            if tool_error
            else await self.mapper.record_extract_detail(task_id, detail)
        )
    except Exception as e:
        # Without this the row would stay in PROCESSING after an unexpected
        # failure. Cancellation is not an Exception subclass and is left
        # alone. Re-raise so callers observe the same error as before.
        await mark_failed(f"extract decode detail error: {e}")
        raise

    # The tool reported an error: mark the task as failed directly.
    if tool_error:
        await mark_failed(tool_error)
        return

    if not saved:
        await mark_failed(
            "insert long_articles_decode_task_detail failed",
            detail=detail,
        )
        return

    # Detail row written: mark the task as successful.
    await self.mapper.update_extract_status(
        task_id,
        self.PROCESSING_STATUS,
        self.SUCCESS_STATUS,
    )
|
|
|
+
|
|
|
async def create_tasks(self):
|
|
|
article_list = await self.mapper.fetch_decode_articles()
|
|
|
if not article_list:
|
|
|
await self.log_service.log(
|
|
|
- contents={"task": "create_tasks", "message": "No more articles to decode"}
|
|
|
+ contents={
|
|
|
+ "task": "create_tasks",
|
|
|
+ "message": "No more articles to decode",
|
|
|
+ }
|
|
|
)
|
|
|
return
|
|
|
|
|
|
@@ -217,6 +297,15 @@ class AdPlatformArticlesDecodeTask(AdPlatformArticlesDecodeConst):
|
|
|
for task in decoding_tasks:
|
|
|
await self.fetch_single_task(task)
|
|
|
|
|
|
async def extract_task(self):
    """Run the extraction step over every task returned by the mapper.

    Loads the task list from ``self.mapper.fetch_extract_tasks()`` and hands
    it to ``run_tasks_with_asyncio_task_group`` with
    ``self.extract_single_result`` as the per-task handler.
    """
    pending = await self.mapper.fetch_extract_tasks()
    # Progress-display options forwarded unchanged to the shared runner.
    runner_options = {
        "description": "批量解析结构结果",
        "unit": "task",
    }
    await run_tasks_with_asyncio_task_group(
        task_list=pending,
        handler=self.extract_single_result,
        **runner_options,
    )
|
|
|
+
|
|
|
async def deal(self, task_name):
|
|
|
match task_name:
|
|
|
case "create_tasks":
|
|
|
@@ -225,5 +314,8 @@ class AdPlatformArticlesDecodeTask(AdPlatformArticlesDecodeConst):
|
|
|
case "fetch_results":
|
|
|
await self.fetch_results()
|
|
|
|
|
|
+ case "extract":
|
|
|
+ await self.extract_task()
|
|
|
+
|
|
|
|
|
|
__all__ = ["AdPlatformArticlesDecodeTask"]
|