Explorar o código

解构增长头部视频

luojunhui hai 1 semana
pai
achega
b4e22773c3

+ 1 - 1
app/domains/llm_tasks/decode_video/__init__.py

@@ -1 +1 @@
-from .decode_video_produce import DecodeVideoProduce
+from .decode_video_produce import DecodeVideoProduce

+ 5 - 0
app/domains/llm_tasks/decode_video/_const.py

@@ -1,4 +1,9 @@
 class VideoDecodeConst:
+    TASK_NAME = "decode_video_produce"
+    FEISHU_BOT_ENV = "long_articles_task"
+    FAIL_RATE_THRESHOLD = 0.1
+    ERROR_SAMPLE_LIMIT = 5
+
     class TaskStatus:
         INIT = 0
         PROCESSING = 1

+ 11 - 10
app/domains/llm_tasks/decode_video/_mapper.py

@@ -46,8 +46,17 @@ class VideoDecodeMapper:
             params=data,
         )
 
-    async def update_video_decode_data(self, ori_status, new_status, video_path):
-        pass
+    async def update_video_decode_data_status(self, video_id, ori_status, new_status):
+        query = """
+            UPDATE video_decode_data
+            SET status = %s
+            WHERE video_id = %s AND status = %s;
+        """
+        affected_rows = await self.pool.async_save(
+            query=query,
+            params=(new_status, video_id, ori_status),
+        )
+        return bool(affected_rows)
 
     async def insert_into_decode_task_queue(self, data: tuple):
         query = """
@@ -60,11 +69,3 @@ class VideoDecodeMapper:
             query=query,
             params=data,
         )
-
-    async def fetch_decode_task_queue(self, video_path):
-        query = """
-            SELECT video_path, sample_video_id, dt
-            FROM video_decode_queue
-            WHERE video_path = %s;
-            """
-        return await self.pool.async_fetch(query=query, params=(video_path,))

+ 146 - 22
app/domains/llm_tasks/decode_video/decode_video_produce.py

@@ -2,6 +2,8 @@
 建立待解构视频 video_id 队列, 以视频的 oss_path 作为唯一视频粒度
 """
 
+import time
+import traceback
 from functools import partial
 from typing import Dict
 
@@ -9,6 +11,8 @@ from app.core.database import DatabaseManager
 from app.core.observability import LogService
 
 from app.infra.shared import run_tasks_with_asyncio_task_group
+from app.infra.shared.tools import generate_task_trace_id
+from app.infra.external import feishu_robot
 
 from ._const import VideoDecodeConst
 from ._utils import VideoDecodeUtils
@@ -21,9 +25,21 @@ class DecodeVideoProduce(VideoDecodeConst):
         self.tool: VideoDecodeUtils = VideoDecodeUtils()
         self.mapper: VideoDecodeMapper = VideoDecodeMapper(pool)
 
+    async def _trace_log(self, **kwargs):
+        payload = {"task": self.TASK_NAME, **kwargs}
+        await self.log_service.log(contents=payload)
+
     async def save_decode_info(
         self, video_id, channel, hot_scene_type, video_path, title, root_source_id, dt
     ):
+        if not video_path:
+            await self._trace_log(
+                event="video_path_missing",
+                video_id=video_id,
+                message="video_path 为空,跳过入队",
+            )
+            return
+
         # 存储到 video_decode_data 表中
         insert_row = await self.mapper.save_video_to_decode_data(
             data=(
@@ -37,20 +53,33 @@ class DecodeVideoProduce(VideoDecodeConst):
             )
         )
         if not insert_row:
-            print(f"INSERT error!!!! Duplicated VideoId, video_id {video_id}")
+            await self._trace_log(
+                event="video_duplicated",
+                video_id=video_id,
+                message="重复 video_id,跳过",
+            )
             return
 
-        # 进入解构队列
-        decoding_queue = await self.mapper.fetch_decode_task_queue(video_path)
-        if decoding_queue:
-            # 已经解构成功
-            print("该视频已经解构成功,无需解构")
+        # 进入解构队列(INSERT IGNORE 保证幂等,避免先查后插的并发竞争)
+        insert_queue_row = await self.mapper.insert_into_decode_task_queue(
+            data=(video_path, video_id, dt)
+        )
+        if not insert_queue_row:
+            await self._trace_log(
+                event="video_already_in_queue",
+                video_id=video_id,
+                video_path=video_path,
+                message="该视频已在解构队列中,跳过",
+            )
             return
 
-        else:
-            await self.mapper.insert_into_decode_task_queue(
-                data=(video_path, video_id, dt)
-            )
+        await self._trace_log(
+            event="video_enqueued",
+            video_id=video_id,
+            video_path=video_path,
+            dt=dt,
+            message="视频已成功加入解构队列",
+        )
 
     async def decode_daily_video(self, video_obj: Dict, dt: str):
         root_source_id = video_obj["root_source_id"]  # 上游肯定有,不然上游就会报错
@@ -59,14 +88,24 @@ class DecodeVideoProduce(VideoDecodeConst):
         # 查文章信息
         article_info = await self.mapper.fetch_video_source_content(root_source_id)
         if not article_info:
+            await self._trace_log(
+                event="daily_article_not_found",
+                video_id=video_id,
+                root_source_id=root_source_id,
+                message="未找到文章信息,降级走普通视频链路",
+            )
             return await self.decode_other_video(video_obj=video_obj, dt=dt)
         gh_id = article_info[0]["gh_id"]
         content_id = article_info[0]["content_id"]
         inner_vid = article_info[0]["video_id"]
         trace_id = article_info[0]["trace_id"]
         if inner_vid != video_id:
-            print(
-                f"error!!!!, root_source_id 和  video_id 映射失败 内部视频id{inner_vid}, 内部视频 type{type(inner_vid)}, 外部视频id{video_id}, 外部视频类型{type(video_id)}"
+            await self._trace_log(
+                event="video_id_mismatch",
+                video_id=video_id,
+                inner_video_id=inner_vid,
+                root_source_id=root_source_id,
+                message="root_source_id 与 video_id 映射失败,降级走普通视频链路",
             )
             return await self.decode_other_video(video_obj=video_obj, dt=dt)
 
@@ -80,8 +119,13 @@ class DecodeVideoProduce(VideoDecodeConst):
             )
 
         if not match_video_info:
-            print(
-                f"error!!!!, gh_id{gh_id}, content_id{content_id}, trace_id{trace_id}"
+            await self._trace_log(
+                event="match_video_not_found",
+                video_id=video_id,
+                gh_id=gh_id,
+                content_id=content_id,
+                content_trace_id=trace_id,
+                message="未命中匹配视频,降级走普通视频链路",
             )
             return await self.decode_other_video(video_obj=video_obj, dt=dt)
 
@@ -97,7 +141,7 @@ class DecodeVideoProduce(VideoDecodeConst):
         )
 
     async def decode_other_video(self, video_obj: Dict, dt: str):
-        video_id = video_obj["video_id"]
+        video_id = int(video_obj["video_id"])
         video_path = await self.tool.get_pq_video_real_path(video_id=video_id)
         await self.save_decode_info(
             video_id=video_id,
@@ -109,24 +153,104 @@ class DecodeVideoProduce(VideoDecodeConst):
             dt=dt,
         )
 
-    async def process_single_video(self, video_obj: Dict, dt):
+    async def process_single_video(self, video_obj: Dict, dt, trace_id: str):
+        video_id = int(video_obj["video_id"])
         hot_scene_type = video_obj.get("hot_scene_type")
-        if hot_scene_type == self.SceneType.DAILY_ARTICLE:
-            return await self.decode_daily_video(video_obj, dt)
-        else:
-            return await self.decode_other_video(video_obj, dt)
+        try:
+            if hot_scene_type == self.SceneType.DAILY_ARTICLE:
+                await self.decode_daily_video(video_obj, dt)
+            else:
+                await self.decode_other_video(video_obj, dt)
+
+            await self._trace_log(
+                event="video_processed",
+                trace_id=trace_id,
+                video_id=video_id,
+                hot_scene_type=hot_scene_type,
+                status="success",
+            )
+        except Exception as e:
+            await self._trace_log(
+                event="video_process_failed",
+                trace_id=trace_id,
+                video_id=video_id,
+                hot_scene_type=hot_scene_type,
+                status="fail",
+                error=str(e),
+                traceback=traceback.format_exc(),
+            )
+            raise
 
    async def deal(self, execute_dt="20260401"):
        """Produce the daily decode-video queue for ``execute_dt``.

        Pipeline: pull candidate videos from ODPS, fan them out through
        ``process_single_video`` with bounded concurrency, emit structured
        start/complete logs, and alert the Feishu bot when any task failed.

        Args:
            execute_dt: partition date string, e.g. "20260401".
        """
        # One trace id ties together every log line of this run.
        trace_id = generate_task_trace_id()
        start_time = time.time()

        # NOTE(review): get_top_head_videos is called synchronously inside
        # an async method — presumably an ODPS SDK call; confirm it does
        # not block the event loop for long.
        odps_video_list = self.tool.get_top_head_videos(execute_dt=execute_dt)
        if not odps_video_list:
            await self._trace_log(
                event="task_empty",
                trace_id=trace_id,
                dt=execute_dt,
                message="未获取到待解构视频",
            )
            return

        # task_list may be smaller than odps_video_list after filtering.
        task_list = self.tool.process_odps_data(odps_video_list)
        handler = partial(self.process_single_video, dt=execute_dt, trace_id=trace_id)

        await self._trace_log(
            event="task_start",
            trace_id=trace_id,
            dt=execute_dt,
            total_videos=len(odps_video_list),
            processed_videos=len(task_list),
        )

        result = await run_tasks_with_asyncio_task_group(
            task_list=task_list,
            handler=handler,
            description="解构视频生产",
            unit="video",
            max_concurrency=10,
        )

        duration = time.time() - start_time
        error_count = len(result["errors"])
        total_task = result["total_task"]
        # Guard against division by zero on an empty task list.
        fail_rate = error_count / total_task if total_task else 0

        await self._trace_log(
            event="task_complete",
            trace_id=trace_id,
            dt=execute_dt,
            total_task=total_task,
            success_count=result["processed_task"],
            error_count=error_count,
            fail_rate=fail_rate,
            duration_seconds=duration,
            avg_time_per_video=duration / total_task if total_task else 0,
        )

        if error_count:
            # NOTE(review): assumes result["errors"] yields
            # (index, task_obj, exception) triples — confirm against
            # run_tasks_with_asyncio_task_group's contract.
            error_samples = [
                {
                    "index": idx,
                    "video_id": task_obj.get("video_id"),
                    "error": str(err),
                }
                for idx, task_obj, err in result["errors"][: self.ERROR_SAMPLE_LIMIT]
            ]
            # @-mention the group only past the failure-rate threshold.
            await feishu_robot.bot(
                title="视频解构生产任务异常",
                detail={
                    "dt": execute_dt,
                    "trace_id": trace_id,
                    "总任务数": total_task,
                    "成功数": result["processed_task"],
                    "失败数": error_count,
                    "失败率": round(fail_rate, 4),
                    "错误样例": error_samples,
                },
                mention=fail_rate >= self.FAIL_RATE_THRESHOLD,
                env=self.FEISHU_BOT_ENV,
            )