|
@@ -0,0 +1,131 @@
|
|
|
|
|
+"""
|
|
|
|
|
+建立待解构视频 video_id 队列, 以视频的 oss_path 作为唯一视频粒度
|
|
|
|
|
+"""
|
|
|
|
|
+
|
|
|
|
|
+from typing import Dict
|
|
|
|
|
+
|
|
|
|
|
+from app.core.database import DatabaseManager
|
|
|
|
|
+from app.core.observability import LogService
|
|
|
|
|
+
|
|
|
|
|
+from app.infra.shared import run_tasks_with_asyncio_task_group
|
|
|
|
|
+
|
|
|
|
|
+from ._const import VideoDecodeConst
|
|
|
|
|
+from ._utils import VideoDecodeUtils
|
|
|
|
|
+from ._mapper import VideoDecodeMapper
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
class DecodeVideoProduce(VideoDecodeConst):
    """Produce the queue of videos awaiting decode.

    A video's OSS path (``video_path``) acts as the unique key at video
    granularity: a video is only enqueued for decoding when no decode-queue
    entry exists for its path yet.
    """

    def __init__(self, pool: DatabaseManager, log_service: LogService):
        """Wire up the mapper (DB access) and helper utilities.

        ``pool`` is used only indirectly, through the mapper.
        """
        self.log_service: LogService = log_service
        self.tool: VideoDecodeUtils = VideoDecodeUtils()
        self.mapper: VideoDecodeMapper = VideoDecodeMapper(pool)

    async def save_decode_info(
        self, video_id, channel, hot_scene_type, video_path, title, root_source_id, dt
    ):
        """Persist a video's metadata and enqueue it for decoding.

        First inserts the row into the ``video_decode_data`` table; on a
        duplicate video_id the insert fails and nothing is enqueued. Then,
        only when no decode-queue entry exists for ``video_path``, a new
        queue entry is created.
        """
        # Store into the video_decode_data table.
        insert_row = await self.mapper.save_video_to_decode_data(
            data=(
                video_id,
                channel,
                hot_scene_type,
                video_path,
                title,
                root_source_id,
                dt,
            )
        )
        if not insert_row:
            # Duplicate video_id: the row already exists, so do not enqueue.
            # NOTE(review): error reported via print despite the injected
            # log_service — confirm whether this should go through logging.
            print(f"INSERT error!!!! Duplicated VideoId, video_id {video_id}")
            return

        # Enter the decode queue only when this path has never been queued.
        decoding_queue = await self.mapper.fetch_decode_task_queue(video_path)
        if decoding_queue:
            # Already decoded successfully — nothing to do.
            print("该视频已经解构成功,无需解构")
            return

        # NOTE(review): the queue row carries a hard-coded date instead of the
        # caller-supplied `dt` — confirm this is intentional.
        await self.mapper.insert_into_decode_task_queue(
            data=(video_path, video_id, "20260429")
        )

    async def decode_daily_video(self, video_obj: Dict, dt: str):
        """Resolve a daily-article video to its real OSS path and save it.

        Falls back to :meth:`decode_other_video` when the source article
        cannot be found, the article/video id mapping disagrees, or no
        mini-program match result exists.
        """
        # Upstream guarantees root_source_id is present (it would have
        # failed earlier otherwise).
        root_source_id = video_obj["root_source_id"]
        video_id = int(video_obj["video_id"])

        # Look up the source article for this root_source_id.
        article_info = await self.mapper.fetch_video_source_content(root_source_id)
        if not article_info:
            return await self.decode_other_video(video_obj=video_obj, dt=dt)

        # Only the first row of the result set is used.
        row = article_info[0]
        gh_id = row["gh_id"]
        content_id = row["content_id"]
        inner_vid = row["video_id"]
        trace_id = row["trace_id"]
        if inner_vid != video_id:
            # Mapping between root_source_id and video_id is inconsistent;
            # treat the video as a non-daily one.
            print(
                f"error!!!!, root_source_id 和 video_id 映射失败 内部视频id{inner_vid}, 内部视频 type{type(inner_vid)}, 外部视频id{video_id}, 外部视频类型{type(video_id)}"
            )
            return await self.decode_other_video(video_obj=video_obj, dt=dt)

        # Look up the matched mini-program info: v1 by (gh_id, content_id)
        # first, then fall back to v2 by trace_id.
        match_video_info = await self.mapper.fetch_video_match_result_v1(
            gh_id=gh_id, content_id=content_id
        )
        if not match_video_info:
            match_video_info = await self.mapper.fetch_video_match_result_v2(
                trace_id=trace_id
            )

        if not match_video_info:
            print(
                f"error!!!!, gh_id{gh_id}, content_id{content_id}, trace_id{trace_id}"
            )
            return await self.decode_other_video(video_obj=video_obj, dt=dt)

        video_path = self.tool.get_match_video_real_path(match_video_info, video_id)
        return await self.save_decode_info(
            video_id=video_id,
            channel=video_obj["channel"],
            hot_scene_type=video_obj["hot_scene_type"],
            video_path=video_path,
            title=video_obj["title"],
            root_source_id=video_obj["root_source_id"],
            dt=dt,
        )

    async def decode_other_video(self, video_obj: Dict, dt: str):
        """Resolve a non-daily video's OSS path via the PQ lookup and save it."""
        video_id = video_obj["video_id"]
        video_path = await self.tool.get_pq_video_real_path(video_id=video_id)
        await self.save_decode_info(
            video_id=video_id,
            channel=video_obj["channel"],
            hot_scene_type=video_obj["hot_scene_type"],
            video_path=video_path,
            title=video_obj["title"],
            root_source_id=video_obj["root_source_id"],
            dt=dt,
        )

    async def process_single_video(self, video_obj: Dict):
        """Dispatch one video to the daily-article or generic decode path."""
        # NOTE(review): dt is a hard-coded date — confirm whether it should
        # be derived from the current date instead.
        dt = "20260427"
        hot_scene_type = video_obj.get("hot_scene_type")
        if hot_scene_type == self.SceneType.DAILY_ARTICLE:
            return await self.decode_daily_video(video_obj, dt)
        return await self.decode_other_video(video_obj, dt)

    async def deal(self):
        """Entry point: fetch top videos from ODPS and process them concurrently."""
        odps_video_list = self.tool.get_top_head_videos(duration="history")
        if not odps_video_list:
            # Nothing fetched; nothing to produce.
            return
        task_list = self.tool.process_odps_data(odps_video_list)

        # Fan out over the videos with bounded concurrency.
        await run_tasks_with_asyncio_task_group(
            task_list=task_list,
            handler=self.process_single_video,
            description="解构视频生产",
            unit="video",
            max_concurrency=10,
        )
|