10 mesi fa · 1f11f10198
--- a/applications/pipeline/crawler_pipeline.py
+++ b/applications/pipeline/crawler_pipeline.py
@@ -45,7 +45,7 @@ def whether_duplicate_video_title(video_title: str, db_client) -> bool:
 
				     return False
			
 
				 
			
 
				 
			
 
				-def scrape_video_entities_process(video_item, db_client) -> dict:
			
 
				+def scrape_video_entities_process(video_item, db_client, oss_path=None) -> dict:
			
 
				     """
			
 
				     video crawler pipeline
			
 
				     """
			
@@ -60,6 +60,10 @@ def scrape_video_entities_process(video_item, db_client) -> dict:
 
				     if whether_duplicate_video_title(video_title, db_client):
			
 
				         return empty_dict
			
 
				 
			
 
				+    if oss_path:
			
 
				+        video_item["video_oss_path"] = oss_path
			
 
				+        return video_item
			
 
				+
			
 
				     # download video
			
 
				     match platform:
			
 
				         case "toutiao":
			
--- a/coldStartTasks/crawler/piaoquan/__init__.py
+++ b/coldStartTasks/crawler/piaoquan/__init__.py
--- a/coldStartTasks/publish/publish_single_video_pool_videos.py
+++ b/coldStartTasks/publish/publish_single_video_pool_videos.py
@@ -15,6 +15,11 @@ const = SingleVideoPoolPublishTaskConst()
 
				 
			
 
				 video_pool_config = json.loads(config.getConfigValue(key="video_pool_publish_config"))
			
 
				 
			
 
				+video_pool_config["piaoquan"] = {
			
 
				+    "nick_name": "票圈视频",
			
 
				+    "process_num_each_day": 1000,
			
 
				+    "generate_plan_id": "20250416060125363145973"
			
 
				+}
			
 
				 
			
 
				 class PublishSingleVideoPoolVideos:
			
 
				     def __init__(self):
			
@@ -114,4 +119,81 @@ class PublishSingleVideoPoolVideos:
 
				                         'msg': '该平台无待发布视频，请关注供给的抓取'
			
 
				                     },
			
 
				                     mention=False
			
 
				-                )
			
 
				+                )
			
 
				+
			
 
    def create_crawler_plan_by_category(self):
        """
        Create one crawler plan per category for pending 'piaoquan' videos.

        Rows are fetched from single_video_transform_queue (joined with
        publish_single_video_source to obtain the category) while they are
        still in const.TRANSFORM_INIT_STATUS. They are grouped by category
        once, and for every category in the fixed list below this method:

        1. creates a single-video crawler plan from the rows' pq_vid values,
        2. binds that crawler plan to the generate plan configured for the
           platform in video_pool_config,
        3. moves the rows from TRANSFORM_INIT_STATUS to
           TRANSFORM_SUCCESS_STATUS.

        A bot notification (including the category) is sent when a category
        has no pending rows, or when any of the steps above raises; a failure
        in one category does not stop the remaining categories. Rows whose
        category is not in the fixed list are left untouched.

        :return: the rows returned by the fetch query, unchanged
        """
        platform = 'piaoquan'
        category_list = [
            '健康养生', '军事历史', '历史人物', '名人八卦', '奇闻趣事',
            '家长里短', '情感故事', '政治新闻', '知识科普', '社会法治',
        ]
        # Both values are internal constants (not user input), so inlining
        # them in the query text is safe here.
        fetch_query = f"""
                   select t1.id, t1.content_trace_id, t1.pq_vid, t2.category
                   from single_video_transform_queue t1
                       join publish_single_video_source t2 on t1.content_trace_id = t2.content_trace_id
                   where t1.status = {const.TRANSFORM_INIT_STATUS} and t1.platform = '{platform}';
               """
        fetch_response = self.db_client.fetch(query=fetch_query, cursor_type=DictCursor)

        # Group the rows once instead of rescanning all of them per category.
        # `or ()` keeps a None result from crashing before any alert is sent.
        tasks_by_category = {}
        for task in fetch_response or ():
            tasks_by_category.setdefault(task['category'], []).append(task)

        today_str = datetime.datetime.today().strftime('%Y-%m-%d')

        for category in category_list:
            category_task_list = tasks_by_category.get(category, [])
            if not category_task_list:
                # Alerts are sent per category, so name the category to keep
                # otherwise identical messages distinguishable.
                bot(
                    title='视频内容池发布任务',
                    detail={
                        'platform': platform,
                        'category': category,
                        'date': today_str,
                        'msg': '该平台无待发布视频，请关注供给的抓取'
                    },
                    mention=False
                )
                continue

            task_id_tuple = tuple(task['id'] for task in category_task_list)
            vid_list = [task['pq_vid'] for task in category_task_list]

            # Best-effort per category: any failure is reported and the loop
            # moves on to the next category.
            try:
                # create video crawler plan
                plan_name = f"{video_pool_config[platform]['nick_name']}-{category}-{today_str}-视频数量： {len(vid_list)}"
                crawler_plan_response = aiditApi.auto_create_single_video_crawler_task(
                    plan_name=plan_name,
                    plan_tag="单视频供给冷启动",
                    video_id_list=vid_list,
                )
                crawler_plan_id = crawler_plan_response["data"]["id"]
                crawler_plan_name = crawler_plan_response["data"]["name"]

                # bind crawler plan to generate plan
                crawler_task_list = [
                    {
                        "contentType": 1,
                        "inputSourceModal": 4,
                        "inputSourceChannel": 10,
                        "inputSourceType": 2,
                        "inputSourceValue": crawler_plan_id,
                        "inputSourceSubType": None,
                        "fieldName": None,
                        "inputSourceLabel": "原始帖子-视频-票圈小程序-内容添加计划-{}".format(crawler_plan_name),
                    }
                ]
                generate_plan_id = video_pool_config[platform]['generate_plan_id']
                aiditApi.bind_crawler_task_to_generate_task(
                    crawler_task_list=crawler_task_list,
                    generate_task_id=generate_plan_id,
                )

                # Only mark the rows as done after plan creation and binding
                # both succeeded.
                self.update_tasks_status(
                    task_id_tuple=task_id_tuple,
                    ori_status=const.TRANSFORM_INIT_STATUS,
                    new_status=const.TRANSFORM_SUCCESS_STATUS
                )
            except Exception as e:
                bot(
                    title='视频内容池发布任务',
                    detail={
                        'platform': platform,
                        'category': category,
                        'date': today_str,
                        'msg': '发布视频内容池失败，原因：{}'.format(str(e)),
                        'detail': traceback.format_exc(),
                    },
                    mention=False
                )
        return fetch_response
			
 
				+
			
 
				+P = PublishSingleVideoPoolVideos()
			
 
				+P.create_crawler_plan_by_category()