Browse Source

diff clear

luojunhui 1 week ago
parent
commit
1f11f10198

+ 5 - 1
applications/pipeline/crawler_pipeline.py

@@ -45,7 +45,7 @@ def whether_duplicate_video_title(video_title: str, db_client) -> bool:
     return False
 
 
-def scrape_video_entities_process(video_item, db_client) -> dict:
+def scrape_video_entities_process(video_item, db_client, oss_path=None) -> dict:
     """
     video crawler pipeline
     """
@@ -60,6 +60,10 @@ def scrape_video_entities_process(video_item, db_client) -> dict:
     if whether_duplicate_video_title(video_title, db_client):
         return empty_dict
 
+    if oss_path:
+        video_item["video_oss_path"] = oss_path
+        return video_item
+
     # download video
     match platform:
         case "toutiao":

+ 0 - 0
coldStartTasks/crawler/piaoquan/__init__.py


+ 83 - 1
coldStartTasks/publish/publish_single_video_pool_videos.py

@@ -15,6 +15,11 @@ const = SingleVideoPoolPublishTaskConst()
 
 video_pool_config = json.loads(config.getConfigValue(key="video_pool_publish_config"))
 
+video_pool_config["piaoquan"] = {  # NOTE(review): hard-coded entry set after the remote config is loaded; replaces any "piaoquan" key that config already had
+    "nick_name": "票圈视频",  # used as the prefix of the crawler plan name
+    "process_num_each_day": 1000,  # not read anywhere in the code visible in this diff — TODO confirm where it is consumed
+    "generate_plan_id": "20250416060125363145973"  # generate plan that new crawler plans get bound to
+}
 
 class PublishSingleVideoPoolVideos:
     def __init__(self):
@@ -114,4 +119,81 @@ class PublishSingleVideoPoolVideos:
                         'msg': '该平台无待发布视频,请关注供给的抓取'
                     },
                     mention=False
-                )
+                )
+
+    def create_crawler_plan_by_category(self):  # Groups pending 'piaoquan' queue rows by category, creates one crawler plan per non-empty category, binds it to the configured generate plan, and returns every fetched row.
+        platform = 'piaoquan'  # the only platform this method handles; also the key into video_pool_config
+        fetch_query = f"""
+                   select t1.id, t1.content_trace_id, t1.pq_vid, t2.category
+                   from single_video_transform_queue t1
+                       join publish_single_video_source t2 on t1.content_trace_id = t2.content_trace_id
+                   where t1.status = {const.TRANSFORM_INIT_STATUS} and t1.platform = '{platform}';
+               """  # selects queue rows still in TRANSFORM_INIT_STATUS for this platform, joined to their source row to pick up the category
+        fetch_response = self.db_client.fetch(query=fetch_query, cursor_type=DictCursor)  # rows are accessed by column name below, so each row is expected to be a dict
+        category_list = ['健康养生', '军事历史', '历史人物', '名人八卦', '奇闻趣事', '家长里短', '情感故事', '政治新闻', '知识科普', '社会法治']  # fixed category set; rows whose category is not listed here are never planned by this method
+        for category in category_list:
+            category_task_list = [task for task in fetch_response if task['category'] == category]  # rows belonging to the current category
+            task_id_tuple = tuple([task['id'] for task in category_task_list])  # queue row ids whose status is updated after a successful bind
+            vid_list = [task['pq_vid'] for task in category_task_list]  # video ids handed to the crawler plan
+            if vid_list:
+                try:
+                    # create one crawler plan for this category; its name is nick_name-category-date-video count
+                    plan_name = f"{video_pool_config[platform]['nick_name']}-{category}-{datetime.datetime.today().strftime('%Y-%m-%d')}-视频数量: {len(vid_list)}"
+                    crawler_plan_response = aiditApi.auto_create_single_video_crawler_task(
+                        plan_name=plan_name,
+                        plan_tag="单视频供给冷启动",
+                        video_id_list=vid_list,
+                    )
+                    crawler_plan_id = crawler_plan_response["data"]["id"]  # a response without data/id raises here and is reported by the except branch
+                    crawler_plan_name = crawler_plan_response["data"]["name"]
+
+                    # describe the new crawler plan as an input source and bind it to the platform's generate plan
+                    crawler_task_list = [
+                        {
+                            "contentType": 1,
+                            "inputSourceModal": 4,
+                            "inputSourceChannel": 10,
+                            "inputSourceType": 2,
+                            "inputSourceValue": crawler_plan_id,  # id of the crawler plan created above
+                            "inputSourceSubType": None,
+                            "fieldName": None,
+                            "inputSourceLabel": "原始帖子-视频-票圈小程序-内容添加计划-{}".format(crawler_plan_name),
+                        }
+                    ]
+                    generate_plan_id = video_pool_config[platform]['generate_plan_id']  # target generate plan comes from the platform's pool config
+                    aiditApi.bind_crawler_task_to_generate_task(
+                        crawler_task_list=crawler_task_list,
+                        generate_task_id=generate_plan_id,
+                    )
+
+                    # update this category's rows from ori_status to new_status; reached only if create and bind did not raise
+                    self.update_tasks_status(
+                        task_id_tuple=task_id_tuple,
+                        ori_status=const.TRANSFORM_INIT_STATUS,
+                        new_status=const.TRANSFORM_SUCCESS_STATUS
+                    )
+                except Exception as e:  # any failure in this category is reported via bot and not re-raised, so the loop moves on to the next category
+                    bot(
+                        title='视频内容池发布任务',
+                        detail={
+                            'platform': platform,
+                            'date': datetime.datetime.today().strftime('%Y-%m-%d'),
+                            'msg': '发布视频内容池失败,原因:{}'.format(str(e)),
+                            'detail': traceback.format_exc(),  # full traceback is included in the alert payload
+                        },
+                        mention=False
+                    )
+            else:  # no pending rows for this category; this alert is sent once per empty category
+                bot(
+                    title='视频内容池发布任务',
+                    detail={
+                        'platform': platform,
+                        'date': datetime.datetime.today().strftime('%Y-%m-%d'),
+                        'msg': '该平台无待发布视频,请关注供给的抓取'
+                    },
+                    mention=False
+                )
+        return fetch_response  # all rows fetched at the start, regardless of per-category success or failure
+
+P = PublishSingleVideoPoolVideos()  # NOTE(review): executed at module level with no `if __name__ == "__main__":` guard, so importing this module constructs the publisher
+P.create_crawler_plan_by_category()  # NOTE(review): also runs on import — queries the DB and may create/bind crawler plans; confirm this is intended and not leftover debugging