Selaa lähdekoodia

Merge branch 'master' into 2025-03-17-account-crawler-pipeline

luojunhui 6 kuukautta sitten
vanhempi
commit
f1e71bff39

+ 24 - 2
account_cold_start_daily.py

@@ -8,12 +8,30 @@ from argparse import ArgumentParser
 
 
 from applications import longArticlesMySQL, bot
 from applications import longArticlesMySQL, bot
 from coldStartTasks.crawler.weixinCategoryCrawler import weixinCategory
 from coldStartTasks.crawler.weixinCategoryCrawler import weixinCategory
+from coldStartTasks.publish.publish_single_video_pool_videos import PublishSingleVideoPoolVideos
 from coldStartTasks.publish.publishCategoryArticles import CategoryColdStartTask
 from coldStartTasks.publish.publishCategoryArticles import CategoryColdStartTask
 from coldStartTasks.filter.title_similarity_task import ColdStartTitleSimilarityTask
 from coldStartTasks.filter.title_similarity_task import ColdStartTitleSimilarityTask
 
 
 DEFAULT_CATEGORY_LIST = ['1030-手动挑号', 'account_association']
 DEFAULT_CATEGORY_LIST = ['1030-手动挑号', 'account_association']
 
 
 
 
+def publish_single_video_task():
+    """
+    Instantiate PublishSingleVideoPoolVideos and run its ``deal()`` step.
+
+    Any exception is caught and reported through ``bot`` rather than re-raised.
+    """
+    try:
+        PublishSingleVideoPoolVideos().deal()
+    except Exception as exc:
+        # Assemble the alert payload first, then hand it to the bot notifier.
+        failure_detail = {
+            "error": str(exc),
+            "error_msg": traceback.format_exc(),
+        }
+        bot(title="视频内容池任务创建失败", detail=failure_detail)
+
+
 class AccountColdStartDailyTask(object):
 class AccountColdStartDailyTask(object):
     """
     """
     账号冷启动代码
     账号冷启动代码
@@ -73,7 +91,7 @@ class AccountColdStartDailyTask(object):
                 }
                 }
             )
             )
 
 
-    def publish_task(self, category_list, article_source):
+    def publish_article_task(self, category_list, article_source):
         """
         """
         将账号文章发布到aigc抓取计划,并且绑定生成计划
         将账号文章发布到aigc抓取计划,并且绑定生成计划
         :param category_list:  文章品类
         :param category_list:  文章品类
@@ -109,6 +127,10 @@ def main(date_str, category_list=None, article_source=None):
     main job, use crontab to do job daily
     main job, use crontab to do job daily
     :return:
     :return:
     """
     """
+    # 首先发布视频内容池
+    publish_single_video_task()
+
+    # 再处理文章内容池
     if not category_list:
     if not category_list:
         category_list = DEFAULT_CATEGORY_LIST
         category_list = DEFAULT_CATEGORY_LIST
     if not article_source:
     if not article_source:
@@ -118,7 +140,7 @@ def main(date_str, category_list=None, article_source=None):
         if article_source == 'weixin':
         if article_source == 'weixin':
             task.crawler_task(category_list=category_list, date_str=date_str)
             task.crawler_task(category_list=category_list, date_str=date_str)
 
 
-        task.publish_task(category_list=category_list, article_source=article_source)
+        task.publish_article_task(category_list=category_list, article_source=article_source)
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':

+ 1 - 1
coldStartTasks/publish/publish_single_video_pool_videos.py

@@ -13,7 +13,7 @@ from config import long_articles_config, apolloConfig
 config = apolloConfig()
 config = apolloConfig()
 const = SingleVideoPoolPublishTaskConst()
 const = SingleVideoPoolPublishTaskConst()
 
 
-video_pool_config = json.loads(config.getConfigValue(key="video_pool_config"))
+video_pool_config = json.loads(config.getConfigValue(key="video_pool_publish_config"))
 
 
 
 
 class PublishSingleVideoPoolVideos:
 class PublishSingleVideoPoolVideos:

+ 11 - 3
tasks/crawler_channel_account_videos.py

@@ -158,9 +158,17 @@ class CrawlerChannelAccountVideos:
                 break
                 break
 
 
             response_data = response["data"]
             response_data = response["data"]
-            current_last_buffer = response_data["lastBuffer"]  # 更新分页游标
-            has_more = response_data["continueFlag"]  # 是否还有下一页
-            video_list = response_data["object"]
+            # The "data" payload is handled in two shapes: a dict carrying
+            # pagination fields, or a bare list of videos.
+            if isinstance(response_data, dict):
+                # Call .get(...) -- subscripting the method (.get[...]) raises TypeError.
+                current_last_buffer = response_data.get("lastBuffer")  # advance the pagination cursor
+                has_more = response_data["continueFlag"]  # whether another page exists
+                video_list = response_data["object"]
+            elif isinstance(response_data, list):
+                has_more = False  # a bare list carries no cursor, so treat it as the last page
+                video_list = response_data
+            else:
+                return
 
 
             if not video_list:
             if not video_list:
                 break
                 break