浏览代码

Merge branch '2024-10-30-luojunhui-add-daily-accounts-crawler' of luojunhui/LongArticlesJob into master

luojunhui 11 月之前
父节点
当前提交
6d3d7f9f17
共有 2 个文件被更改，包括 11 次插入和 14 次删除
  1. +8 -11
      account_cold_start_daily.py
  2. +3 -3
      coldStartTasks/crawler/weixinCategoryCrawler.py

+ 8 - 11
account_cold_start_daily.py

@@ -8,6 +8,8 @@ from applications import longArticlesMySQL, bot
 from coldStartTasks.crawler.weixinCategoryCrawler import weixinCategory
 from coldStartTasks.publish.publishCategoryArticles import CategoryColdStartTask
 
+DEFAULT_CATEGORY_LIST = ['1030-手动挑号']
+
 
 class AccountColdStartDailyTask(object):
     """
@@ -18,7 +20,6 @@ class AccountColdStartDailyTask(object):
         """
         """
         self.db_client = None
-        self.default_category = '1030-手动挑号'
 
     def init_db(self):
         """
@@ -38,16 +39,13 @@ class AccountColdStartDailyTask(object):
             )
             return False
 
-    def crawler_task(self, category_list=None):
+    def crawler_task(self, category_list):
         """
         :return:
         """
-        if not category_list:
-            category_list = [self.default_category]
-
         # 初始化category抓取类
         try:
-            weixin_category_crawler = weixinCategory()
+            weixin_category_crawler = weixinCategory(db_client=self.db_client)
             weixin_category_crawler.deal(category_list=category_list)
             bot(
                 title="账号冷启动任务,抓取完成",
@@ -66,14 +64,12 @@ class AccountColdStartDailyTask(object):
                 }
             )
 
-    def publish_task(self, category_list=None):
+    def publish_task(self, category_list):
         """
         将账号文章发布到aigc抓取计划,并且绑定生成计划
         :param category_list:
         :return:
         """
-        if not category_list:
-            category_list = [self.default_category]
         try:
             weixin_category_publisher = CategoryColdStartTask(db_client=self.db_client)
             weixin_category_publisher.do_job(
@@ -97,15 +93,16 @@ class AccountColdStartDailyTask(object):
             )
 
 
-def main():
+def main(category_list=None):
     """
     main job, use crontab to do job daily
     todo: 1. 开放一个输入可以输入指定品类  2. 增加对指定账号的抓取&&发布
     :return:
     """
+    if not category_list:
+        category_list = DEFAULT_CATEGORY_LIST
     task = AccountColdStartDailyTask()
     if task.init_db():
-        category_list = None
         task.crawler_task(category_list=category_list)
         task.publish_task(category_list=category_list)
 

+ 3 - 3
coldStartTasks/crawler/weixinCategoryCrawler.py

@@ -7,7 +7,7 @@ import time
 
 from tqdm import tqdm
 
-from applications import WeixinSpider, Functions, longArticlesMySQL
+from applications import WeixinSpider, Functions
 
 # 常量
 ACCOUNT_GOOD_STATUS = 1
@@ -22,8 +22,8 @@ class weixinCategory(object):
     微信全局品类账号抓取
     """
 
-    def __init__(self):
-        self.db_client_lam = longArticlesMySQL()
+    def __init__(self, db_client):
+        self.db_client_lam = db_client
         self.spider = WeixinSpider()
         self.function = Functions()