account_cold_start_daily.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. """
  2. @author: luojunhui
  3. """
  4. import datetime
  5. import traceback
  6. from argparse import ArgumentParser
  7. from applications import longArticlesMySQL, bot
  8. from coldStartTasks.crawler.weixinCategoryCrawler import weixinCategory
  9. from coldStartTasks.publish.publishCategoryArticles import CategoryColdStartTask
  10. DEFAULT_CATEGORY_LIST = ['1030-手动挑号', 'account_association']
  11. class AccountColdStartDailyTask(object):
  12. """
  13. 账号冷启动代码
  14. """
  15. def __init__(self):
  16. """
  17. """
  18. self.db_client = None
  19. def init_db(self):
  20. """
  21. 初始化数据库
  22. :return:
  23. """
  24. try:
  25. self.db_client = longArticlesMySQL()
  26. return True
  27. except Exception as e:
  28. bot(
  29. title='账号抓取任务, 冷启动数据库连接失败',
  30. detail={
  31. "error": str(e),
  32. "error_msg": traceback.format_exc()
  33. }
  34. )
  35. return False
  36. def crawler_task(self, category_list, date_str):
  37. """
  38. :return:
  39. """
  40. # 初始化category抓取类
  41. try:
  42. weixin_category_crawler = weixinCategory(db_client=self.db_client)
  43. weixin_category_crawler.deal(category_list=category_list, date_str=date_str)
  44. bot(
  45. title="账号冷启动任务,抓取完成",
  46. detail={
  47. "finish_time": datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'),
  48. "category": category_list
  49. },
  50. mention=False
  51. )
  52. except Exception as e:
  53. bot(
  54. title="账号抓取冷启动任务,抓取失败",
  55. detail={
  56. "error": str(e),
  57. "error_msg": traceback.format_exc()
  58. }
  59. )
  60. def publish_task(self, category_list, article_source):
  61. """
  62. 将账号文章发布到aigc抓取计划,并且绑定生成计划
  63. :param category_list: 文章品类
  64. :param article_source: 文章来源(toutiao or weixin)
  65. :return:
  66. """
  67. try:
  68. weixin_category_publisher = CategoryColdStartTask(db_client=self.db_client)
  69. weixin_category_publisher.do_job(
  70. category_list=category_list,
  71. article_source=article_source
  72. )
  73. bot(
  74. title="账号冷启任务,发布完成",
  75. detail={
  76. "finish_time": datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'),
  77. "category": category_list
  78. },
  79. mention=False
  80. )
  81. except Exception as e:
  82. bot(
  83. title="账号发布冷启动任务,发布失败",
  84. detail={
  85. "error": str(e),
  86. "error_msg": traceback.format_exc()
  87. }
  88. )
  89. def main(date_str, category_list=None, article_source=None):
  90. """
  91. main job, use crontab to do job daily
  92. :return:
  93. """
  94. if not category_list:
  95. category_list = DEFAULT_CATEGORY_LIST
  96. if not article_source:
  97. article_source = 'weixin'
  98. task = AccountColdStartDailyTask()
  99. if task.init_db():
  100. if article_source == 'weixin':
  101. task.crawler_task(category_list=category_list, date_str=date_str)
  102. task.publish_task(category_list=category_list, article_source=article_source)
  103. if __name__ == '__main__':
  104. parser = ArgumentParser()
  105. parser.add_argument("--run_date", help="--run_date format: %Y-%m-%d")
  106. args = parser.parse_args()
  107. if args.run_date:
  108. run_date = args.run_date
  109. else:
  110. run_date = datetime.date.today().isoformat()
  111. # 执行微信抓取发布
  112. main(date_str=run_date)
  113. # 执行头条发布
  114. main(
  115. date_str=run_date,
  116. category_list=['history', 'tech', 'finance', 'entertainment'],
  117. article_source='toutiao'
  118. )