account_cold_start_daily.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. """
  2. @author: luojunhui
  3. """
  4. import datetime
  5. import traceback
  6. from argparse import ArgumentParser
  7. from applications import longArticlesMySQL, bot
  8. from coldStartTasks.crawler.weixinCategoryCrawler import weixinCategory
  9. from coldStartTasks.publish.publish_single_video_pool_videos import PublishSingleVideoPoolVideos
  10. from coldStartTasks.publish.publishCategoryArticles import CategoryColdStartTask
  11. from coldStartTasks.filter.title_similarity_task import ColdStartTitleSimilarityTask
  # Categories main() falls back to when it is called without a category_list.
  12. DEFAULT_CATEGORY_LIST = ['1030-手动挑号', 'account_association']
  13. def publish_single_video_task():
  14. """
  15. 从视频内容池获取抓取
  16. """
  17. try:
  18. publish_single_video_pool_videos = PublishSingleVideoPoolVideos()
  19. publish_single_video_pool_videos.deal()
  20. except Exception as e:
  21. bot(
  22. title="视频内容池任务创建失败",
  23. detail={
  24. "error": str(e),
  25. "error_msg": traceback.format_exc()
  26. }
  27. )
  28. class AccountColdStartDailyTask(object):
  29. """
  30. 账号冷启动代码
  31. """
  32. def __init__(self):
  33. """
  34. """
  35. self.db_client = None
  36. def init_db(self):
  37. """
  38. 初始化数据库
  39. :return:
  40. """
  41. try:
  42. self.db_client = longArticlesMySQL()
  43. return True
  44. except Exception as e:
  45. bot(
  46. title='账号抓取任务, 冷启动数据库连接失败',
  47. detail={
  48. "error": str(e),
  49. "error_msg": traceback.format_exc()
  50. }
  51. )
  52. return False
  53. def crawler_task(self, category_list, date_str):
  54. """
  55. :return:
  56. """
  57. # 初始化category抓取类
  58. try:
  59. weixin_category_crawler = weixinCategory(db_client=self.db_client)
  60. weixin_category_crawler.deal(category_list=category_list, date_str=date_str)
  61. # 抓取完成之后,给抓取到的标题进行相似度打分
  62. cold_start_title_similarity_task = ColdStartTitleSimilarityTask()
  63. cold_start_title_similarity_task.init_database()
  64. cold_start_title_similarity_task.run(meta_source='article')
  65. bot(
  66. title="账号冷启动任务,抓取完成",
  67. detail={
  68. "finish_time": datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'),
  69. "category": category_list
  70. },
  71. mention=False
  72. )
  73. except Exception as e:
  74. bot(
  75. title="账号抓取冷启动任务,抓取失败",
  76. detail={
  77. "error": str(e),
  78. "error_msg": traceback.format_exc()
  79. }
  80. )
  81. def publish_article_task(self, category_list, article_source):
  82. """
  83. 将账号文章发布到aigc抓取计划,并且绑定生成计划
  84. :param category_list: 文章品类
  85. :param article_source: 文章来源(toutiao or weixin)
  86. :return:
  87. """
  88. try:
  89. weixin_category_publisher = CategoryColdStartTask(db_client=self.db_client)
  90. weixin_category_publisher.do_job(
  91. category_list=category_list,
  92. article_source=article_source
  93. )
  94. bot(
  95. title="账号冷启任务,发布完成",
  96. detail={
  97. "finish_time": datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'),
  98. "category": category_list
  99. },
  100. mention=False
  101. )
  102. except Exception as e:
  103. bot(
  104. title="账号发布冷启动任务,发布失败",
  105. detail={
  106. "error": str(e),
  107. "error_msg": traceback.format_exc()
  108. }
  109. )
  110. def main(date_str, category_list=None, article_source=None):
  111. """
  112. main job, use crontab to do job daily
  113. :return:
  114. """
  115. # 首先发布视频内容池
  116. publish_single_video_task()
  117. # 再处理文章内容池
  118. if not category_list:
  119. category_list = DEFAULT_CATEGORY_LIST
  120. if not article_source:
  121. article_source = 'weixin'
  122. task = AccountColdStartDailyTask()
  123. if task.init_db():
  124. if article_source == 'weixin':
  125. task.crawler_task(category_list=category_list, date_str=date_str)
  126. task.publish_article_task(category_list=category_list, article_source=article_source)
  127. if __name__ == '__main__':
  128. parser = ArgumentParser()
  129. parser.add_argument("--run_date", help="--run_date format: %Y-%m-%d")
  130. args = parser.parse_args()
  131. if args.run_date:
  132. run_date = args.run_date
  133. else:
  134. run_date = datetime.date.today().isoformat()
  135. # 执行微信抓取发布
  136. main(date_str=run_date)
  137. # 执行头条发布
  138. main(
  139. date_str=run_date,
  140. category_list=['history', 'tech', 'finance', 'entertainment'],
  141. article_source='toutiao'
  142. )