123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166 |
- """
- @author: luojunhui
- """
- import datetime
- import traceback
- from argparse import ArgumentParser
- from applications import longArticlesMySQL, bot
- from coldStartTasks.crawler.weixinCategoryCrawler import weixinCategory
- from coldStartTasks.publish.publish_single_video_pool_videos import PublishSingleVideoPoolVideos
- from coldStartTasks.publish.publishCategoryArticles import CategoryColdStartTask
- from coldStartTasks.filter.title_similarity_task import ColdStartTitleSimilarityTask
- DEFAULT_CATEGORY_LIST = ['1030-手动挑号', 'account_association']
def publish_single_video_task():
    """
    Run the single-video content pool publish job.

    Builds a ``PublishSingleVideoPoolVideos`` instance and calls its ``deal()``
    method. Any exception raised while doing so is not propagated; it is
    reported through ``bot`` together with the formatted traceback.
    """
    try:
        PublishSingleVideoPoolVideos().deal()
    except Exception as err:
        failure = {
            "error": str(err),
            "error_msg": traceback.format_exc(),
        }
        bot(title="视频内容池任务创建失败", detail=failure)
class AccountColdStartDailyTask:
    """
    Account cold-start job: a crawl step and a publish step sharing one DB client.

    ``db_client`` stays ``None`` until ``init_db`` succeeds; both task methods
    hand it to the worker objects they create.
    """

    def __init__(self):
        """Create the task with no database client attached yet."""
        self.db_client = None

    def init_db(self):
        """
        Create the ``longArticlesMySQL`` client and store it on the instance.

        :return: True when the client was created; False when construction
                 raised (the failure is reported through ``bot``).
        """
        try:
            self.db_client = longArticlesMySQL()
        except Exception as err:
            failure = {
                "error": str(err),
                "error_msg": traceback.format_exc(),
            }
            bot(title='账号抓取任务, 冷启动数据库连接失败', detail=failure)
            return False
        return True

    def crawler_task(self, category_list, date_str):
        """
        Crawl the given categories, then run title-similarity scoring.

        :param category_list: categories forwarded to ``weixinCategory.deal``
        :param date_str: date string forwarded to ``weixinCategory.deal``
        :return: None; success and failure are both reported through ``bot``
        """
        try:
            # Set up the category crawler on top of the shared DB client.
            crawler = weixinCategory(db_client=self.db_client)
            crawler.deal(category_list=category_list, date_str=date_str)

            # Once crawling is finished, score the crawled titles for similarity.
            similarity_task = ColdStartTitleSimilarityTask()
            similarity_task.init_database()
            similarity_task.run(meta_source='article')

            finished_at = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
            summary = {
                "finish_time": finished_at,
                "category": category_list,
            }
            bot(title="账号冷启动任务,抓取完成", detail=summary, mention=False)
        except Exception as err:
            failure = {
                "error": str(err),
                "error_msg": traceback.format_exc(),
            }
            bot(title="账号抓取冷启动任务,抓取失败", detail=failure)

    def publish_article_task(self, category_list, article_source):
        """
        Publish account articles to the AIGC crawl plan and bind the generate plan.

        :param category_list: article categories
        :param article_source: article source (toutiao or weixin)
        :return: None; success and failure are both reported through ``bot``
        """
        try:
            publisher = CategoryColdStartTask(db_client=self.db_client)
            publisher.do_job(
                category_list=category_list,
                article_source=article_source,
            )

            finished_at = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S')
            summary = {
                "finish_time": finished_at,
                "category": category_list,
            }
            bot(title="账号冷启任务,发布完成", detail=summary, mention=False)
        except Exception as err:
            failure = {
                "error": str(err),
                "error_msg": traceback.format_exc(),
            }
            bot(title="账号发布冷启动任务,发布失败", detail=failure)
def main(date_str, category_list=None, article_source=None):
    """
    Daily entry point, intended to be run from crontab.

    Runs the video content pool publish step first, then the article pool:
    articles are crawled only when the source is ``'weixin'``, and published
    for every source. Nothing from the article pool runs if the database
    client cannot be initialised.

    :param date_str: date string handed to the crawler step
    :param category_list: categories to process; falls back to
                          ``DEFAULT_CATEGORY_LIST`` when empty or None
    :param article_source: article source; falls back to ``'weixin'`` when
                           empty or None
    :return: None
    """
    # The video content pool is published before any article work.
    publish_single_video_task()

    # Then handle the article content pool.
    categories = category_list or DEFAULT_CATEGORY_LIST
    source = article_source or 'weixin'

    cold_start = AccountColdStartDailyTask()
    if not cold_start.init_db():
        return

    if source == 'weixin':
        cold_start.crawler_task(category_list=categories, date_str=date_str)
    cold_start.publish_article_task(category_list=categories, article_source=source)
- if __name__ == '__main__':
- parser = ArgumentParser()
- parser.add_argument("--run_date", help="--run_date format: %Y-%m-%d")
- args = parser.parse_args()
- if args.run_date:
- run_date = args.run_date
- else:
- run_date = datetime.date.today().isoformat()
- # 执行微信抓取发布
- main(date_str=run_date)
- # 执行头条发布
- main(
- date_str=run_date,
- category_list=['history', 'tech', 'finance', 'entertainment'],
- article_source='toutiao'
- )
|