""" @author: luojunhui """ import datetime import traceback from argparse import ArgumentParser from applications import longArticlesMySQL, bot from coldStartTasks.crawler.weixinCategoryCrawler import weixinCategory from coldStartTasks.publish.publish_single_video_pool_videos import PublishSingleVideoPoolVideos from coldStartTasks.publish.publishCategoryArticles import CategoryColdStartTask from coldStartTasks.filter.title_similarity_task import ColdStartTitleSimilarityTask DEFAULT_CATEGORY_LIST = ['1030-手动挑号', 'account_association'] def publish_single_video_task(): """ 从视频内容池获取抓取 """ try: publish_single_video_pool_videos = PublishSingleVideoPoolVideos() publish_single_video_pool_videos.deal() except Exception as e: bot( title="视频内容池任务创建失败", detail={ "error": str(e), "error_msg": traceback.format_exc() } ) class AccountColdStartDailyTask(object): """ 账号冷启动代码 """ def __init__(self): """ """ self.db_client = None def init_db(self): """ 初始化数据库 :return: """ try: self.db_client = longArticlesMySQL() return True except Exception as e: bot( title='账号抓取任务, 冷启动数据库连接失败', detail={ "error": str(e), "error_msg": traceback.format_exc() } ) return False def crawler_task(self, category_list, date_str): """ :return: """ # 初始化category抓取类 try: weixin_category_crawler = weixinCategory(db_client=self.db_client) weixin_category_crawler.deal(category_list=category_list, date_str=date_str) # 抓取完成之后,给抓取到的标题进行相似度打分 cold_start_title_similarity_task = ColdStartTitleSimilarityTask() cold_start_title_similarity_task.init_database() cold_start_title_similarity_task.run(meta_source='article') bot( title="账号冷启动任务,抓取完成", detail={ "finish_time": datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'), "category": category_list }, mention=False ) except Exception as e: bot( title="账号抓取冷启动任务,抓取失败", detail={ "error": str(e), "error_msg": traceback.format_exc() } ) def publish_article_task(self, category_list, article_source): """ 将账号文章发布到aigc抓取计划,并且绑定生成计划 :param category_list: 文章品类 :param article_source: 文章来源(toutiao or weixin) :return: """ try: weixin_category_publisher = CategoryColdStartTask(db_client=self.db_client) weixin_category_publisher.do_job( category_list=category_list, article_source=article_source ) bot( title="账号冷启任务,发布完成", detail={ "finish_time": datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S'), "category": category_list }, mention=False ) except Exception as e: bot( title="账号发布冷启动任务,发布失败", detail={ "error": str(e), "error_msg": traceback.format_exc() } ) def main(date_str, category_list=None, article_source=None): """ main job, use crontab to do job daily :return: """ # 首先发布视频内容池 publish_single_video_task() # 再处理文章内容池 if not category_list: category_list = DEFAULT_CATEGORY_LIST if not article_source: article_source = 'weixin' task = AccountColdStartDailyTask() if task.init_db(): if article_source == 'weixin': task.crawler_task(category_list=category_list, date_str=date_str) task.publish_article_task(category_list=category_list, article_source=article_source) if __name__ == '__main__': parser = ArgumentParser() parser.add_argument("--run_date", help="--run_date format: %Y-%m-%d") args = parser.parse_args() if args.run_date: run_date = args.run_date else: run_date = datetime.date.today().isoformat() # 执行微信抓取发布 main(date_str=run_date) # 执行头条发布 main( date_str=run_date, category_list=['history', 'tech', 'finance', 'entertainment'], article_source='toutiao' )