""" @author: luojunhui """ import datetime import json from tqdm import tqdm from applications import AIDTApi, DeNetMysql, PQMySQL, Functions from config import poolLevelConfig, cateMap, coldPoolArticlesNum, accountBaseInfo class ColdStartTask(object): """ 冷启分配任务 """ AidApi = AIDTApi() DeMysql = DeNetMysql() PqMysql = PQMySQL() Fun = Functions() pool3 = poolLevelConfig['3'] @classmethod def generate_account_dict(cls): """ 生成account_list :return: """ account_dict = {} for key in accountBaseInfo: account_name = accountBaseInfo[key]['accountName'] account_gh_id = accountBaseInfo[key]['ghId'] account_dict[account_name] = account_gh_id return account_dict @classmethod def getTopArticles(cls, category, limit_count): """ 获取高分享的文章list :return: """ sql = f""" select content_channel_id, content_link, title from cold_start_article_pool where category = '{category}' order by view_count DESC, publish_time_stamp DESC limit {limit_count}; """ result = cls.PqMysql.select(sql) return result @classmethod def getAccountScoreList(cls, title_list, account_name): """ 预分配文章给不同的账号 :return: """ score_list = cls.Fun.getTitleScore(title_list=title_list, account_name=account_name)[account_name]['score_list'] return score_list @classmethod def splitToAccount(cls, obj_list): """ split articles to each account :return: """ account_dict = cls.generate_account_dict() account_list = list(account_dict.keys()) title_list = [i['title'] for i in obj_list] for account in tqdm(account_list): score_list = cls.getAccountScoreList(title_list=title_list, account_name=account) L = [] for index, score in enumerate(score_list): L.append((obj_list[index]['id'], score)) SL = sorted(L, key=lambda x: x[1], reverse=True) gh_id = account_dict[account] date_str = datetime.datetime.today().strftime("%Y-%m-%d") insert_sql = f""" INSERT INTO article_pre_distribute_account (gh_id, date, article_list) VALUES (%s, %s, %s); """ try: PQMySQL.update(sql=insert_sql, params=(gh_id, date_str, json.dumps(SL[:30], ensure_ascii=False))) except Exception as e: print("插入出现问题----{}".format(e)) print("成功更新完成") @classmethod def findArticlesDaily(cls): """ 和每个账号计算相关性分数 :return: """ category_list = [ "军事政法", "健康养生", "宗教历史", "情感生活", "娱乐八卦", "新闻媒体" ] L = [] for category in tqdm(category_list): print("{} is processing......".format(category)) limit_count = coldPoolArticlesNum * cateMap.get(category, 0.1) article_tuple = cls.getTopArticles(category, int(limit_count)) title_list = [article[2] for article in article_tuple] score_list = cls.Fun.getTitleScore(title_list, "指尖奇文")['指尖奇文']['score_list'] for index, score in enumerate(score_list): obj = { "id": article_tuple[index][0], "url": article_tuple[index][1], "title": article_tuple[index][2], "cate": category, "score": score } L.append(obj) result = [i for i in L if i['score'] >= 0.35] return result @classmethod def sendToColdPool(cls, plan_id, plan_name, plan_tag): """ 把文章send至第四层 :return: """ # 获取6个品类的数据 target_article_list = cls.findArticlesDaily() # 预分配账号 cls.splitToAccount(target_article_list) # 再加一次配比,每个品类的数量占比 cls.AidApi.updateArticleIntoCrawlerPlan( plan_id=plan_id, plan_name=plan_name, plan_tag=plan_tag, url_list=[i['url'] for i in target_article_list] ) if __name__ == '__main__': CST = ColdStartTask() CST.sendToColdPool( plan_id=None, plan_name="冷启池子--0729--Monday--分品类抓取--6个品类", plan_tag="autoArticlePoolLevel1", )