""" @author: luojunhui 分发逻辑 """ import json import datetime from applications import PQMySQL, WeixinSpider from tqdm import tqdm from config import accountBaseInfo class ArticleDistribution(object): """ 冷启文章分发逻辑 """ account_position_dict = { "gh_058e41145a0c": 30, "gh_0e4fd9e88386": 30, "gh_744cb16f6e16": 30, "gh_ac43eb24376d": 30, "gh_970460d9ccec": 30, "gh_56ca3dae948c": 30, "gh_c91b42649690": 30, "gh_6d205db62f04": 30, "gh_e24da99dc899": 30, "gh_4c058673c07e": 30, "gh_03d32e83122f": 30, "gh_c69776baf2cd": 30, "gh_30816d8adb52": 30, "gh_789a40fe7935": 30, "gh_95ed5ecf9363": 30, "gh_3e91f0624545": 30, "gh_57573f01b2ee": 30, "gh_9877c8541764": 30, "gh_6cfd1132df94": 30, "gh_008ef23062ee": 30, "gh_5ae65db96cb7": 30, "gh_be8c29139989": 30, "gh_51e4ad40466d": 30, "gh_d4dffc34ac39": 30, "gh_89ef4798d3ea": 30, "gh_b15de7c99912": 30, "gh_9f8dc5b0c74e": 30, "gh_7b4a5f86d68c": 30, "gh_c5cdf60d9ab4": 5, "gh_0c89e11f8bf3": 5, "gh_e0eb490115f5": 5, "gh_a2901d34f75b": 5, "gh_d5f935d0d1f2": 30 } pq_mysql_client = PQMySQL() Spider = WeixinSpider() @classmethod def generate_account_dict(cls): """ 生成account_list :return: """ account_dict = {} for key in accountBaseInfo: account_name = accountBaseInfo[key]['accountName'] account_gh_id = accountBaseInfo[key]['ghId'] account_dict[account_name] = account_gh_id return account_dict @classmethod def findArticleScoreList(cls, url_md5): """ 获取文章的相关账号的相关性分数 :param url_md5: :return: """ sql = f""" select account_score, ori_account from association_articles where url_md5 = '{url_md5}'; """ response = cls.pq_mysql_client.select(sql=sql) return response @classmethod def association_split(cls, article_list): """ 联想类型文章分发逻辑 :param article_list: :return: """ account_name_map = cls.generate_account_dict() L = {} for article in tqdm(article_list): link = article['url'] url_md5 = article['url_md5'] title = article['title'] c_id = article['id'] title_match_list = cls.findArticleScoreList(url_md5) title_match_list = sorted(title_match_list, key=lambda x: x[0], reverse=True) # print("标题:\t", title) # print("相关账号:\t", title_match_list) # print("\n") for account_tuple in title_match_list: account_name = account_tuple[1] score = account_tuple[0] account_gh_id = account_name_map[account_name] if cls.account_position_dict.get(account_gh_id): try: # channel_content_id = cls.Spider.get_article_text(link)['data']['data']['channel_content_id'] channel_content_id = c_id except: print(link) channel_content_id = url_md5 # channel_content_id = "id" if cls.account_position_dict[account_gh_id] > 0: if L.get(account_gh_id): if len(L[account_gh_id]) >= 10: continue else: L[account_gh_id].append([channel_content_id, score]) else: L[account_gh_id] = [[channel_content_id, score]] cls.account_position_dict[account_gh_id] -= 1 else: continue for account in tqdm(L): date_str = datetime.datetime.today().strftime("%Y-%m-%d") print(account, date_str, json.dumps(L[account], ensure_ascii=False)) insert_sql = f""" INSERT INTO article_pre_distribute_account (gh_id, date, article_list) VALUES (%s, %s, %s); """ try: PQMySQL.update(sql=insert_sql, params=(account, date_str, json.dumps(L[account], ensure_ascii=False))) except Exception as e: print("插入出现问题----{}".format(e)) return L