""" @author: luojunhui 计算账号的阅读均值倍数 """ import json from pandas import DataFrame from tqdm import tqdm from applications import DeNetMysql from applications import AIDTApi D = DeNetMysql() def get_account_avg(): """ 获取账号 :return: """ with open("/Users/luojunhui/cyber/LongArticlesJob/dev/avg_new_health.json", encoding="utf-8") as f: avg_dict = json.loads(f.read()) account_position_list = list(avg_dict.keys()) L = [] for account in tqdm(account_position_list): gh_id = account[:-2] index = int(account[-1:]) select_sql = f""" select title, read_cnt, link from crawler_meta_article where out_account_id = '{gh_id}' and article_index = {index} and status = 1; """ result_list = D.select(select_sql) try: avg_read = avg_dict[account] for i in result_list: title, read_cnt, link = i avg_score = read_cnt / avg_read temp = [title, link, read_cnt, avg_score] L.append(temp) except: continue sl = sorted(L, reverse=True, key=lambda x: x[3]) a = 0 b = 0 LL = [] for line in sl: title = line[0] read_cnt = line[2] if "农历" in title or '太极' in title or "节" in title or line[3] < 1.3 or len(title) < 15 or read_cnt < 5000: a += 1 continue else: b += 1 print(line) LL.append(line) print(a) print(b) df = DataFrame(LL, columns=["title", "link", "read", "read_avg"]) df.to_excel("health_2.xlsx", index=False) # url_list = [i[1] for i in LL[3:]] # try: # AIDTApi().updateArticleIntoCrawlerPlan( # plan_id=None, # plan_name="历史冷启-0816-new", # plan_tag="autoArticlePoolLevel1", # url_list=url_list # ) # except Exception as e: # print("error--{}".format(e)) get_account_avg()