"""
@author: luojunhui

Compute, per outside account, each crawled article's read count as a
multiple of that account's average read count, then filter down to the
articles worth cold-starting.
"""
import json

from pandas import DataFrame

from applications import DeNetMysql
from applications import AIDTApi

D = DeNetMysql()

# Quality-filter thresholds for candidate articles.
MIN_AVG_SCORE = 1.3   # article must read at >= 1.3x its account's average
MIN_TITLE_LEN = 15    # drop very short titles
MIN_READ_CNT = 1000   # drop low-traffic articles


def get_accounts(category="军事政法"):
    """
    Fetch outside-account ids for one category.

    :param category: account category to filter on; defaults to the
        value previously hard-coded, so existing callers are unaffected.
    :return: list of account_id values.
    """
    # NOTE(review): the category is interpolated straight into SQL.
    # Safe for the trusted module-level constant, but switch to
    # parameter binding if D.select supports it and this value ever
    # comes from untrusted input.
    sql = f"""select account_id from long_article_accounts_outside where category = '{category}';"""
    return [row[0] for row in D.select(sql)]


def get_account_avg():
    """
    Score every crawled article against its account's average read count
    and collect the ones passing the quality filters.

    Reads per-account averages from ./avg.json
    (presumably a mapping account_id -> average read count — confirm
    against whatever produces that file).
    """
    with open("avg.json", encoding="utf-8") as f:
        avg_dict = json.loads(f.read())

    scored = []
    for account in get_accounts():
        # Was a bare `except: continue` around the whole loop body;
        # narrowed to the two conditions it legitimately skipped:
        # missing average, and a zero average (division by zero).
        try:
            avg_read = avg_dict[account]
        except KeyError:
            continue
        if not avg_read:
            continue
        select_sql = f"""
            select title, read_cnt, link
            from crawler_meta_article
            where out_account_id = '{account}';
        """
        for title, read_cnt, link in D.select(select_sql):
            avg_score = read_cnt / avg_read
            scored.append([title, link, read_cnt, avg_score])

    # Best articles first (highest multiple of the account average).
    scored.sort(key=lambda item: item[3], reverse=True)

    dropped = 0
    kept = []
    for line in scored:
        title, _, read_cnt, avg_score = line
        # Skip holiday/calendar pieces and anything below the thresholds.
        if (
            "农历" in title
            or "节" in title
            or avg_score < MIN_AVG_SCORE
            or len(title) < MIN_TITLE_LEN
            or read_cnt < MIN_READ_CNT
        ):
            dropped += 1
            continue
        kept.append(line)

    # Downstream export / upload, deliberately disabled — kept for reference.
    # print(dropped)
    # print(len(kept))
    # df = DataFrame(kept, columns=["title", "link", "read", "read_avg"])
    # df.to_excel("test.xlsx", index=False)
    # url_list = [i[1] for i in kept]
    # try:
    #     AIDTApi().updateArticleIntoCrawlerPlan(
    #         plan_id=None,
    #         plan_name="军事政法类冷启-0805-new",
    #         plan_tag="autoArticlePoolLevel1",
    #         url_list=url_list,
    #     )
    # except Exception as e:
    #     print("error--{}".format(e))


if __name__ == "__main__":
    # Previously ran unconditionally at import time; guarded so the
    # module can be imported without touching the database.
    get_account_avg()