""" @author: luojunhui 计算账号的阅读均值倍数 """ import json import pandas as pd from pandas import DataFrame from tqdm import tqdm from applications import DeNetMysql from applications import AIDTApi D = DeNetMysql() def get_account_avg(): """ 获取账号 :return: """ # with open("/Users/luojunhui/cyber/LongArticlesJob/dev/军事历史.json", encoding="utf-8") as f: # avg_dict = json.loads(f.read()) # # account_position_list = list(avg_dict.keys()) # L = [] # for account in tqdm(account_position_list): # gh_id = account[:-2] # index = int(account[-1:]) # select_sql = f""" # select title, read_cnt, link from crawler_meta_article # where out_account_id = '{gh_id}' and article_index = {index} and status = 1; # """ # result_list = D.select(select_sql) # try: # avg_read = avg_dict[account]['readAvg'] # for i in result_list: # title, read_cnt, link = i # avg_score = read_cnt / avg_read # temp = [title, link, read_cnt, avg_score, avg_read, avg_dict[account]['category']] # L.append(temp) # except: # continue # # sl = sorted(L, reverse=True, key=lambda x: x[3]) # a = 0 # b = 0 # LL = [] # for line in sl: # title = line[0] # read_cnt = line[2] # if "农历" in title or '太极' in title or "节" in title or line[3] < 1.3 or len(title) < 15 or read_cnt < 5000: # a += 1 # continue # else: # b += 1 # print(line) # LL.append(line) # print(a) # print(b) # df = DataFrame(LL, columns=["title", "link", "read", "read_avg_times", "read_avg", "category"]) # df.to_excel("historyArmy.xlsx", index=False) # url_list = [i[1] for i in LL[3:]] dataFrame = pd.read_excel("historyArmy.xlsx") print(dataFrame.columns.values.tolist()) url_list = [] for line in dataFrame.values.tolist(): if line[-1] == '历史': url_list.append(line[1]) print(len(url_list)) try: AIDTApi().updateArticleIntoCrawlerPlan( plan_id=None, plan_name="历史冷启-0905-new", plan_tag="autoArticlePoolLevel1", url_list=url_list ) except Exception as e: print("error--{}".format(e)) get_account_avg()