|  | @@ -1,80 +0,0 @@
 | 
	
		
			
				|  |  | -"""
 | 
	
		
			
				|  |  | -@author: luojunhui
 | 
	
		
			
				|  |  | -计算账号的阅读均值倍数
 | 
	
		
			
				|  |  | -"""
 | 
	
		
			
				|  |  | -import json
 | 
	
		
			
				|  |  | -import pandas as pd
 | 
	
		
			
				|  |  | -from pandas import DataFrame
 | 
	
		
			
				|  |  | -from tqdm import tqdm
 | 
	
		
			
				|  |  | -from applications import DeNetMysql
 | 
	
		
			
				|  |  | -from applications import AIDTApi
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -D = DeNetMysql()  # Module-level DeNetMysql handle, created at import time; no live (uncommented) code in the visible file uses it. NOTE(review): presumably opens a DB connection - confirm before removing.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -def get_account_avg():
 | 
	
		
			
				|  |  | -    """
 | 
	
		
			
				|  |  | -    获取账号
 | 
	
		
			
				|  |  | -    :return:
 | 
	
		
			
				|  |  | -    """
 | 
	
		
			
				|  |  | -    # with open("/Users/luojunhui/cyber/LongArticlesJob/dev/军事历史.json", encoding="utf-8") as f:
 | 
	
		
			
				|  |  | -    #     avg_dict = json.loads(f.read())
 | 
	
		
			
				|  |  | -    #
 | 
	
		
			
				|  |  | -    # account_position_list = list(avg_dict.keys())
 | 
	
		
			
				|  |  | -    # L = []
 | 
	
		
			
				|  |  | -    # for account in tqdm(account_position_list):
 | 
	
		
			
				|  |  | -    #     gh_id = account[:-2]
 | 
	
		
			
				|  |  | -    #     index = int(account[-1:])
 | 
	
		
			
				|  |  | -    #     select_sql = f"""
 | 
	
		
			
				|  |  | -    #     select title, read_cnt, link from crawler_meta_article
 | 
	
		
			
				|  |  | -    #     where out_account_id = '{gh_id}' and article_index = {index} and status = 1;
 | 
	
		
			
				|  |  | -    #     """
 | 
	
		
			
				|  |  | -    #     result_list = D.select(select_sql)
 | 
	
		
			
				|  |  | -    #     try:
 | 
	
		
			
				|  |  | -    #         avg_read = avg_dict[account]['readAvg']
 | 
	
		
			
				|  |  | -    #         for i in result_list:
 | 
	
		
			
				|  |  | -    #             title, read_cnt, link = i
 | 
	
		
			
				|  |  | -    #             avg_score = read_cnt / avg_read
 | 
	
		
			
				|  |  | -    #             temp = [title, link, read_cnt, avg_score, avg_read, avg_dict[account]['category']]
 | 
	
		
			
				|  |  | -    #             L.append(temp)
 | 
	
		
			
				|  |  | -    #     except:
 | 
	
		
			
				|  |  | -    #         continue
 | 
	
		
			
				|  |  | -    #
 | 
	
		
			
				|  |  | -    # sl = sorted(L, reverse=True, key=lambda x: x[3])
 | 
	
		
			
				|  |  | -    # a = 0
 | 
	
		
			
				|  |  | -    # b = 0
 | 
	
		
			
				|  |  | -    # LL = []
 | 
	
		
			
				|  |  | -    # for line in sl:
 | 
	
		
			
				|  |  | -    #     title = line[0]
 | 
	
		
			
				|  |  | -    #     read_cnt = line[2]
 | 
	
		
			
				|  |  | -    #     if "农历" in title or '太极' in title or "节" in title or line[3] < 1.3 or len(title) < 15 or read_cnt < 5000:
 | 
	
		
			
				|  |  | -    #         a += 1
 | 
	
		
			
				|  |  | -    #         continue
 | 
	
		
			
				|  |  | -    #     else:
 | 
	
		
			
				|  |  | -    #         b += 1
 | 
	
		
			
				|  |  | -    #         print(line)
 | 
	
		
			
				|  |  | -    #         LL.append(line)
 | 
	
		
			
				|  |  | -    # print(a)
 | 
	
		
			
				|  |  | -    # print(b)
 | 
	
		
			
				|  |  | -    # df = DataFrame(LL, columns=["title", "link", "read", "read_avg_times", "read_avg", "category"])
 | 
	
		
			
				|  |  | -    # df.to_excel("historyArmy.xlsx", index=False)
 | 
	
		
			
				|  |  | -    # url_list = [i[1] for i in LL[3:]]
 | 
	
		
			
				|  |  | -    dataFrame = pd.read_excel("historyArmy.xlsx")
 | 
	
		
			
				|  |  | -    print(dataFrame.columns.values.tolist())
 | 
	
		
			
				|  |  | -    url_list = []
 | 
	
		
			
				|  |  | -    for line in dataFrame.values.tolist():
 | 
	
		
			
				|  |  | -        if line[-1] == '历史':
 | 
	
		
			
				|  |  | -            url_list.append(line[1])
 | 
	
		
			
				|  |  | -    print(len(url_list))
 | 
	
		
			
				|  |  | -    try:
 | 
	
		
			
				|  |  | -        AIDTApi().updateArticleIntoCrawlerPlan(
 | 
	
		
			
				|  |  | -            plan_id=None,
 | 
	
		
			
				|  |  | -            plan_name="历史冷启-0905-new",
 | 
	
		
			
				|  |  | -            plan_tag="autoArticlePoolLevel1",
 | 
	
		
			
				|  |  | -            url_list=url_list
 | 
	
		
			
				|  |  | -        )
 | 
	
		
			
				|  |  | -    except Exception as e:
 | 
	
		
			
				|  |  | -        print("error--{}".format(e))
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -get_account_avg()  # Runs the job at import time: there is no `if __name__ == "__main__"` guard, so importing this module also triggers it.
 |