|
@@ -1,80 +0,0 @@
|
|
|
-"""
|
|
|
-@author: luojunhui
|
|
|
-计算账号的阅读均值倍数
|
|
|
-"""
|
|
|
-import json
|
|
|
-import pandas as pd
|
|
|
-from pandas import DataFrame
|
|
|
-from tqdm import tqdm
|
|
|
-from applications import DeNetMysql
|
|
|
-from applications import AIDTApi
|
|
|
-
|
|
|
# Module-level MySQL client for the DeNet database (project-local helper).
# NOTE(review): currently only used by the commented-out crawler query code
# inside get_account_avg — confirm it is still needed.
D = DeNetMysql()
|
|
|
-
|
|
|
-
|
|
|
def get_account_avg():
    """
    Push the "history" (历史) article links from a spreadsheet into a crawler plan.

    Reads the pre-computed ``historyArmy.xlsx`` workbook (columns observed at
    runtime via the printed header: title, link, read, read_avg_times,
    read_avg, category), keeps the link of every row whose last column equals
    ``'历史'``, and uploads the collected links to the AIDT crawler plan
    named "历史冷启-0905-new".

    :return: None. Progress (column names, number of matched links) is
        printed to stdout; upload errors are printed, not raised.
    """
    data_frame = pd.read_excel("historyArmy.xlsx")
    # Print the column layout so a mismatched spreadsheet is easy to spot.
    print(data_frame.columns.values.tolist())
    # Category is the last column; the article link is the second column.
    url_list = [
        row[1] for row in data_frame.values.tolist() if row[-1] == '历史'
    ]
    print(len(url_list))
    try:
        # NOTE(review): plan_id=None presumably makes the service resolve the
        # plan by plan_name — confirm against the AIDTApi implementation.
        AIDTApi().updateArticleIntoCrawlerPlan(
            plan_id=None,
            plan_name="历史冷启-0905-new",
            plan_tag="autoArticlePoolLevel1",
            url_list=url_list
        )
    except Exception as e:
        # Best-effort upload: report the failure instead of crashing the job.
        print("error--{}".format(e))
# Guard the script entry point so importing this module does not trigger
# the spreadsheet read and remote upload as a side effect.
if __name__ == "__main__":
    get_account_avg()