1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- """
- @author: luojunhui
- 计算账号的阅读均值倍数
- """
- import json
- from pandas import DataFrame
- from applications import DeNetMysql
- from applications import AIDTApi
# Module-level DeNetMysql instance; the functions in this file run their queries through D.select().
- D = DeNetMysql()
def get_accounts():
    """
    Fetch the ids of the outside accounts whose category is '军事政法'.

    Runs a single query against ``long_article_accounts_outside`` through the
    module-level ``D`` handle and keeps only the first column of each row.

    :return: list of ``account_id`` values, in the order the query returned them
    """
    query = """select account_id from long_article_accounts_outside where category = '军事政法';"""
    rows = D.select(query)
    # Each row carries a single selected column; unwrap it.
    return [row[0] for row in rows]
def get_account_avg(min_ratio=1.3, min_title_len=15, min_read_cnt=1000):
    """
    Rank crawled articles by how far their reads exceed the account average.

    Loads the per-account average read counts from ``avg.json`` (a mapping of
    account id to average), fetches the articles of every account returned by
    ``get_accounts()`` from ``crawler_meta_article``, and scores each article
    as ``read_cnt / account_average``. Articles are then sorted by score
    (highest first) and filtered.

    An article is dropped when its title is empty, contains "农历" or "节",
    is shorter than ``min_title_len`` characters, when its score is below
    ``min_ratio``, or when its read count is below ``min_read_cnt``.

    :param min_ratio: minimum ``read_cnt / account_average`` ratio to keep
    :param min_title_len: minimum title length (in characters) to keep
    :param min_read_cnt: minimum absolute read count to keep
    :return: list of ``[title, link, read_cnt, avg_score]`` rows, best first
    """
    with open("avg.json", encoding="utf-8") as f:
        avg_dict = json.load(f)

    scored = []
    for account in get_accounts():
        avg_read = avg_dict.get(account)
        if not avg_read:
            # No usable average (missing, null or zero): nothing can be
            # scored for this account, so skip it before hitting the database.
            continue
        # NOTE(review): the account id is interpolated straight into the SQL
        # text. It comes from get_accounts(), not from user input, but a
        # parameterized query would be safer if D.select supports one —
        # TODO confirm.
        select_sql = f"""
        select title, read_cnt, link from crawler_meta_article
        where out_account_id = '{account}';
        """
        for title, read_cnt, link in D.select(select_sql):
            try:
                avg_score = read_cnt / avg_read
            except TypeError:
                # Non-numeric read count (e.g. NULL) or average: skip only
                # this row instead of abandoning the rest of the account.
                continue
            scored.append([title, link, read_cnt, avg_score])

    # Highest multiple of the account average first; list.sort is stable, so
    # ties keep their fetch order.
    scored.sort(key=lambda row: row[3], reverse=True)

    excluded_words = ("农历", "节")
    selected = []
    for row in scored:
        title, _link, read_cnt, avg_score = row
        if not title or any(word in title for word in excluded_words):
            continue
        if (
            avg_score < min_ratio
            or len(title) < min_title_len
            or read_cnt < min_read_cnt
        ):
            continue
        selected.append(row)

    # The selected rows are returned so the caller can export them (e.g. to a
    # DataFrame with columns title/link/read/read_avg) or push their links to
    # a crawler plan; this function itself performs no export.
    return selected
# Script entry: runs the whole computation as soon as this file is executed.
# There is no __main__ guard, so importing the module triggers it as well.
- get_account_avg()
|