- """
- @author: luojunhui
- 计算账号的阅读均值倍数
- """
- import json
- from pandas import DataFrame
- from tqdm import tqdm
- from applications import DeNetMysql
- from applications import AIDTApi
- D = DeNetMysql()
def get_account_avg():
    """
    Load the per-account average reads, score every crawled article against
    that average, keep the high performers and export them to Excel.
    :return: None
    """
    # Mapping of account key (gh_id plus article position) -> average read count.
    with open("/Users/luojunhui/cyber/LongArticlesJob/dev/avg_new_health.json", encoding="utf-8") as f:
        avg_dict = json.load(f)
    account_position_list = list(avg_dict.keys())
    article_rows = []
    for account in tqdm(account_position_list):
        # The key ends with a one-digit article position index; everything
        # before the last two characters is the account gh_id.
        gh_id = account[:-2]
        index = int(account[-1:])
        select_sql = f"""
            select title, read_cnt, link from crawler_meta_article
            where out_account_id = '{gh_id}' and article_index = {index} and status = 1;
        """
        result_list = D.select(select_sql)
        try:
            avg_read = avg_dict[account]
            for title, read_cnt, link in result_list:
                # Score = article reads relative to the account's average reads.
                avg_score = read_cnt / avg_read
                article_rows.append([title, link, read_cnt, avg_score])
        except (KeyError, TypeError, ZeroDivisionError):
            # Skip accounts whose average is missing or unusable.
            continue
    # Rank articles by their read multiple, best first.
    ranked = sorted(article_rows, reverse=True, key=lambda x: x[3])
    dropped = 0
    kept = 0
    selected = []
    for row in ranked:
        title = row[0]
        read_cnt = row[2]
        # Drop seasonal titles ("农历" lunar calendar, "太极" tai chi, "节" festival),
        # low read multiples, short titles and low absolute read counts.
        if "农历" in title or "太极" in title or "节" in title or row[3] < 1.3 or len(title) < 15 or read_cnt < 5000:
            dropped += 1
            continue
        kept += 1
        print(row)
        selected.append(row)
    print(dropped)
    print(kept)

    df = DataFrame(selected, columns=["title", "link", "read", "read_avg"])
    df.to_excel("health_2.xlsx", index=False)
    # Optionally push the selected article links into a crawler plan.
    # url_list = [i[1] for i in selected[3:]]
    # try:
    #     AIDTApi().updateArticleIntoCrawlerPlan(
    #         plan_id=None,
    #         plan_name="历史冷启-0816-new",
    #         plan_tag="autoArticlePoolLevel1",
    #         url_list=url_list
    #     )
    # except Exception as e:
    #     print("error--{}".format(e))


if __name__ == "__main__":
    get_account_avg()