# Repository: fengzhoutian/LongArticlesJob-fork (forked from luojunhui/LongArticlesJob)
"""
@author: luojunhui
Compute, for each crawled article, the ratio of its read count to the
average read count of the account that published it.
"""
import json

from pandas import DataFrame

from applications import DeNetMysql
from applications import AIDTApi

# Module-level DeNetMysql instance, created once at import time and shared by
# the query helpers below (they only call its `select` method).
D = DeNetMysql()


def get_accounts():
    """
    Fetch the ids of every outside account in the '军事政法' category.

    :return: list holding the first column (account_id) of each row that
        ``D.select`` returns for the query
    """
    query = "select account_id from long_article_accounts_outside where category = '军事政法';"
    rows = D.select(query)
    return [row[0] for row in rows]


# Title keywords ("lunar calendar", "festival") that exclude an article.
_EXCLUDED_TITLE_KEYWORDS = ("农历", "节")
# Minimum read_cnt / account-average ratio an article must reach.
_MIN_AVG_SCORE = 1.3
# Titles shorter than this many characters are excluded.
_MIN_TITLE_LENGTH = 15
# Articles with fewer reads than this are excluded.
_MIN_READ_CNT = 1000


def _is_candidate(title, read_cnt, avg_score):
    """Return True when an article passes every keyword and threshold filter."""
    if any(keyword in title for keyword in _EXCLUDED_TITLE_KEYWORDS):
        return False
    return (
        avg_score >= _MIN_AVG_SCORE
        and len(title) >= _MIN_TITLE_LENGTH
        and read_cnt >= _MIN_READ_CNT
    )


def get_account_avg():
    """
    Rank crawled articles by how far their read count exceeds the account average.

    Loads per-account average read counts from ``avg.json`` (expected to be a
    JSON object keyed by account id), fetches the articles of every account
    returned by ``get_accounts()`` and computes ``read_cnt / avg_read`` for
    each one. The rows are sorted by that ratio, highest first, and then
    filtered: an article is dropped when its title contains "农历" or "节",
    its ratio is below 1.3, its title is shorter than 15 characters, or its
    read count is below 1000.

    Accounts without a usable average (absent from ``avg.json``, zero or null)
    are skipped before any query is issued. Rows with a missing title or read
    count, or whose ratio cannot be computed, are skipped individually.

    :return: list of ``[title, link, read_cnt, avg_score]`` rows that passed
        the filters, ordered by ``avg_score`` descending
    """
    with open("avg.json", encoding="utf-8") as f:
        avg_dict = json.load(f)

    scored_rows = []
    for account in get_accounts():
        avg_read = avg_dict.get(account)
        if not avg_read:
            # No average (or a zero one) means no ratio can be computed,
            # so there is no point querying this account's articles.
            continue

        # NOTE(review): the account id is interpolated straight into the SQL
        # text; it comes from our own table, but switch to a parameterized
        # query if D.select supports one.
        select_sql = f"""
        select title, read_cnt, link from crawler_meta_article
        where out_account_id = '{account}';
        """
        for title, read_cnt, link in D.select(select_sql):
            if title is None or read_cnt is None:
                continue
            try:
                avg_score = read_cnt / avg_read
            except (TypeError, ZeroDivisionError):
                # Only this row is unusable; keep processing the others.
                continue
            scored_rows.append([title, link, read_cnt, avg_score])

    ranked = sorted(scored_rows, reverse=True, key=lambda x: x[3])
    candidates = [
        row for row in ranked
        if _is_candidate(title=row[0], read_cnt=row[2], avg_score=row[3])
    ]

    # Disabled follow-up steps, kept for reference: export the candidates to
    # Excel and push their links into an AIDT crawler plan.
    # df = DataFrame(candidates, columns=["title", "link", "read", "read_avg"])
    # df.to_excel("test.xlsx", index=False)
    # url_list = [i[1] for i in candidates]
    # try:
    #     AIDTApi().updateArticleIntoCrawlerPlan(
    #         plan_id=None,
    #         plan_name="军事政法类冷启-0805-new",
    #         plan_tag="autoArticlePoolLevel1",
    #         url_list=url_list
    #     )
    # except Exception as e:
    #     print("error--{}".format(e))
    return candidates


# Script entry: runs the whole computation as soon as the module is executed.
# NOTE(review): there is no `__main__` guard, so importing this module runs it too.
get_account_avg()