# luojunhui / LongArticlesJob
"""
@author: luojunhui
"""
import json

from tqdm import tqdm

from applications.functions import Functions
from config import accountBaseInfo, pool_level_detail


class ArticlePoolStrategy(object):
    """
    Long-article pool strategy.

    Enriches recalled articles with their link details and then picks
    promotion candidates per pool level, based on how far each article's
    read count exceeds the average read count of its account.
    """
    Fun = Functions()

    @classmethod
    def getData(cls, article_list):
        """
        Attach ``gh_key``, ``url`` and ``title`` to every recalled article.

        :param article_list: list of article dicts recalled for the day;
            each dict must contain an ``id`` key. The dicts are updated in
            place.
        :return: a new list holding the same (now enriched) dicts, in input
            order.
        :raises KeyError: if the lookup result has no entry for an article
            id (assuming ``matchLinkByIdTuple`` returns a dict keyed by id).
        """
        print("查询文章url......")
        id_tuple = tuple(article['id'] for article in article_list)
        detail_dict = cls.Fun.matchLinkByIdTuple(channel_id_tuple=id_tuple)
        detail_list = []
        for article in tqdm(article_list):
            detail = detail_dict[article['id']]
            article['gh_key'] = detail['gh_key']
            article['url'] = detail['url']
            article['title'] = detail['title']
            detail_list.append(article)
        print("查询完成, 开始排序")
        return detail_list

    @classmethod
    def splitByStrategy(cls, detail_list):
        """
        Rank articles by read multiple and split them into target levels.

        For every article whose account appears in ``accountBaseInfo`` the
        rate ``read_count / readAvg - 1`` is computed. Articles are then
        visited from the highest rate to the lowest and assigned as follows:

        * account currently at level "2": candidate for ``Level1`` when the
          rate is above 0.1 and the account average is at least 1000;
        * account currently at level "3" (also the fallback when the account
          has no entry in ``pool_level_detail``): candidate for ``Level2``
          when the rate is above 0.1 and the account average is at least 100;
        * any other level, including "1": skipped.

        A candidate is dropped when ``Fun.TitleSimilarity`` reports that its
        title is similar to one already accepted for the same target level.

        :param detail_list: enriched article dicts with ``gh_key``,
            ``read_count``, ``url`` and ``title`` keys.
        :return: dict ``{"Level1": [...], "Level2": [...]}`` whose entries
            hold ``key``, ``avg_read``, ``article_read``, ``level_rate``,
            ``url`` and ``title``.
        """
        ranked = []
        for line in detail_list:
            key = line['gh_key']
            article_read = line['read_count']
            account = accountBaseInfo.get(key)
            if not account:
                continue
            avg_read = account['readAvg']
            if not avg_read:
                # A zero/empty average makes the rate undefined and used to
                # raise ZeroDivisionError. Such accounts can never meet the
                # minimum-average thresholds below, so skipping them leaves
                # the result unchanged.
                continue
            ranked.append({
                "key": key,
                "avg_read": avg_read,
                "article_read": article_read,
                # Relative lift over the account average (0.1 == +10%).
                "level_rate": article_read / avg_read - 1,
                "url": line['url'],
                "title": line['title']
            })
        # list.sort is stable, so ties keep their input order.
        ranked.sort(key=lambda x: x["level_rate"], reverse=True)

        result = {
            "Level1": [],
            "Level2": []
        }
        for line in ranked:
            # Accounts without a configured level are treated as level "3".
            now_level = pool_level_detail.get(line['key']) or "3"
            if now_level == "2":
                # Promote towards level 1.
                target, min_avg_read = "Level1", 1000
            elif now_level == "3":
                target, min_avg_read = "Level2", 100
            else:
                # Level "1" accounts (and unknown levels) are not promoted.
                continue
            if not (line['level_rate'] > 0.1 and line['avg_read'] >= min_avg_read):
                continue
            now_title_list = [i['title'] for i in result[target]]
            if cls.Fun.TitleSimilarity(now_title_list, line['title']):
                # Near-duplicate of an already accepted, higher-ranked title.
                continue
            result[target].append(line)
        return result