123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
- """
- @author: luojunhui
- """
- import json
- from tqdm import tqdm
- from applications.functions import Functions
- from config import accountBaseInfo, pool_level_detail
class ArticlePoolStrategy(object):
    """
    Long-article strategy pool.

    Enriches recalled articles with their link details and then sorts them
    into promotion buckets according to how far each article's read count
    exceeds the average read count of the account that published it.
    """
    Fun = Functions()

    # Current pool level of an account -> (result bucket the article is
    # promoted into, minimum average read count the account must have).
    # Levels missing from this table (e.g. "1") are never promoted.
    _PROMOTION_RULES = {
        "2": ("Level1", 1000),
        "3": ("Level2", 100),
    }
    # An article is promoted only when level_rate is strictly above this.
    _MIN_LEVEL_RATE = 0.1
    # Level used for accounts that have no (truthy) entry in pool_level_detail.
    _DEFAULT_LEVEL = "3"

    @classmethod
    def getData(cls, article_list):
        """
        Attach link details to every recalled article.

        Each dict in ``article_list`` is updated in place with the
        ``gh_key``, ``url`` and ``title`` values looked up by its ``id``.

        :param article_list: list of articles recalled each day; every item
            must contain an ``id`` key
        :return: a new list holding the same (now enriched) article dicts
        :raises KeyError: if an article id is missing from the lookup result
        """
        detail_list = []
        print("查询文章url......")
        id_tuple = tuple(article['id'] for article in article_list)
        detail_dict = cls.Fun.matchLinkByIdTuple(channel_id_tuple=id_tuple)
        for article in tqdm(article_list):
            # Look the detail record up once instead of once per field.
            detail = detail_dict[article['id']]
            article['gh_key'] = detail['gh_key']
            article['url'] = detail['url']
            article['title'] = detail['title']
            detail_list.append(article)
        print("查询完成, 开始排序")
        return detail_list

    @classmethod
    def splitByStrategy(cls, detail_list):
        """
        Split articles into promotion pools (account - position - read multiple).

        For every article whose account is present in ``accountBaseInfo`` the
        ratio ``article_read / avg_read - 1`` is computed. Articles are then
        visited from the highest ratio to the lowest and appended to the
        bucket selected by the account's current pool level, provided the
        ratio and the account's average read count pass the thresholds and
        the title is not reported as similar to one already in that bucket.

        :param detail_list: article dicts containing ``gh_key``,
            ``read_count``, ``url`` and ``title``
        :return: dict with the keys ``"Level1"`` and ``"Level2"``, each a list
            of dicts with ``key``, ``avg_read``, ``article_read``,
            ``level_rate``, ``url`` and ``title``
        """
        candidates = []
        for line in detail_list:
            key = line['gh_key']
            article_read = line['read_count']
            account = accountBaseInfo.get(key)
            if not account:
                continue
            avg_read = account['readAvg']
            if not avg_read:
                # Without a non-zero baseline the ratio is undefined; skip the
                # article instead of failing the batch with ZeroDivisionError.
                continue
            candidates.append({
                "key": key,
                "avg_read": avg_read,
                "article_read": article_read,
                # Relative gain over the account's average read count.
                "level_rate": article_read / avg_read - 1,
                "url": line['url'],
                "title": line['title']
            })
        # Best performers first, so they win when titles are near-duplicates.
        candidates = sorted(candidates, key=lambda x: x["level_rate"], reverse=True)

        result = {
            "Level1": [],
            "Level2": []
        }
        for line in candidates:
            now_level = pool_level_detail.get(line['key']) or cls._DEFAULT_LEVEL
            rule = cls._PROMOTION_RULES.get(now_level)
            if rule is None:
                # Level "1" (and any unknown level) has nowhere to be promoted.
                continue
            bucket, min_avg_read = rule
            if not (line['level_rate'] > cls._MIN_LEVEL_RATE
                    and line['avg_read'] >= min_avg_read):
                continue
            # The title list must be rebuilt each time because the bucket grows.
            now_title_list = [i['title'] for i in result[bucket]]
            if cls.Fun.TitleSimilarity(now_title_list, line['title']):
                continue
            result[bucket].append(line)
        return result
|