strategy.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. from tqdm import tqdm
  6. from applications.functions import Functions
  7. from config import accountBaseInfo, pool_level_detail
  8. class ArticlePoolStrategy(object):
  9. """
  10. 长文策略池
  11. """
  12. Fun = Functions()
  13. @classmethod
  14. def getData(cls, article_list):
  15. """
  16. :param article_list: 每天召回的文章list
  17. """
  18. detail_list = []
  19. print("查询文章url......")
  20. id_tuple = [i['id'] for i in article_list]
  21. detail_dict = cls.Fun.matchLinkByIdTuple(channel_id_tuple=tuple(id_tuple))
  22. for i in tqdm(article_list):
  23. content_id = i['id']
  24. i['gh_key'] = detail_dict[content_id]['gh_key']
  25. i['url'] = detail_dict[content_id]['url']
  26. i['title'] = detail_dict[content_id]['title']
  27. detail_list.append(i)
  28. print("查询完成, 开始排序")
  29. return detail_list
  30. @classmethod
  31. def splitByStrategy(cls, detail_list):
  32. """
  33. 账号-位置-阅读倍数
  34. :return:
  35. """
  36. L = []
  37. for line in detail_list:
  38. key = line['gh_key']
  39. article_read = line['read_count']
  40. if accountBaseInfo.get(key):
  41. avg_read = accountBaseInfo[key]['readAvg']
  42. # 计算比率
  43. level_rate = article_read / avg_read - 1
  44. obj = {
  45. "key": key,
  46. "avg_read": avg_read,
  47. "article_read": article_read,
  48. "level_rate": level_rate,
  49. "url": line['url'],
  50. "title": line['title']
  51. }
  52. L.append(obj)
  53. L = sorted(L, key=lambda x: x["level_rate"], reverse=True)
  54. result = {
  55. "Level1": [],
  56. "Level2": []
  57. }
  58. c1 = 0
  59. c2 = 0
  60. for line in L:
  61. gh_key = line['key']
  62. if pool_level_detail.get(gh_key):
  63. now_level = pool_level_detail[gh_key]
  64. else:
  65. now_level = "3"
  66. match now_level:
  67. case "2":
  68. # 往1层升
  69. if line['level_rate'] > 0.1 and line['avg_read'] >= 1000:
  70. now_title_list = [i['title'] for i in result['Level1']]
  71. if cls.Fun.TitleSimilarity(now_title_list, line['title']):
  72. continue
  73. else:
  74. result['Level1'].append(line)
  75. case "3":
  76. if line['level_rate'] > 0.1 and line['avg_read'] >= 100:
  77. now_title_list = [i['title'] for i in result['Level2']]
  78. if cls.Fun.TitleSimilarity(now_title_list, line['title']):
  79. continue
  80. else:
  81. result['Level2'].append(line)
  82. case "1":
  83. continue
  84. # if line['level_rate'] > 1.0:
  85. # now_title_list = [i['title'] for i in result['Level1']]
  86. # if cls.Fun.TitleSimilarity(now_title_list, line['title']):
  87. # continue
  88. # else:
  89. # result['Level1'].append(line)
  90. return result