AccountArticleRank.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. from applications.functions import ArticleRank
  6. from applications.functions import title_sim_v2_by_list
  7. from applications.functions import get_article_title_url_list
  8. def has_same(title, account_nickname):
  9. """
  10. 判断是否存储
  11. :param title:
  12. :param account_nickname:
  13. :return:
  14. """
  15. account_title_list = get_article_title_url_list(
  16. account_nickname,
  17. # max_time='20240603'
  18. )
  19. sim_res = title_sim_v2_by_list(title, account_title_list)
  20. if sim_res:
  21. return True
  22. return False
  23. class AccountArticleRank(object):
  24. """
  25. 文章排序
  26. """
  27. def __init__(self, params, mysql_client):
  28. """
  29. :param params: 请求参数
  30. :param mysql_client: 数据库链接池
  31. """
  32. self.publishArticleList = None
  33. self.publishNum = None
  34. self.strategy = None
  35. self.ghId = None
  36. self.accountName = None
  37. self.accountId = None
  38. self.params = params
  39. self.mysql_client = mysql_client
  40. async def check_params(self):
  41. """
  42. 校验参数
  43. :return:
  44. """
  45. try:
  46. self.accountId = self.params["accountId"]
  47. self.accountName = self.params["accountName"]
  48. self.ghId = self.params["ghId"]
  49. self.strategy = self.params["strategy"]
  50. self.publishNum = self.params["publishNum"]
  51. self.publishArticleList = self.params["publishArticleList"]
  52. # self.title_list = [i["title"] for i in self.publishArticleList]
  53. # self.content_list = [i["content"] for i in self.publishArticleList]
  54. return None
  55. except Exception as e:
  56. response = {
  57. "msg": "params error",
  58. "info": "params check failed, params : {} is not correct".format(e),
  59. "code": 0,
  60. }
  61. return response
  62. async def basic_rank(self):
  63. # 第一步把所有文章标题分为3组
  64. article_list1_ori = [i for i in self.publishArticleList if "【1】" in i['producePlanName']]
  65. article_list2_ori = [i for i in self.publishArticleList if "【2】" in i['producePlanName']]
  66. article_list3_ori = [i for i in self.publishArticleList if not i in article_list1_ori and not i in article_list2_ori]
  67. # 全局去重,保留优先级由 L1 --> L2 --> L3
  68. hash_map = {}
  69. article_list1 = []
  70. for i in article_list1_ori:
  71. title = i['title']
  72. if hash_map.get(title):
  73. continue
  74. else:
  75. article_list1.append(i)
  76. hash_map[title] = 1
  77. article_list2 = []
  78. for i in article_list2_ori:
  79. title = i['title']
  80. if hash_map.get(title):
  81. continue
  82. else:
  83. article_list2.append(i)
  84. hash_map[title] = 2
  85. article_list3 = []
  86. for i in article_list3_ori:
  87. title = i['title']
  88. if hash_map.get(title):
  89. continue
  90. else:
  91. article_list3.append(i)
  92. hash_map[title] = 1
  93. # 第二步对article_list1, article_list3按照得分排序, 对article_list2按照播放量排序
  94. if article_list1:
  95. rank1 = ArticleRank().rank(
  96. account_list=[self.accountName],
  97. text_list=[i['title'] for i in article_list1]
  98. )
  99. score_list1 = rank1[self.accountName]['score_list']
  100. ranked_1 = []
  101. for index, value in enumerate(score_list1):
  102. obj = article_list1[index]
  103. obj['score'] = value + 1000
  104. ranked_1.append(obj)
  105. ranked_1 = [i for i in ranked_1 if not has_same(i['title'], self.accountName)]
  106. ranked_1 = sorted(ranked_1, key=lambda x:x['score'], reverse=True)
  107. else:
  108. ranked_1 = []
  109. # rank2
  110. if article_list2:
  111. article_list2 = [i for i in article_list2 if not has_same(i['title'], self.accountName)]
  112. for item in article_list2:
  113. item['score'] = 100
  114. ranked_2 = sorted(article_list2, key=lambda x:x['crawlerViewCount'], reverse=True)
  115. else:
  116. ranked_2 = []
  117. # rank3
  118. if article_list3:
  119. rank3 = ArticleRank().rank(
  120. account_list=[self.accountName],
  121. text_list=[i['title'] for i in article_list3]
  122. )
  123. score_list3 = rank3[self.accountName]['score_list']
  124. ranked_3 = []
  125. for index, value in enumerate(score_list3):
  126. obj = article_list3[index]
  127. obj['score'] = value
  128. ranked_3.append(obj)
  129. ranked_3 = [i for i in ranked_3 if not has_same(i['title'], self.accountName)]
  130. ranked_3 = sorted(ranked_3, key=lambda x:x['score'], reverse=True)
  131. else:
  132. ranked_3 = []
  133. return ranked_1, ranked_2, ranked_3
  134. async def rank_v1(self):
  135. """
  136. Rank Version 1
  137. :return:
  138. """
  139. try:
  140. ranked_1, ranked_2, ranked_3 = await self.basic_rank()
  141. # 还要全局去重
  142. try:
  143. L = []
  144. if ranked_1:
  145. L.append(ranked_1[0])
  146. if ranked_2:
  147. L.append(ranked_2[0])
  148. else:
  149. if ranked_2:
  150. if len(ranked_2) > 1:
  151. for i in ranked_2[:2]:
  152. L.append(i)
  153. else:
  154. L.append(ranked_2[0])
  155. for item in ranked_3:
  156. L.append(item)
  157. result = {
  158. "accountId": self.accountId,
  159. "accountName": self.accountName,
  160. "ghId": self.ghId,
  161. "strategy": self.strategy,
  162. "publishNum": self.publishNum,
  163. "rank_list": L[:self.publishNum],
  164. }
  165. response = {"status": "Rank Success", "data": result, "code": 1}
  166. except Exception as e:
  167. result = {
  168. "accountId": self.accountId,
  169. "accountName": self.accountName,
  170. "ghId": self.ghId,
  171. "strategy": self.strategy,
  172. "publishNum": self.publishNum,
  173. "rank_list": self.publishArticleList[: self.publishNum],
  174. }
  175. response = {"status": "Rank Fail", "data": result, "code": 1}
  176. return response
  177. except:
  178. result = {"code": 2, "info": "account is not exist"}
  179. return result
  180. async def rank_v2(self):
  181. """
  182. Rank Version 2
  183. :return:
  184. """
  185. return await self.rank_v1()
  186. async def rank_v3(self):
  187. """
  188. Rank Version 3
  189. :return:
  190. """
  191. return await self.rank_v1()
  192. async def rank_v4(self):
  193. """
  194. Rank Version 4
  195. :return:
  196. """
  197. return await self.rank_v1()
  198. async def rank_v5(self):
  199. """
  200. Rank Version 5
  201. :return:
  202. """
  203. return await self.rank_v1()
  204. async def choose_strategy(self):
  205. """
  206. 选择排序策略
  207. :return:
  208. """
  209. match self.strategy:
  210. case "ArticleRankV1":
  211. return await self.rank_v1()
  212. case "ArticleRankV2":
  213. return await self.rank_v2()
  214. case "ArticleRankV3":
  215. return await self.rank_v3()
  216. case "ArticleRankV4":
  217. return await self.rank_v4()
  218. case "ArticleRankV5":
  219. return await self.rank_v5()
  220. async def deal(self):
  221. """
  222. Deal Function
  223. :return:
  224. """
  225. error_params = await self.check_params()
  226. if error_params:
  227. return error_params
  228. else:
  229. return await self.choose_strategy()
  230. # except Exception as e:
  231. # result = {"code": 2, "info": "account is not exist"}
  232. # return result