"""
AccountArticleRank.py

@author: luojunhui
"""
import json

from applications.functions import ArticleRank
from applications.functions import title_sim_v2_by_list
from applications.functions import get_article_title_url_list
  8. def has_same(title, account_nickname, index_list=[1, 2]):
  9. """
  10. 判断是否存储
  11. :param title:
  12. :param account_nickname:
  13. :param index_list: 历史已发布的文章需要屏蔽的位置
  14. :return:
  15. """
  16. account_title_list = get_article_title_url_list(
  17. account_nickname,
  18. index_list=index_list
  19. )
  20. sim_res = title_sim_v2_by_list(title, account_title_list)
  21. if sim_res:
  22. return True
  23. return False
  24. class AccountArticleRank(object):
  25. """
  26. 文章排序
  27. """
  28. def __init__(self, params, mysql_client):
  29. """
  30. :param params: 请求参数
  31. :param mysql_client: 数据库链接池
  32. """
  33. self.publishArticleList = None
  34. self.publishNum = None
  35. self.strategy = None
  36. self.ghId = None
  37. self.accountName = None
  38. self.accountId = None
  39. self.params = params
  40. self.mysql_client = mysql_client
  41. async def check_params(self):
  42. """
  43. 校验参数
  44. :return:
  45. """
  46. try:
  47. self.accountId = self.params["accountId"]
  48. self.accountName = self.params["accountName"]
  49. self.ghId = self.params["ghId"]
  50. self.strategy = self.params["strategy"]
  51. self.publishNum = self.params["publishNum"]
  52. self.publishArticleList = self.params["publishArticleList"]
  53. # self.title_list = [i["title"] for i in self.publishArticleList]
  54. # self.content_list = [i["content"] for i in self.publishArticleList]
  55. return None
  56. except Exception as e:
  57. response = {
  58. "msg": "params error",
  59. "info": "params check failed, params : {} is not correct".format(e),
  60. "code": 0,
  61. }
  62. return response
  63. async def basic_rank(self):
  64. # 第一步把所有文章标题分为3组
  65. article_list1_ori = [i for i in self.publishArticleList if "【1】" in i['producePlanName']]
  66. article_list2_ori = [i for i in self.publishArticleList if "【2】" in i['producePlanName']]
  67. article_list3_ori = [i for i in self.publishArticleList if not i in article_list1_ori and not i in article_list2_ori]
  68. # 全局去重,保留优先级由 L1 --> L2 --> L3
  69. hash_map = {}
  70. article_list1 = []
  71. for i in article_list1_ori:
  72. title = i['title']
  73. if hash_map.get(title):
  74. continue
  75. else:
  76. article_list1.append(i)
  77. hash_map[title] = 1
  78. article_list2 = []
  79. for i in article_list2_ori:
  80. title = i['title']
  81. if hash_map.get(title):
  82. continue
  83. else:
  84. article_list2.append(i)
  85. hash_map[title] = 2
  86. article_list3 = []
  87. for i in article_list3_ori:
  88. title = i['title']
  89. if hash_map.get(title):
  90. continue
  91. else:
  92. article_list3.append(i)
  93. hash_map[title] = 1
  94. # 第二步对article_list1, article_list3按照得分排序, 对article_list2按照播放量排序
  95. if article_list1:
  96. rank1 = ArticleRank().rank(
  97. account_list=[self.accountName],
  98. text_list=[i['title'] for i in article_list1]
  99. )
  100. score_list1 = rank1[self.accountName]['score_list']
  101. ranked_1 = []
  102. for index, value in enumerate(score_list1):
  103. obj = article_list1[index]
  104. obj['score'] = value + 1000
  105. ranked_1.append(obj)
  106. ranked_1 = [i for i in ranked_1 if not has_same(i['title'], self.accountName)]
  107. ranked_1 = sorted(ranked_1, key=lambda x:x['score'], reverse=True)
  108. else:
  109. ranked_1 = []
  110. # rank2
  111. if article_list2:
  112. article_list2 = [i for i in article_list2 if not has_same(i['title'], self.accountName)]
  113. for item in article_list2:
  114. item['score'] = 100
  115. ranked_2 = sorted(article_list2, key=lambda x:x['crawlerViewCount'], reverse=True)
  116. else:
  117. ranked_2 = []
  118. # rank3
  119. if article_list3:
  120. rank3 = ArticleRank().rank(
  121. account_list=[self.accountName],
  122. text_list=[i['title'] for i in article_list3]
  123. )
  124. score_list3 = rank3[self.accountName]['score_list']
  125. ranked_3 = []
  126. for index, value in enumerate(score_list3):
  127. obj = article_list3[index]
  128. obj['score'] = value
  129. ranked_3.append(obj)
  130. ranked_3 = [i for i in ranked_3 if not has_same(i['title'], self.accountName, index_list=[1, 2, 3, 4, 5, 6, 7, 8])]
  131. ranked_3 = sorted(ranked_3, key=lambda x:x['score'], reverse=True)
  132. else:
  133. ranked_3 = []
  134. return ranked_1, ranked_2, ranked_3
  135. async def rank_v1(self):
  136. """
  137. Rank Version 1
  138. :return:
  139. """
  140. try:
  141. ranked_1, ranked_2, ranked_3 = await self.basic_rank()
  142. # 还要全局去重
  143. try:
  144. L = []
  145. if ranked_1:
  146. L.append(ranked_1[0])
  147. if ranked_2:
  148. L.append(ranked_2[0])
  149. else:
  150. if ranked_2:
  151. if len(ranked_2) > 1:
  152. for i in ranked_2[:2]:
  153. L.append(i)
  154. else:
  155. L.append(ranked_2[0])
  156. for item in ranked_3:
  157. L.append(item)
  158. result = {
  159. "accountId": self.accountId,
  160. "accountName": self.accountName,
  161. "ghId": self.ghId,
  162. "strategy": self.strategy,
  163. "publishNum": self.publishNum,
  164. "rank_list": L[:self.publishNum],
  165. }
  166. response = {"status": "Rank Success", "data": result, "code": 1}
  167. except Exception as e:
  168. result = {
  169. "accountId": self.accountId,
  170. "accountName": self.accountName,
  171. "ghId": self.ghId,
  172. "strategy": self.strategy,
  173. "publishNum": self.publishNum,
  174. "rank_list": self.publishArticleList[: self.publishNum],
  175. }
  176. response = {"status": "Rank Fail", "data": result, "code": 1}
  177. return response
  178. except:
  179. result = {"code": 2, "info": "account is not exist"}
  180. return result
  181. async def rank_v2(self):
  182. """
  183. Rank Version 2
  184. :return:
  185. """
  186. return await self.rank_v1()
  187. async def rank_v3(self):
  188. """
  189. Rank Version 3
  190. :return:
  191. """
  192. return await self.rank_v1()
  193. async def rank_v4(self):
  194. """
  195. Rank Version 4
  196. :return:
  197. """
  198. return await self.rank_v1()
  199. async def rank_v5(self):
  200. """
  201. Rank Version 5
  202. :return:
  203. """
  204. return await self.rank_v1()
  205. async def choose_strategy(self):
  206. """
  207. 选择排序策略
  208. :return:
  209. """
  210. match self.strategy:
  211. case "ArticleRankV1":
  212. return await self.rank_v1()
  213. case "ArticleRankV2":
  214. return await self.rank_v2()
  215. case "ArticleRankV3":
  216. return await self.rank_v3()
  217. case "ArticleRankV4":
  218. return await self.rank_v4()
  219. case "ArticleRankV5":
  220. return await self.rank_v5()
  221. async def deal(self):
  222. """
  223. Deal Function
  224. :return:
  225. """
  226. error_params = await self.check_params()
  227. if error_params:
  228. return error_params
  229. else:
  230. return await self.choose_strategy()
  231. # except Exception as e:
  232. # result = {"code": 2, "info": "account is not exist"}
  233. # return result