AccountArticleRank.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import time
  6. from uuid import uuid4
  7. from applications.aliyunLog import AliyunArticleLog
  8. from applications.functions import ArticleRank
  9. from applications.functions import title_sim_v2_by_list
  10. from applications.functions import get_article_title_url_list
  11. def has_same(title, account_nickname, index_list=[1, 2]):
  12. """
  13. 判断是否存储
  14. :param title:
  15. :param account_nickname:
  16. :param index_list: 历史已发布的文章需要屏蔽的位置
  17. :return:
  18. """
  19. account_title_list = get_article_title_url_list(
  20. account_nickname,
  21. index_list=index_list
  22. )
  23. sim_res = title_sim_v2_by_list(title, account_title_list)
  24. if sim_res:
  25. return True
  26. return False
  27. class AccountArticleRank(object):
  28. """
  29. 文章排序
  30. """
  31. def __init__(self, params, mysql_client):
  32. """
  33. :param params: 请求参数
  34. :param mysql_client: 数据库链接池
  35. """
  36. self.publishArticleList = None
  37. self.publishNum = None
  38. self.strategy = None
  39. self.ghId = None
  40. self.accountName = None
  41. self.accountId = None
  42. self.params = params
  43. self.mysql_client = mysql_client
  44. self.filter_list = []
  45. self.request_id = "alg-{}-{}".format(uuid4(), int(time.time()))
  46. self.logger = AliyunArticleLog(request_id=self.request_id, alg="ArticleRank")
  47. async def check_params(self):
  48. """
  49. 校验参数
  50. :return:
  51. """
  52. try:
  53. self.accountId = self.params["accountId"]
  54. self.accountName = self.params["accountName"]
  55. self.ghId = self.params["ghId"]
  56. self.strategy = self.params["strategy"]
  57. self.publishNum = self.params["publishNum"]
  58. self.publishArticleList = [i for i in self.params["publishArticleList"] if not has_same(i['title'], self.accountName)]
  59. self.history_list = [i for i in self.params["publishArticleList"] if has_same(i['title'], self.accountName)]
  60. self.logger.log(
  61. code="1001",
  62. msg="参数校验成功",
  63. data=self.params
  64. )
  65. return None
  66. except Exception as e:
  67. response = {
  68. "msg": "params error",
  69. "info": "params check failed, params : {} is not correct".format(e),
  70. "code": 0,
  71. }
  72. self.logger.log(
  73. code="1002",
  74. msg="参数校验失败--{}".format(e),
  75. data=self.params
  76. )
  77. return response
  78. async def basic_rank(self):
  79. # 第一步把所有文章标题分为3组
  80. article_list1_ori = [i for i in self.publishArticleList if "【1】" in i['producePlanName']]
  81. article_list2_ori = [i for i in self.publishArticleList if "【2】" in i['producePlanName']]
  82. article_list3_ori = [i for i in self.publishArticleList if not i in article_list1_ori and not i in article_list2_ori]
  83. # 全局去重,保留优先级由 L1 --> L2 --> L3
  84. hash_map = {}
  85. article_list1 = []
  86. for i in article_list1_ori:
  87. title = i['title']
  88. if hash_map.get(title):
  89. continue
  90. else:
  91. article_list1.append(i)
  92. hash_map[title] = 1
  93. article_list2 = []
  94. for i in article_list2_ori:
  95. title = i['title']
  96. if hash_map.get(title):
  97. continue
  98. else:
  99. article_list2.append(i)
  100. hash_map[title] = 2
  101. article_list3 = []
  102. for i in article_list3_ori:
  103. title = i['title']
  104. if hash_map.get(title):
  105. continue
  106. else:
  107. article_list3.append(i)
  108. hash_map[title] = 1
  109. # 第二步对article_list1, article_list3按照得分排序, 对article_list2按照播放量排序
  110. if article_list1:
  111. rank1 = ArticleRank().rank(
  112. account_list=[self.accountName],
  113. text_list=[i['title'] for i in article_list1]
  114. )
  115. score_list1 = rank1[self.accountName]['score_list']
  116. ranked_1 = []
  117. for index, value in enumerate(score_list1):
  118. obj = article_list1[index]
  119. obj['score'] = value + 1000
  120. ranked_1.append(obj)
  121. ranked_1 = sorted(ranked_1, key=lambda x:x['score'], reverse=True)
  122. else:
  123. ranked_1 = []
  124. # rank2
  125. if article_list2:
  126. for item in article_list2:
  127. item['score'] = 100
  128. ranked_2 = sorted(article_list2, key=lambda x:x['crawlerViewCount'], reverse=True)
  129. else:
  130. ranked_2 = []
  131. # rank3
  132. if article_list3:
  133. rank3 = ArticleRank().rank(
  134. account_list=[self.accountName],
  135. text_list=[i['title'] for i in article_list3]
  136. )
  137. score_list3 = rank3[self.accountName]['score_list']
  138. ranked_3 = []
  139. for index, value in enumerate(score_list3):
  140. obj = article_list3[index]
  141. obj['score'] = value
  142. ranked_3.append(obj)
  143. ranked_3 = sorted(ranked_3, key=lambda x:x['score'], reverse=True)
  144. else:
  145. ranked_3 = []
  146. self.logger.log(
  147. code="1004",
  148. msg="去重排序完成",
  149. data={
  150. "rank1": ranked_1,
  151. "rank2": ranked_2,
  152. "rank3": ranked_3
  153. }
  154. )
  155. return ranked_1, ranked_2, ranked_3
  156. async def rank_v1(self):
  157. """
  158. Rank Version 1
  159. :return:
  160. """
  161. try:
  162. ranked_1, ranked_2, ranked_3 = await self.basic_rank()
  163. try:
  164. L = []
  165. if ranked_1:
  166. L.append(ranked_1[0])
  167. if ranked_2:
  168. L.append(ranked_2[0])
  169. else:
  170. if ranked_2:
  171. if len(ranked_2) > 1:
  172. for i in ranked_2[:2]:
  173. L.append(i)
  174. else:
  175. L.append(ranked_2[0])
  176. for item in ranked_3:
  177. L.append(item)
  178. result = {
  179. "accountId": self.accountId,
  180. "accountName": self.accountName,
  181. "ghId": self.ghId,
  182. "strategy": self.strategy,
  183. "publishNum": self.publishNum,
  184. "rank_list": L[:self.publishNum],
  185. "filter_list": self.filter_list
  186. }
  187. self.logger.log(
  188. code=1006,
  189. msg="rank successfully",
  190. data=result
  191. )
  192. response = {"status": "Rank Success", "data": result, "code": 1}
  193. except Exception as e:
  194. result = {
  195. "accountId": self.accountId,
  196. "accountName": self.accountName,
  197. "ghId": self.ghId,
  198. "strategy": self.strategy,
  199. "publishNum": self.publishNum,
  200. "rank_list": self.publishArticleList[: self.publishNum],
  201. "filter_list": self.filter_list
  202. }
  203. self.logger.log(
  204. code=1007,
  205. msg="rank failed because of {}".format(e),
  206. data=result
  207. )
  208. response = {"status": "Rank Fail", "data": result, "code": 1}
  209. return response
  210. except:
  211. result = {"code": 2, "info": "account is not exist"}
  212. return result
  213. async def rank_v2(self):
  214. """
  215. Rank Version 2
  216. :return:
  217. """
  218. return await self.rank_v1()
  219. async def rank_v3(self):
  220. """
  221. Rank Version 3
  222. :return:
  223. """
  224. return await self.rank_v1()
  225. async def rank_v4(self):
  226. """
  227. Rank Version 4
  228. :return:
  229. """
  230. return await self.rank_v1()
  231. async def rank_v5(self):
  232. """
  233. Rank Version 5
  234. :return:
  235. """
  236. return await self.rank_v1()
  237. async def choose_strategy(self):
  238. """
  239. 选择排序策略
  240. :return:
  241. """
  242. match self.strategy:
  243. case "ArticleRankV1":
  244. self.logger.log(
  245. code="1003",
  246. msg="命中排序策略1"
  247. )
  248. return await self.rank_v1()
  249. case "ArticleRankV2":
  250. self.logger.log(
  251. code="1003",
  252. msg="命中排序策略2"
  253. )
  254. return await self.rank_v2()
  255. case "ArticleRankV3":
  256. self.logger.log(
  257. code="1003",
  258. msg="命中排序策略3"
  259. )
  260. return await self.rank_v3()
  261. case "ArticleRankV4":
  262. self.logger.log(
  263. code="1003",
  264. msg="命中排序策略4"
  265. )
  266. return await self.rank_v4()
  267. case "ArticleRankV5":
  268. self.logger.log(
  269. code="1003",
  270. msg="命中排序策略5"
  271. )
  272. return await self.rank_v5()
  273. async def deal(self):
  274. """
  275. Deal Function
  276. :return:
  277. """
  278. error_params = await self.check_params()
  279. if error_params:
  280. return error_params
  281. else:
  282. for i in self.history_list:
  283. i['filterReason'] = "历史已发布文章"
  284. self.filter_list.append(i)
  285. return await self.choose_strategy()