AccountArticleRank.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import time
  6. from uuid import uuid4
  7. from applications.aliyunLog import AliyunArticleLog
  8. from applications.functions import ArticleRank
  9. from applications.pipeline import LongArticlesPipeline
  10. class AccountArticleRank(object):
  11. """
  12. 文章排序
  13. """
  14. def __init__(self, params, mysql_client):
  15. """
  16. :param params: 请求参数
  17. :param mysql_client: 数据库链接池
  18. """
  19. self.publishArticleList = None
  20. self.publishNum = None
  21. self.strategy = None
  22. self.ghId = None
  23. self.accountName = None
  24. self.accountId = None
  25. self.params = params
  26. self.mysql_client = mysql_client
  27. self.request_id = "alg-{}-{}".format(uuid4(), int(time.time()))
  28. self.logger = AliyunArticleLog(request_id=self.request_id, alg="ArticleRank")
  29. self.pipeline = LongArticlesPipeline()
  30. def filter(self):
  31. self.publishArticleList = []
  32. self.filter_list = []
  33. for item in self.params['publishArticleList']:
  34. flag = self.pipeline.deal(item)
  35. if flag:
  36. item['filterReason'] = flag['fileterReason']
  37. self.filter_list.append(item)
  38. else:
  39. self.publishArticleList.append(item)
  40. async def check_params(self):
  41. """
  42. 校验参数
  43. :return:
  44. """
  45. try:
  46. self.accountId = self.params["accountId"]
  47. self.accountName = self.params["accountName"]
  48. self.ghId = self.params["ghId"]
  49. self.strategy = self.params["strategy"]
  50. self.publishNum = self.params["publishNum"]
  51. self.filter()
  52. self.logger.log(
  53. code="1001",
  54. msg="参数校验成功",
  55. data=self.params
  56. )
  57. return None
  58. except Exception as e:
  59. response = {
  60. "msg": "params error",
  61. "info": "params check failed, params : {} is not correct".format(e),
  62. "code": 0,
  63. }
  64. self.logger.log(
  65. code="1002",
  66. msg="参数校验失败--{}".format(e),
  67. data=self.params
  68. )
  69. return response
  70. async def basic_rank(self):
  71. # 第一步把所有文章标题分为3组
  72. article_list1_ori = [i for i in self.publishArticleList if "【1】" in i['producePlanName']]
  73. article_list2_ori = [i for i in self.publishArticleList if "【2】" in i['producePlanName']]
  74. article_list3_ori = [i for i in self.publishArticleList if not i in article_list1_ori and not i in article_list2_ori]
  75. # 全局去重,保留优先级由 L1 --> L2 --> L3
  76. hash_map = {}
  77. article_list1 = []
  78. for i in article_list1_ori:
  79. title = i['title']
  80. if hash_map.get(title):
  81. continue
  82. else:
  83. article_list1.append(i)
  84. hash_map[title] = 1
  85. article_list2 = []
  86. for i in article_list2_ori:
  87. title = i['title']
  88. if hash_map.get(title):
  89. continue
  90. else:
  91. article_list2.append(i)
  92. hash_map[title] = 2
  93. article_list3 = []
  94. for i in article_list3_ori:
  95. title = i['title']
  96. if hash_map.get(title):
  97. continue
  98. else:
  99. article_list3.append(i)
  100. hash_map[title] = 1
  101. # 第二步对article_list1, article_list3按照得分排序, 对article_list2按照播放量排序
  102. if article_list1:
  103. rank1 = ArticleRank().rank(
  104. account_list=[self.accountName],
  105. text_list=[i['title'] for i in article_list1]
  106. )
  107. score_list1 = rank1[self.accountName]['score_list']
  108. ranked_1 = []
  109. for index, value in enumerate(score_list1):
  110. obj = article_list1[index]
  111. obj['score'] = value + 1000
  112. ranked_1.append(obj)
  113. ranked_1 = sorted(ranked_1, key=lambda x:x['score'], reverse=True)
  114. else:
  115. ranked_1 = []
  116. # rank2
  117. if article_list2:
  118. for item in article_list2:
  119. item['score'] = 100
  120. ranked_2 = sorted(article_list2, key=lambda x:x['crawlerViewCount'], reverse=True)
  121. else:
  122. ranked_2 = []
  123. # rank3
  124. if article_list3:
  125. rank3 = ArticleRank().rank(
  126. account_list=[self.accountName],
  127. text_list=[i['title'] for i in article_list3]
  128. )
  129. score_list3 = rank3[self.accountName]['score_list']
  130. ranked_3 = []
  131. for index, value in enumerate(score_list3):
  132. obj = article_list3[index]
  133. obj['score'] = value
  134. ranked_3.append(obj)
  135. ranked_3 = sorted(ranked_3, key=lambda x:x['score'], reverse=True)
  136. else:
  137. ranked_3 = []
  138. self.logger.log(
  139. code="1004",
  140. msg="去重排序完成",
  141. data={
  142. "rank1": ranked_1,
  143. "rank2": ranked_2,
  144. "rank3": ranked_3
  145. }
  146. )
  147. return ranked_1, ranked_2, ranked_3
  148. async def rank_v1(self):
  149. """
  150. Rank Version 1
  151. :return:
  152. """
  153. # try:
  154. ranked_1, ranked_2, ranked_3 = await self.basic_rank()
  155. try:
  156. L = []
  157. if ranked_1:
  158. L.append(ranked_1[0])
  159. if ranked_2:
  160. L.append(ranked_2[0])
  161. else:
  162. if ranked_2:
  163. if len(ranked_2) > 1:
  164. for i in ranked_2[:2]:
  165. L.append(i)
  166. else:
  167. L.append(ranked_2[0])
  168. for item in ranked_3:
  169. L.append(item)
  170. result = {
  171. "accountId": self.accountId,
  172. "accountName": self.accountName,
  173. "ghId": self.ghId,
  174. "strategy": self.strategy,
  175. "publishNum": self.publishNum,
  176. "rank_list": L[:self.publishNum],
  177. "filter_list": self.filter_list
  178. }
  179. self.logger.log(
  180. code=1006,
  181. msg="rank successfully",
  182. data=result
  183. )
  184. response = {"status": "Rank Success", "data": result, "code": 1}
  185. except Exception as e:
  186. result = {
  187. "accountId": self.accountId,
  188. "accountName": self.accountName,
  189. "ghId": self.ghId,
  190. "strategy": self.strategy,
  191. "publishNum": self.publishNum,
  192. "rank_list": self.publishArticleList[: self.publishNum],
  193. "filter_list": self.filter_list
  194. }
  195. self.logger.log(
  196. code=1007,
  197. msg="rank failed because of {}".format(e),
  198. data=result
  199. )
  200. response = {"status": "Rank Fail", "data": result, "code": 1}
  201. return response
  202. # except:
  203. # result = {"code": 2, "info": "account is not exist"}
  204. # return result
  205. async def rank_v2(self):
  206. """
  207. Rank Version 2
  208. :return:
  209. """
  210. return await self.rank_v1()
  211. async def rank_v3(self):
  212. """
  213. Rank Version 3
  214. :return:
  215. """
  216. return await self.rank_v1()
  217. async def rank_v4(self):
  218. """
  219. Rank Version 4
  220. :return:
  221. """
  222. return await self.rank_v1()
  223. async def rank_v5(self):
  224. """
  225. Rank Version 5
  226. :return:
  227. """
  228. return await self.rank_v1()
  229. async def choose_strategy(self):
  230. """
  231. 选择排序策略
  232. :return:
  233. """
  234. match self.strategy:
  235. case "ArticleRankV1":
  236. self.logger.log(
  237. code="1003",
  238. msg="命中排序策略1"
  239. )
  240. return await self.rank_v1()
  241. case "ArticleRankV2":
  242. self.logger.log(
  243. code="1003",
  244. msg="命中排序策略2"
  245. )
  246. return await self.rank_v2()
  247. case "ArticleRankV3":
  248. self.logger.log(
  249. code="1003",
  250. msg="命中排序策略3"
  251. )
  252. return await self.rank_v3()
  253. case "ArticleRankV4":
  254. self.logger.log(
  255. code="1003",
  256. msg="命中排序策略4"
  257. )
  258. return await self.rank_v4()
  259. case "ArticleRankV5":
  260. self.logger.log(
  261. code="1003",
  262. msg="命中排序策略5"
  263. )
  264. return await self.rank_v5()
  265. async def deal(self):
  266. """
  267. Deal Function
  268. :return:
  269. """
  270. error_params = await self.check_params()
  271. if error_params:
  272. return error_params
  273. else:
  274. return await self.choose_strategy()