AccountArticleRank.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import time
  6. from uuid import uuid4
  7. from applications.aliyunLog import AliyunArticleLog
  8. from applications.functions import ArticleRank
  9. from applications.functions import title_sim_v2_by_list
  10. from applications.functions import get_article_title_url_list
  11. def has_same(title, account_nickname, index_list=[1, 2]):
  12. """
  13. 判断是否存储
  14. :param title:
  15. :param account_nickname:
  16. :param index_list: 历史已发布的文章需要屏蔽的位置
  17. :return:
  18. """
  19. account_title_list = get_article_title_url_list(
  20. account_nickname,
  21. index_list=index_list
  22. )
  23. sim_res = title_sim_v2_by_list(title, account_title_list)
  24. if sim_res:
  25. return True
  26. return False
  27. class AccountArticleRank(object):
  28. """
  29. 文章排序
  30. """
  31. def __init__(self, params, mysql_client):
  32. """
  33. :param params: 请求参数
  34. :param mysql_client: 数据库链接池
  35. """
  36. self.publishArticleList = None
  37. self.publishNum = None
  38. self.strategy = None
  39. self.ghId = None
  40. self.accountName = None
  41. self.accountId = None
  42. self.params = params
  43. self.mysql_client = mysql_client
  44. self.request_id = "alg-{}-{}".format(uuid4(), int(time.time()))
  45. self.logger = AliyunArticleLog(request_id=self.request_id, alg="ArticleRank")
  46. async def check_params(self):
  47. """
  48. 校验参数
  49. :return:
  50. """
  51. try:
  52. self.accountId = self.params["accountId"]
  53. self.accountName = self.params["accountName"]
  54. self.ghId = self.params["ghId"]
  55. self.strategy = self.params["strategy"]
  56. self.publishNum = self.params["publishNum"]
  57. self.publishArticleList = [i for i in self.params["publishArticleList"] if not has_same(i['title'], self.accountName)]
  58. self.history_list = [i for i in self.params["publishArticleList"] if has_same(i['title'], self.accountName)]
  59. self.logger.log(
  60. code="1001",
  61. msg="参数校验成功",
  62. data=self.params
  63. )
  64. return None
  65. except Exception as e:
  66. response = {
  67. "msg": "params error",
  68. "info": "params check failed, params : {} is not correct".format(e),
  69. "code": 0,
  70. }
  71. self.logger.log(
  72. code="1002",
  73. msg="参数校验失败--{}".format(e),
  74. data=self.params
  75. )
  76. return response
  77. async def basic_rank(self):
  78. # 第一步把所有文章标题分为3组
  79. article_list1_ori = [i for i in self.publishArticleList if "【1】" in i['producePlanName']]
  80. article_list2_ori = [i for i in self.publishArticleList if "【2】" in i['producePlanName']]
  81. article_list3_ori = [i for i in self.publishArticleList if not i in article_list1_ori and not i in article_list2_ori]
  82. # 全局去重,保留优先级由 L1 --> L2 --> L3
  83. hash_map = {}
  84. article_list1 = []
  85. for i in article_list1_ori:
  86. title = i['title']
  87. if hash_map.get(title):
  88. continue
  89. else:
  90. article_list1.append(i)
  91. hash_map[title] = 1
  92. article_list2 = []
  93. for i in article_list2_ori:
  94. title = i['title']
  95. if hash_map.get(title):
  96. continue
  97. else:
  98. article_list2.append(i)
  99. hash_map[title] = 2
  100. article_list3 = []
  101. for i in article_list3_ori:
  102. title = i['title']
  103. if hash_map.get(title):
  104. continue
  105. else:
  106. article_list3.append(i)
  107. hash_map[title] = 1
  108. # 第二步对article_list1, article_list3按照得分排序, 对article_list2按照播放量排序
  109. if article_list1:
  110. rank1 = ArticleRank().rank(
  111. account_list=[self.accountName],
  112. text_list=[i['title'] for i in article_list1]
  113. )
  114. score_list1 = rank1[self.accountName]['score_list']
  115. ranked_1 = []
  116. for index, value in enumerate(score_list1):
  117. obj = article_list1[index]
  118. obj['score'] = value + 1000
  119. ranked_1.append(obj)
  120. ranked_1 = sorted(ranked_1, key=lambda x:x['score'], reverse=True)
  121. else:
  122. ranked_1 = []
  123. # rank2
  124. if article_list2:
  125. for item in article_list2:
  126. item['score'] = 100
  127. ranked_2 = sorted(article_list2, key=lambda x:x['crawlerViewCount'], reverse=True)
  128. else:
  129. ranked_2 = []
  130. # rank3
  131. if article_list3:
  132. rank3 = ArticleRank().rank(
  133. account_list=[self.accountName],
  134. text_list=[i['title'] for i in article_list3]
  135. )
  136. score_list3 = rank3[self.accountName]['score_list']
  137. ranked_3 = []
  138. for index, value in enumerate(score_list3):
  139. obj = article_list3[index]
  140. obj['score'] = value
  141. ranked_3.append(obj)
  142. ranked_3 = sorted(ranked_3, key=lambda x:x['score'], reverse=True)
  143. else:
  144. ranked_3 = []
  145. self.logger.log(
  146. code="1004",
  147. msg="去重排序完成",
  148. data={
  149. "rank1": ranked_1,
  150. "rank2": ranked_2,
  151. "rank3": ranked_3
  152. }
  153. )
  154. return ranked_1, ranked_2, ranked_3
  155. async def rank_v1(self):
  156. """
  157. Rank Version 1
  158. :return:
  159. """
  160. try:
  161. ranked_1, ranked_2, ranked_3 = await self.basic_rank()
  162. try:
  163. L = []
  164. if ranked_1:
  165. L.append(ranked_1[0])
  166. if ranked_2:
  167. L.append(ranked_2[0])
  168. else:
  169. if ranked_2:
  170. if len(ranked_2) > 1:
  171. for i in ranked_2[:2]:
  172. L.append(i)
  173. else:
  174. L.append(ranked_2[0])
  175. for item in ranked_3:
  176. L.append(item)
  177. result = {
  178. "accountId": self.accountId,
  179. "accountName": self.accountName,
  180. "ghId": self.ghId,
  181. "strategy": self.strategy,
  182. "publishNum": self.publishNum,
  183. "rank_list": L[:self.publishNum],
  184. "filter": {
  185. "repeat": self.history_list
  186. }
  187. }
  188. self.logger.log(
  189. code=1006,
  190. msg="rank successfully",
  191. data=result
  192. )
  193. response = {"status": "Rank Success", "data": result, "code": 1}
  194. except Exception as e:
  195. result = {
  196. "accountId": self.accountId,
  197. "accountName": self.accountName,
  198. "ghId": self.ghId,
  199. "strategy": self.strategy,
  200. "publishNum": self.publishNum,
  201. "rank_list": self.publishArticleList[: self.publishNum],
  202. "filter": {
  203. "repeat": self.history_list
  204. }
  205. }
  206. self.logger.log(
  207. code=1007,
  208. msg="rank failed because of {}".format(e),
  209. data=result
  210. )
  211. response = {"status": "Rank Fail", "data": result, "code": 1}
  212. return response
  213. except:
  214. result = {"code": 2, "info": "account is not exist"}
  215. return result
  216. async def rank_v2(self):
  217. """
  218. Rank Version 2
  219. :return:
  220. """
  221. return await self.rank_v1()
  222. async def rank_v3(self):
  223. """
  224. Rank Version 3
  225. :return:
  226. """
  227. return await self.rank_v1()
  228. async def rank_v4(self):
  229. """
  230. Rank Version 4
  231. :return:
  232. """
  233. return await self.rank_v1()
  234. async def rank_v5(self):
  235. """
  236. Rank Version 5
  237. :return:
  238. """
  239. return await self.rank_v1()
  240. async def choose_strategy(self):
  241. """
  242. 选择排序策略
  243. :return:
  244. """
  245. match self.strategy:
  246. case "ArticleRankV1":
  247. self.logger.log(
  248. code="1003",
  249. msg="命中排序策略1"
  250. )
  251. return await self.rank_v1()
  252. case "ArticleRankV2":
  253. self.logger.log(
  254. code="1003",
  255. msg="命中排序策略2"
  256. )
  257. return await self.rank_v2()
  258. case "ArticleRankV3":
  259. self.logger.log(
  260. code="1003",
  261. msg="命中排序策略3"
  262. )
  263. return await self.rank_v3()
  264. case "ArticleRankV4":
  265. self.logger.log(
  266. code="1003",
  267. msg="命中排序策略4"
  268. )
  269. return await self.rank_v4()
  270. case "ArticleRankV5":
  271. self.logger.log(
  272. code="1003",
  273. msg="命中排序策略5"
  274. )
  275. return await self.rank_v5()
  276. async def deal(self):
  277. """
  278. Deal Function
  279. :return:
  280. """
  281. error_params = await self.check_params()
  282. if error_params:
  283. return error_params
  284. else:
  285. return await self.choose_strategy()