AccountArticleRank.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. """
  2. @author: luojunhui
  3. """
  4. import json
  5. import time
  6. from uuid import uuid4
  7. from applications.aliyunLog import AliyunArticleLog
  8. from applications.functions import ArticleRank
  9. from applications.functions import title_sim_v2_by_list
  10. from applications.functions import get_article_title_url_list
  11. def has_same(title, account_nickname, index_list=[1, 2]):
  12. """
  13. 判断是否存储
  14. :param title:
  15. :param account_nickname:
  16. :param index_list: 历史已发布的文章需要屏蔽的位置
  17. :return:
  18. """
  19. account_title_list = get_article_title_url_list(
  20. account_nickname,
  21. index_list=index_list
  22. )
  23. sim_res = title_sim_v2_by_list(title, account_title_list)
  24. if sim_res:
  25. return True
  26. return False
  27. class AccountArticleRank(object):
  28. """
  29. 文章排序
  30. """
  31. def __init__(self, params, mysql_client):
  32. """
  33. :param params: 请求参数
  34. :param mysql_client: 数据库链接池
  35. """
  36. self.publishArticleList = None
  37. self.publishNum = None
  38. self.strategy = None
  39. self.ghId = None
  40. self.accountName = None
  41. self.accountId = None
  42. self.params = params
  43. self.mysql_client = mysql_client
  44. self.request_id = "alg-{}-{}".format(uuid4(), int(time.time()))
  45. self.logger = AliyunArticleLog(request_id=self.request_id, alg="ArticleRank")
  46. async def check_params(self):
  47. """
  48. 校验参数
  49. :return:
  50. """
  51. try:
  52. self.accountId = self.params["accountId"]
  53. self.accountName = self.params["accountName"]
  54. self.ghId = self.params["ghId"]
  55. self.strategy = self.params["strategy"]
  56. self.publishNum = self.params["publishNum"]
  57. self.publishArticleList = self.params["publishArticleList"]
  58. self.logger.log(
  59. code="1001",
  60. msg="参数校验成功",
  61. data=self.params
  62. )
  63. return None
  64. except Exception as e:
  65. response = {
  66. "msg": "params error",
  67. "info": "params check failed, params : {} is not correct".format(e),
  68. "code": 0,
  69. }
  70. self.logger.log(
  71. code="1002",
  72. msg="参数校验失败--{}".format(e),
  73. data=self.params
  74. )
  75. return response
  76. async def basic_rank(self):
  77. # 第一步把所有文章标题分为3组
  78. article_list1_ori = [i for i in self.publishArticleList if "【1】" in i['producePlanName']]
  79. article_list2_ori = [i for i in self.publishArticleList if "【2】" in i['producePlanName']]
  80. article_list3_ori = [i for i in self.publishArticleList if not i in article_list1_ori and not i in article_list2_ori]
  81. # 全局去重,保留优先级由 L1 --> L2 --> L3
  82. hash_map = {}
  83. article_list1 = []
  84. for i in article_list1_ori:
  85. title = i['title']
  86. if hash_map.get(title):
  87. continue
  88. else:
  89. article_list1.append(i)
  90. hash_map[title] = 1
  91. article_list2 = []
  92. for i in article_list2_ori:
  93. title = i['title']
  94. if hash_map.get(title):
  95. continue
  96. else:
  97. article_list2.append(i)
  98. hash_map[title] = 2
  99. article_list3 = []
  100. for i in article_list3_ori:
  101. title = i['title']
  102. if hash_map.get(title):
  103. continue
  104. else:
  105. article_list3.append(i)
  106. hash_map[title] = 1
  107. # 第二步对article_list1, article_list3按照得分排序, 对article_list2按照播放量排序
  108. if article_list1:
  109. rank1 = ArticleRank().rank(
  110. account_list=[self.accountName],
  111. text_list=[i['title'] for i in article_list1]
  112. )
  113. score_list1 = rank1[self.accountName]['score_list']
  114. ranked_1 = []
  115. for index, value in enumerate(score_list1):
  116. obj = article_list1[index]
  117. obj['score'] = value + 1000
  118. ranked_1.append(obj)
  119. ranked_1 = [i for i in ranked_1 if not has_same(i['title'], self.accountName)]
  120. ranked_1 = sorted(ranked_1, key=lambda x:x['score'], reverse=True)
  121. else:
  122. ranked_1 = []
  123. # rank2
  124. if article_list2:
  125. article_list2 = [i for i in article_list2 if not has_same(i['title'], self.accountName)]
  126. for item in article_list2:
  127. item['score'] = 100
  128. ranked_2 = sorted(article_list2, key=lambda x:x['crawlerViewCount'], reverse=True)
  129. else:
  130. ranked_2 = []
  131. # rank3
  132. if article_list3:
  133. rank3 = ArticleRank().rank(
  134. account_list=[self.accountName],
  135. text_list=[i['title'] for i in article_list3]
  136. )
  137. score_list3 = rank3[self.accountName]['score_list']
  138. ranked_3 = []
  139. for index, value in enumerate(score_list3):
  140. obj = article_list3[index]
  141. obj['score'] = value
  142. ranked_3.append(obj)
  143. ranked_3 = [i for i in ranked_3 if not has_same(i['title'], self.accountName, index_list=[1, 2, 3, 4, 5, 6, 7, 8])]
  144. ranked_3 = sorted(ranked_3, key=lambda x:x['score'], reverse=True)
  145. else:
  146. ranked_3 = []
  147. self.logger.log(
  148. code="1004",
  149. msg="去重排序完成",
  150. data={
  151. "rank1": ranked_1,
  152. "rank2": ranked_2,
  153. "rank3": ranked_3
  154. }
  155. )
  156. return ranked_1, ranked_2, ranked_3
  157. async def rank_v1(self):
  158. """
  159. Rank Version 1
  160. :return:
  161. """
  162. try:
  163. ranked_1, ranked_2, ranked_3 = await self.basic_rank()
  164. try:
  165. L = []
  166. if ranked_1:
  167. L.append(ranked_1[0])
  168. if ranked_2:
  169. L.append(ranked_2[0])
  170. else:
  171. if ranked_2:
  172. if len(ranked_2) > 1:
  173. for i in ranked_2[:2]:
  174. L.append(i)
  175. else:
  176. L.append(ranked_2[0])
  177. for item in ranked_3:
  178. L.append(item)
  179. result = {
  180. "accountId": self.accountId,
  181. "accountName": self.accountName,
  182. "ghId": self.ghId,
  183. "strategy": self.strategy,
  184. "publishNum": self.publishNum,
  185. "rank_list": L[:self.publishNum],
  186. }
  187. self.logger.log(
  188. code=1006,
  189. msg="rank successfully",
  190. data=result
  191. )
  192. response = {"status": "Rank Success", "data": result, "code": 1}
  193. except Exception as e:
  194. result = {
  195. "accountId": self.accountId,
  196. "accountName": self.accountName,
  197. "ghId": self.ghId,
  198. "strategy": self.strategy,
  199. "publishNum": self.publishNum,
  200. "rank_list": self.publishArticleList[: self.publishNum],
  201. }
  202. self.logger.log(
  203. code=1007,
  204. msg="rank failed because of {}".format(e),
  205. data=result
  206. )
  207. response = {"status": "Rank Fail", "data": result, "code": 1}
  208. return response
  209. except:
  210. result = {"code": 2, "info": "account is not exist"}
  211. return result
  212. async def rank_v2(self):
  213. """
  214. Rank Version 2
  215. :return:
  216. """
  217. return await self.rank_v1()
  218. async def rank_v3(self):
  219. """
  220. Rank Version 3
  221. :return:
  222. """
  223. return await self.rank_v1()
  224. async def rank_v4(self):
  225. """
  226. Rank Version 4
  227. :return:
  228. """
  229. return await self.rank_v1()
  230. async def rank_v5(self):
  231. """
  232. Rank Version 5
  233. :return:
  234. """
  235. return await self.rank_v1()
  236. async def choose_strategy(self):
  237. """
  238. 选择排序策略
  239. :return:
  240. """
  241. match self.strategy:
  242. case "ArticleRankV1":
  243. self.logger.log(
  244. code="1003",
  245. msg="命中排序策略1"
  246. )
  247. return await self.rank_v1()
  248. case "ArticleRankV2":
  249. self.logger.log(
  250. code="1003",
  251. msg="命中排序策略2"
  252. )
  253. return await self.rank_v2()
  254. case "ArticleRankV3":
  255. self.logger.log(
  256. code="1003",
  257. msg="命中排序策略3"
  258. )
  259. return await self.rank_v3()
  260. case "ArticleRankV4":
  261. self.logger.log(
  262. code="1003",
  263. msg="命中排序策略4"
  264. )
  265. return await self.rank_v4()
  266. case "ArticleRankV5":
  267. self.logger.log(
  268. code="1003",
  269. msg="命中排序策略5"
  270. )
  271. return await self.rank_v5()
  272. async def deal(self):
  273. """
  274. Deal Function
  275. :return:
  276. """
  277. error_params = await self.check_params()
  278. if error_params:
  279. return error_params
  280. else:
  281. return await self.choose_strategy()