|  | @@ -1,6 +1,7 @@
 | 
	
		
			
				|  |  |  """
 | 
	
		
			
				|  |  |  @author: luojunhui
 | 
	
		
			
				|  |  |  """
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  import random
 | 
	
		
			
				|  |  |  import time
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -23,7 +24,7 @@ def deduplication(rank1, rank2, rank3):
 | 
	
		
			
				|  |  |          result = []
 | 
	
		
			
				|  |  |          if item_list:
 | 
	
		
			
				|  |  |              for item in item_list:
 | 
	
		
			
				|  |  | -                title = item['title']
 | 
	
		
			
				|  |  | +                title = item["title"]
 | 
	
		
			
				|  |  |                  if title_sim_v2_by_list(title, dup_list):
 | 
	
		
			
				|  |  |                      # print("标题重复,已经过滤\t", title)
 | 
	
		
			
				|  |  |                      continue
 | 
	
	
		
			
				|  | @@ -63,11 +64,13 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          self.publishArticleList = []
 | 
	
		
			
				|  |  |          self.filter_list = []
 | 
	
		
			
				|  |  | -        history_title_dict = self.pipeline.history_title(account_nickname=self.accountName)
 | 
	
		
			
				|  |  | -        for item in tqdm(self.params['publishArticleList']):
 | 
	
		
			
				|  |  | +        history_title_dict = self.pipeline.history_title(
 | 
	
		
			
				|  |  | +            account_nickname=self.accountName
 | 
	
		
			
				|  |  | +        )
 | 
	
		
			
				|  |  | +        for item in tqdm(self.params["publishArticleList"]):
 | 
	
		
			
				|  |  |              flag = self.pipeline.deal(item, self.accountName, history_title_dict)
 | 
	
		
			
				|  |  |              if flag:
 | 
	
		
			
				|  |  | -                item['filterReason'] = flag['filterReason']
 | 
	
		
			
				|  |  | +                item["filterReason"] = flag["filterReason"]
 | 
	
		
			
				|  |  |                  self.filter_list.append(item)
 | 
	
		
			
				|  |  |              else:
 | 
	
		
			
				|  |  |                  self.publishArticleList.append(item)
 | 
	
	
		
			
				|  | @@ -86,11 +89,7 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |              self.publishNum = self.params["publishNum"]
 | 
	
		
			
				|  |  |              print("开始校验参数")
 | 
	
		
			
				|  |  |              self.filter()
 | 
	
		
			
				|  |  | -            self.logger.log(
 | 
	
		
			
				|  |  | -                code="1001",
 | 
	
		
			
				|  |  | -                msg="参数校验成功",
 | 
	
		
			
				|  |  | -                data=self.params
 | 
	
		
			
				|  |  | -            )
 | 
	
		
			
				|  |  | +            self.logger.log(code="1001", msg="参数校验成功", data=self.params)
 | 
	
		
			
				|  |  |              return None
 | 
	
		
			
				|  |  |          except Exception as e:
 | 
	
		
			
				|  |  |              response = {
 | 
	
	
		
			
				|  | @@ -99,9 +98,7 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |                  "code": 0,
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  |              self.logger.log(
 | 
	
		
			
				|  |  | -                code="1002",
 | 
	
		
			
				|  |  | -                msg="参数校验失败--{}".format(e),
 | 
	
		
			
				|  |  | -                data=self.params
 | 
	
		
			
				|  |  | +                code="1002", msg="参数校验失败--{}".format(e), data=self.params
 | 
	
		
			
				|  |  |              )
 | 
	
		
			
				|  |  |              return response
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -111,10 +108,17 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |          :return:
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          # 第一步把所有文章标题分为3组
 | 
	
		
			
				|  |  | -        article_list1_ori = [i for i in self.publishArticleList if "【1】" in i['producePlanName']]
 | 
	
		
			
				|  |  | -        article_list2_ori = [i for i in self.publishArticleList if "【2】" in i['producePlanName']]
 | 
	
		
			
				|  |  | -        article_list3_ori = [i for i in self.publishArticleList if
 | 
	
		
			
				|  |  | -                             not i in article_list1_ori and not i in article_list2_ori]
 | 
	
		
			
				|  |  | +        article_list1_ori = [
 | 
	
		
			
				|  |  | +            i for i in self.publishArticleList if "【1】" in i["producePlanName"]
 | 
	
		
			
				|  |  | +        ]
 | 
	
		
			
				|  |  | +        article_list2_ori = [
 | 
	
		
			
				|  |  | +            i for i in self.publishArticleList if "【2】" in i["producePlanName"]
 | 
	
		
			
				|  |  | +        ]
 | 
	
		
			
				|  |  | +        article_list3_ori = [
 | 
	
		
			
				|  |  | +            i
 | 
	
		
			
				|  |  | +            for i in self.publishArticleList
 | 
	
		
			
				|  |  | +            if not i in article_list1_ori and not i in article_list2_ori
 | 
	
		
			
				|  |  | +        ]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          # # 全局去重,保留优先级由  L1 --> L2 --> L3
 | 
	
		
			
				|  |  |          # hash_map = {}
 | 
	
	
		
			
				|  | @@ -150,22 +154,24 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |          if article_list1_ori:
 | 
	
		
			
				|  |  |              rank1 = ArticleRank().rank(
 | 
	
		
			
				|  |  |                  account_list=[self.accountName],
 | 
	
		
			
				|  |  | -                text_list=[i['title'] for i in article_list1_ori]
 | 
	
		
			
				|  |  | +                text_list=[i["title"] for i in article_list1_ori],
 | 
	
		
			
				|  |  |              )
 | 
	
		
			
				|  |  | -            score_list1 = rank1[self.accountName]['score_list']
 | 
	
		
			
				|  |  | +            score_list1 = rank1[self.accountName]["score_list"]
 | 
	
		
			
				|  |  |              ranked_1 = []
 | 
	
		
			
				|  |  |              for index, value in enumerate(score_list1):
 | 
	
		
			
				|  |  |                  obj = article_list1_ori[index]
 | 
	
		
			
				|  |  | -                obj['score'] = value + 1000
 | 
	
		
			
				|  |  | +                obj["score"] = value + 1000
 | 
	
		
			
				|  |  |                  ranked_1.append(obj)
 | 
	
		
			
				|  |  | -            ranked_1 = sorted(ranked_1, key=lambda x: x['score'], reverse=True)
 | 
	
		
			
				|  |  | +            ranked_1 = sorted(ranked_1, key=lambda x: x["score"], reverse=True)
 | 
	
		
			
				|  |  |          else:
 | 
	
		
			
				|  |  |              ranked_1 = []
 | 
	
		
			
				|  |  |          # rank2
 | 
	
		
			
				|  |  |          if article_list2_ori:
 | 
	
		
			
				|  |  |              for item in article_list2_ori:
 | 
	
		
			
				|  |  | -                item['score'] = 100
 | 
	
		
			
				|  |  | -            ranked_2 = sorted(article_list2_ori, key=lambda x: x['crawlerViewCount'], reverse=True)
 | 
	
		
			
				|  |  | +                item["score"] = 100
 | 
	
		
			
				|  |  | +            ranked_2 = sorted(
 | 
	
		
			
				|  |  | +                article_list2_ori, key=lambda x: x["crawlerViewCount"], reverse=True
 | 
	
		
			
				|  |  | +            )
 | 
	
		
			
				|  |  |          else:
 | 
	
		
			
				|  |  |              ranked_2 = []
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -173,25 +179,21 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |          if article_list3_ori:
 | 
	
		
			
				|  |  |              rank3 = ArticleRank().rank(
 | 
	
		
			
				|  |  |                  account_list=[self.accountName],
 | 
	
		
			
				|  |  | -                text_list=[i['title'] for i in article_list3_ori]
 | 
	
		
			
				|  |  | +                text_list=[i["title"] for i in article_list3_ori],
 | 
	
		
			
				|  |  |              )
 | 
	
		
			
				|  |  | -            score_list3 = rank3[self.accountName]['score_list']
 | 
	
		
			
				|  |  | +            score_list3 = rank3[self.accountName]["score_list"]
 | 
	
		
			
				|  |  |              ranked_3 = []
 | 
	
		
			
				|  |  |              for index, value in enumerate(score_list3):
 | 
	
		
			
				|  |  |                  obj = article_list3_ori[index]
 | 
	
		
			
				|  |  | -                obj['score'] = value
 | 
	
		
			
				|  |  | +                obj["score"] = value
 | 
	
		
			
				|  |  |                  ranked_3.append(obj)
 | 
	
		
			
				|  |  | -            ranked_3 = sorted(ranked_3, key=lambda x: x['score'], reverse=True)
 | 
	
		
			
				|  |  | +            ranked_3 = sorted(ranked_3, key=lambda x: x["score"], reverse=True)
 | 
	
		
			
				|  |  |          else:
 | 
	
		
			
				|  |  |              ranked_3 = []
 | 
	
		
			
				|  |  |          self.logger.log(
 | 
	
		
			
				|  |  |              code="1004",
 | 
	
		
			
				|  |  |              msg="排序完成",
 | 
	
		
			
				|  |  | -            data={
 | 
	
		
			
				|  |  | -                "rank1": ranked_1,
 | 
	
		
			
				|  |  | -                "rank2": ranked_2,
 | 
	
		
			
				|  |  | -                "rank3": ranked_3
 | 
	
		
			
				|  |  | -            }
 | 
	
		
			
				|  |  | +            data={"rank1": ranked_1, "rank2": ranked_2, "rank3": ranked_3},
 | 
	
		
			
				|  |  |          )
 | 
	
		
			
				|  |  |          return ranked_1, ranked_2, ranked_3
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -203,7 +205,9 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |          print("开始排序")
 | 
	
		
			
				|  |  |          try:
 | 
	
		
			
				|  |  |              ranked_1_d, ranked_2_d, ranked_3_d = await self.basic_rank()
 | 
	
		
			
				|  |  | -            ranked_1, ranked_2, ranked_3 = deduplication(ranked_1_d, ranked_2_d, ranked_3_d)
 | 
	
		
			
				|  |  | +            ranked_1, ranked_2, ranked_3 = deduplication(
 | 
	
		
			
				|  |  | +                ranked_1_d, ranked_2_d, ranked_3_d
 | 
	
		
			
				|  |  | +            )
 | 
	
		
			
				|  |  |              print("去重成功")
 | 
	
		
			
				|  |  |              try:
 | 
	
		
			
				|  |  |                  L = []
 | 
	
	
		
			
				|  | @@ -229,14 +233,10 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |                      "ghId": self.ghId,
 | 
	
		
			
				|  |  |                      "strategy": self.strategy,
 | 
	
		
			
				|  |  |                      "publishNum": self.publishNum,
 | 
	
		
			
				|  |  | -                    "rank_list": L[:self.publishNum],
 | 
	
		
			
				|  |  | -                    "filter_list": self.filter_list
 | 
	
		
			
				|  |  | +                    "rank_list": L[: self.publishNum],
 | 
	
		
			
				|  |  | +                    "filter_list": self.filter_list,
 | 
	
		
			
				|  |  |                  }
 | 
	
		
			
				|  |  | -                self.logger.log(
 | 
	
		
			
				|  |  | -                    code=1006,
 | 
	
		
			
				|  |  | -                    msg="rank successfully",
 | 
	
		
			
				|  |  | -                    data=result
 | 
	
		
			
				|  |  | -                )
 | 
	
		
			
				|  |  | +                self.logger.log(code=1006, msg="rank successfully", data=result)
 | 
	
		
			
				|  |  |                  response = {"status": "Rank Success", "data": result, "code": 1}
 | 
	
		
			
				|  |  |              except Exception as e:
 | 
	
		
			
				|  |  |                  result = {
 | 
	
	
		
			
				|  | @@ -246,12 +246,10 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |                      "strategy": self.strategy,
 | 
	
		
			
				|  |  |                      "publishNum": self.publishNum,
 | 
	
		
			
				|  |  |                      "rank_list": self.publishArticleList[: self.publishNum],
 | 
	
		
			
				|  |  | -                    "filter_list": self.filter_list
 | 
	
		
			
				|  |  | +                    "filter_list": self.filter_list,
 | 
	
		
			
				|  |  |                  }
 | 
	
		
			
				|  |  |                  self.logger.log(
 | 
	
		
			
				|  |  | -                    code=1007,
 | 
	
		
			
				|  |  | -                    msg="rank failed because of {}".format(e),
 | 
	
		
			
				|  |  | -                    data=result
 | 
	
		
			
				|  |  | +                    code=1007, msg="rank failed because of {}".format(e), data=result
 | 
	
		
			
				|  |  |                  )
 | 
	
		
			
				|  |  |                  print("排序成功")
 | 
	
		
			
				|  |  |                  response = {"status": "Rank Fail", "data": result, "code": 1}
 | 
	
	
		
			
				|  | @@ -295,34 +293,19 @@ class AccountArticleRank(object):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          match self.strategy:
 | 
	
		
			
				|  |  |              case "ArticleRankV1":
 | 
	
		
			
				|  |  | -                self.logger.log(
 | 
	
		
			
				|  |  | -                    code="1003",
 | 
	
		
			
				|  |  | -                    msg="命中排序策略1"
 | 
	
		
			
				|  |  | -                )
 | 
	
		
			
				|  |  | +                self.logger.log(code="1003", msg="命中排序策略1")
 | 
	
		
			
				|  |  |                  return await self.rank_v1()
 | 
	
		
			
				|  |  |              case "ArticleRankV2":
 | 
	
		
			
				|  |  | -                self.logger.log(
 | 
	
		
			
				|  |  | -                    code="1003",
 | 
	
		
			
				|  |  | -                    msg="命中排序策略2"
 | 
	
		
			
				|  |  | -                )
 | 
	
		
			
				|  |  | +                self.logger.log(code="1003", msg="命中排序策略2")
 | 
	
		
			
				|  |  |                  return await self.rank_v2()
 | 
	
		
			
				|  |  |              case "ArticleRankV3":
 | 
	
		
			
				|  |  | -                self.logger.log(
 | 
	
		
			
				|  |  | -                    code="1003",
 | 
	
		
			
				|  |  | -                    msg="命中排序策略3"
 | 
	
		
			
				|  |  | -                )
 | 
	
		
			
				|  |  | +                self.logger.log(code="1003", msg="命中排序策略3")
 | 
	
		
			
				|  |  |                  return await self.rank_v3()
 | 
	
		
			
				|  |  |              case "ArticleRankV4":
 | 
	
		
			
				|  |  | -                self.logger.log(
 | 
	
		
			
				|  |  | -                    code="1003",
 | 
	
		
			
				|  |  | -                    msg="命中排序策略4"
 | 
	
		
			
				|  |  | -                )
 | 
	
		
			
				|  |  | +                self.logger.log(code="1003", msg="命中排序策略4")
 | 
	
		
			
				|  |  |                  return await self.rank_v4()
 | 
	
		
			
				|  |  |              case "ArticleRankV5":
 | 
	
		
			
				|  |  | -                self.logger.log(
 | 
	
		
			
				|  |  | -                    code="1003",
 | 
	
		
			
				|  |  | -                    msg="命中排序策略5"
 | 
	
		
			
				|  |  | -                )
 | 
	
		
			
				|  |  | +                self.logger.log(code="1003", msg="命中排序策略5")
 | 
	
		
			
				|  |  |                  return await self.rank_v5()
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      async def deal(self):
 |