Explorar el Código

代码重构开发ing

罗俊辉 hace 1 año
padre
commit
181e5c3c93
Se han modificado 2 ficheros con 135 adiciones y 54 borrados
  1. 130 51
      routes/AccountArticleRank.py
  2. 5 3
      test/rank_dev.py

+ 130 - 51
routes/AccountArticleRank.py

@@ -1,6 +1,7 @@
 """
 @author: luojunhui
 """
+import json
 
 from applications.functions import ArticleRank
 from applications.functions import title_sim_v2_by_list
@@ -55,8 +56,8 @@ class AccountArticleRank(object):
             self.strategy = self.params["strategy"]
             self.publishNum = self.params["publishNum"]
             self.publishArticleList = self.params["publishArticleList"]
-            self.title_list = [i["title"] for i in self.publishArticleList]
-            self.content_list = [i["content"] for i in self.publishArticleList]
+            # self.title_list = [i["title"] for i in self.publishArticleList]
+            # self.content_list = [i["content"] for i in self.publishArticleList]
             return None
         except Exception as e:
             response = {
@@ -66,53 +67,135 @@ class AccountArticleRank(object):
             }
             return response
 
+    async def basic_rank(self):  # groups, de-duplicates and orders self.publishArticleList; returns (ranked_1, ranked_2, ranked_3)
+        # Step 1: split the articles into 3 groups by the "【1】" / "【2】" marker in producePlanName (group 3 = neither)
+        article_list1_ori = [i for i in self.publishArticleList if "【1】" in i['producePlanName']]
+        article_list2_ori = [i for i in self.publishArticleList if "【2】" in i['producePlanName']]
+        article_list3_ori = [i for i in self.publishArticleList if not i in article_list1_ori and not i in article_list2_ori]
+
+        # Global de-duplication by title; priority is kept in the order L1 --> L2 --> L3
+        hash_map = {}
+
+        article_list1 = []
+        for i in article_list1_ori:
+            title = i['title']
+            if hash_map.get(title):
+                continue
+            else:
+                article_list1.append(i)
+                hash_map[title] = 1
+
+        article_list2 = []
+        for i in article_list2_ori:
+            title = i['title']
+            if hash_map.get(title):
+                continue
+            else:
+                article_list2.append(i)
+                hash_map[title] = 2
+
+        article_list3 = []
+        for i in article_list3_ori:
+            title = i['title']
+            if hash_map.get(title):
+                continue
+            else:
+                article_list3.append(i)
+                hash_map[title] = 1  # NOTE(review): groups 1/2 store their group number; 3 was probably intended. Only truthiness is read in this method.
+
+        # Step 2: sort article_list1 and article_list3 by score, and article_list2 by view count (crawlerViewCount)
+        if article_list1:
+            rank1 = ArticleRank().rank(
+                account_list=[self.accountName],
+                text_list=[i['title'] for i in article_list1]
+            )
+            score_list1 = rank1[self.accountName]['score_list']
+            ranked_1 = []
+            for index, value in enumerate(score_list1):  # assumes score_list follows the order of text_list - TODO confirm
+                obj = article_list1[index]
+                obj['score'] = value + 1000  # group 1 scores are offset by 1000; the article dict is mutated in place
+                ranked_1.append(obj)
+            ranked_1 = [i for i in ranked_1 if not has_same(i['title'], self.accountName)]  # has_same is defined elsewhere; presumably a duplicate-title check for this account - verify
+            ranked_1 = sorted(ranked_1, key=lambda x:x['score'], reverse=True)
+        else:
+            ranked_1 = []
+        # rank2: every item gets the fixed score 100; ordering is by crawlerViewCount, descending
+        if article_list2:
+            article_list2 = [i for i in article_list2 if not has_same(i['title'], self.accountName)]
+            for item in article_list2:
+                item['score'] = 100
+            ranked_2 = sorted(article_list2, key=lambda x:x['crawlerViewCount'], reverse=True)
+        else:
+            ranked_2 = []
+
+        # rank3: same procedure as group 1, but the score is stored without an offset
+        if article_list3:
+            rank3 = ArticleRank().rank(
+                account_list=[self.accountName],
+                text_list=[i['title'] for i in article_list3]
+            )
+            score_list3 = rank3[self.accountName]['score_list']
+            ranked_3 = []
+            for index, value in enumerate(score_list3):  # assumes score_list follows the order of text_list - TODO confirm
+                obj = article_list3[index]
+                obj['score'] = value
+                ranked_3.append(obj)
+            ranked_3 = [i for i in ranked_3 if not has_same(i['title'], self.accountName)]
+            ranked_3 = sorted(ranked_3, key=lambda x:x['score'], reverse=True)
+        else:
+            ranked_3 = []
+        return ranked_1, ranked_2, ranked_3
+
+
     async def rank_v1(self):
         """
         Rank Version 1
         :return:
         """
         try:
-            rank_info = ArticleRank().rank(
-                account_list=[self.accountName], text_list=self.title_list
-            )
-            score_list = rank_info[self.accountName]["score_list"]
-            title_score_dict = {}
-
-            for index, item in enumerate(self.title_list):
-                title_score_dict[item] = score_list[index]
-
-            result_list = []
-            for obj in self.publishArticleList:
-                if title_score_dict.get(obj["title"]):
-                    produce_plan_name = obj["producePlanName"]
-                    if "【1】" in produce_plan_name:
-                        obj["score"] = title_score_dict[obj["title"]] + 1000
-                    elif "【2】" in produce_plan_name:
-                        obj["score"] = title_score_dict[obj["title"]] + 100
-                    else:
-                        obj["score"] = title_score_dict[obj["title"]]
-                    result_list.append(obj)
-
-            sorted_list = sorted(result_list, key=lambda x: x["score"], reverse=True)
-            result = {
-                "accountId": self.accountId,
-                "accountName": self.accountName,
-                "ghId": self.ghId,
-                "strategy": self.strategy,
-                "publishNum": self.publishNum,
-                "rank_list": sorted_list[: self.publishNum],
-            }
-        except Exception as e:
-            result = {
-                "accountId": self.accountId,
-                "accountName": self.accountName,
-                "ghId": self.ghId,
-                "strategy": self.strategy,
-                "publishNum": self.publishNum,
-                "rank_list": self.publishArticleList[: self.publishNum],
-            }
-        response = {"status": "Rank Success", "data": result, "code": 1}
-        return response
+            ranked_1, ranked_2, ranked_3 = await self.basic_rank()
+            # Global de-duplication is still needed here

+            try:
+                L = []  # output order: best of group 1, then best of group 2 (top two of group 2 when group 1 is empty), then all of group 3
+                if ranked_1:
+                    L.append(ranked_1[0])
+                    if ranked_2:
+                        L.append(ranked_2[0])
+                else:
+                    if ranked_2:
+                        if len(ranked_2) > 1:
+                            for i in ranked_2[:2]:
+                                L.append(i)
+                        else:
+                            L.append(ranked_2[0])
+                for item in ranked_3:
+                    L.append(item)
+
+                result = {
+                    "accountId": self.accountId,
+                    "accountName": self.accountName,
+                    "ghId": self.ghId,
+                    "strategy": self.strategy,
+                    "publishNum": self.publishNum,
+                    "rank_list": L[:self.publishNum],
+                }
+                response = {"status": "Rank Success", "data": result, "code": 1}
+            except Exception as e:  # fallback: return the unranked input list, truncated to publishNum
+                result = {
+                    "accountId": self.accountId,
+                    "accountName": self.accountName,
+                    "ghId": self.ghId,
+                    "strategy": self.strategy,
+                    "publishNum": self.publishNum,
+                    "rank_list": self.publishArticleList[: self.publishNum],
+                }
+                response = {"status": "Rank Fail", "data": result, "code": 1}  # NOTE(review): failure path reports code 1, the same code as the success response
+
+            return response
+        except:  # NOTE(review): bare except - any exception raised by basic_rank() is reported as "account is not exist"
+            result = {"code": 2, "info": "account is not exist"}
+            return result
 
     async def rank_v2(self):
         """
@@ -168,11 +251,7 @@ class AccountArticleRank(object):
         if error_params:
             return error_params
         else:
-            try:
-                self.title_list = [
-                    i for i in self.title_list if not has_same(i, self.accountName)
-                ]
-                return await self.choose_strategy()
-            except Exception as e:
-                result = {"code": 2, "info": "account is not exist"}
-                return result
+            return await self.choose_strategy()
+            # except Exception as e:
+            #     result = {"code": 2, "info": "account is not exist"}
+            #     return result

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 5 - 3
test/rank_dev.py


Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio