Просмотр исходного кода

修改排序算法-返回重复视频

罗俊辉 10 месяцев назад
Родитель
Коммит
0bfce4504c
3 измененных файлов с 145 добавлено и 76 удалено
  1. 80 0
      applications/pipeline.py
  2. 65 75
      routes/AccountArticleRank.py
  3. 0 1
      test/rank_dev.py

+ 80 - 0
applications/pipeline.py

@@ -0,0 +1,80 @@
+"""
+@author: luojunhui
+"""
+import requests
+
+from applications.functions import title_sim_v2_by_list
+from applications.functions import get_article_title_url_list
+
+
+class LongArticlesPipeline(object):
+    """
+    Filtering pipeline for long articles.
+
+    Flags an article when a similar title was already published by the
+    account, or when the sensitive-text service marks its title.
+    """
+
+    # Seconds to wait for the sensitive-text service before giving up.
+    SENSITIVE_TIMEOUT = 10
+
+    @classmethod
+    def history_exists(cls, title, account_nickname, plan_name):
+        """
+        Check whether a similar title already exists in the account history.
+        :param title: candidate article title
+        :param account_nickname: account whose titles are fetched
+        :param plan_name: plan name; one containing "【1】" or "【2】"
+            restricts the lookup to index_list [1, 2], otherwise [1..8]
+        :return: True if title_sim_v2_by_list gives a truthy result
+        """
+        if "【1】" in plan_name or "【2】" in plan_name:
+            index_list = [1, 2]
+        else:
+            index_list = [1, 2, 3, 4, 5, 6, 7, 8]
+        account_title_list = get_article_title_url_list(
+            account_nickname,
+            index_list=index_list
+        )
+        return bool(title_sim_v2_by_list(title, account_title_list))
+
+    @classmethod
+    def article_safe(cls, title):
+        """
+        Ask the sensitive-text service about a title.
+        Despite the name, a truthy result makes deal() report a violation.
+
+        :param title: article title sent as the "text" field
+        :return: the "is_sensitive" field of the JSON response
+        """
+        url = "http://192.168.100.31:8177/sensitive/is_sensitive"
+        response = requests.post(
+            url=url,
+            json={"text": title},
+            headers={"Content-Type": "application/json"},
+            # Without a timeout an unreachable service blocks forever.
+            timeout=cls.SENSITIVE_TIMEOUT
+        )
+        return response.json()['is_sensitive']
+
+    @classmethod
+    def deal(cls, article_obj):
+        """
+        Decide whether an article has to be filtered out.
+
+        :param article_obj: dict providing 'title', 'crawlerAccountName'
+            and 'producePlanName'
+        :return: {"fileterReason": ..., "status": True} when the article is
+            filtered, otherwise False
+        """
+        title = article_obj['title']
+        if cls.history_exists(
+            title=title,
+            account_nickname=article_obj['crawlerAccountName'],
+            plan_name=article_obj['producePlanName']
+        ):
+            # "fileterReason" is misspelled, but it is the key callers read.
+            return {"fileterReason": "历史已发布文章", "status": True}
+        if cls.article_safe(title=title):
+            return {"fileterReason": "安全违规", "status": True}
+        return False

+ 65 - 75
routes/AccountArticleRank.py

@@ -8,27 +8,9 @@ from uuid import uuid4
 
 from applications.aliyunLog import AliyunArticleLog
 from applications.functions import ArticleRank
-from applications.functions import title_sim_v2_by_list
-from applications.functions import get_article_title_url_list
+from applications.pipeline import LongArticlesPipeline
 
 
-def has_same(title, account_nickname, index_list=[1, 2]):
-    """
-    判断是否存储
-    :param title:
-    :param account_nickname:
-    :param index_list: 历史已发布的文章需要屏蔽的位置
-    :return:
-    """
-    account_title_list = get_article_title_url_list(
-        account_nickname,
-        index_list=index_list
-    )
-    sim_res = title_sim_v2_by_list(title, account_title_list)
-    if sim_res:
-        return True
-    return False
-
 
 class AccountArticleRank(object):
     """
@@ -48,9 +30,21 @@ class AccountArticleRank(object):
         self.accountId = None
         self.params = params
         self.mysql_client = mysql_client
-        self.filter_list = []
         self.request_id = "alg-{}-{}".format(uuid4(), int(time.time()))
         self.logger = AliyunArticleLog(request_id=self.request_id, alg="ArticleRank")
+        self.pipeline = LongArticlesPipeline()
+
+
+    def filter(self):
+        """Split params['publishArticleList'] into kept and filtered lists."""
+        self.publishArticleList, self.filter_list = [], []
+        for article in self.params['publishArticleList']:
+            verdict = self.pipeline.deal(article)
+            if not verdict:
+                self.publishArticleList.append(article)
+                continue
+            article['filterReason'] = verdict['fileterReason']
+            self.filter_list.append(article)
 
     async def check_params(self):
         """
@@ -63,8 +57,7 @@ class AccountArticleRank(object):
             self.ghId = self.params["ghId"]
             self.strategy = self.params["strategy"]
             self.publishNum = self.params["publishNum"]
-            self.publishArticleList = [i for i in self.params["publishArticleList"] if not has_same(i['title'], self.accountName)]
-            self.history_list = [i for i in self.params["publishArticleList"] if has_same(i['title'], self.accountName)]
+            self.filter()
             self.logger.log(
                 code="1001",
                 msg="参数校验成功",
@@ -174,59 +167,59 @@ class AccountArticleRank(object):
         Rank Version 1
         :return:
         """
+        # try:
+        ranked_1, ranked_2, ranked_3 = await self.basic_rank()
         try:
-            ranked_1, ranked_2, ranked_3 = await self.basic_rank()
-            try:
-                L = []
-                if ranked_1:
-                    L.append(ranked_1[0])
-                    if ranked_2:
+            L = []
+            if ranked_1:
+                L.append(ranked_1[0])
+                if ranked_2:
+                    L.append(ranked_2[0])
+            else:
+                if ranked_2:
+                    if len(ranked_2) > 1:
+                        for i in ranked_2[:2]:
+                            L.append(i)
+                    else:
                         L.append(ranked_2[0])
-                else:
-                    if ranked_2:
-                        if len(ranked_2) > 1:
-                            for i in ranked_2[:2]:
-                                L.append(i)
-                        else:
-                            L.append(ranked_2[0])
-                for item in ranked_3:
-                    L.append(item)
+            for item in ranked_3:
+                L.append(item)
 
-                result = {
-                    "accountId": self.accountId,
-                    "accountName": self.accountName,
-                    "ghId": self.ghId,
-                    "strategy": self.strategy,
-                    "publishNum": self.publishNum,
-                    "rank_list": L[:self.publishNum],
-                    "filter_list": self.filter_list
-                }
-                self.logger.log(
-                    code=1006,
-                    msg="rank successfully",
-                    data=result
-                )
-                response = {"status": "Rank Success", "data": result, "code": 1}
-            except Exception as e:
-                result = {
-                    "accountId": self.accountId,
-                    "accountName": self.accountName,
-                    "ghId": self.ghId,
-                    "strategy": self.strategy,
-                    "publishNum": self.publishNum,
-                    "rank_list": self.publishArticleList[: self.publishNum],
-                    "filter_list": self.filter_list
-                }
-                self.logger.log(
-                    code=1007,
-                    msg="rank failed because of {}".format(e),
-                    data=result
-                )
-                response = {"status": "Rank Fail", "data": result, "code": 1}
-            return response
-        except:
-            result = {"code": 2, "info": "account is not exist"}
-            return result
+            result = {
+                "accountId": self.accountId,
+                "accountName": self.accountName,
+                "ghId": self.ghId,
+                "strategy": self.strategy,
+                "publishNum": self.publishNum,
+                "rank_list": L[:self.publishNum],
+                "filter_list": self.filter_list
+            }
+            self.logger.log(
+                code=1006,
+                msg="rank successfully",
+                data=result
+            )
+            response = {"status": "Rank Success", "data": result, "code": 1}
+        except Exception as e:
+            result = {
+                "accountId": self.accountId,
+                "accountName": self.accountName,
+                "ghId": self.ghId,
+                "strategy": self.strategy,
+                "publishNum": self.publishNum,
+                "rank_list": self.publishArticleList[: self.publishNum],
+                "filter_list": self.filter_list
+            }
+            self.logger.log(
+                code=1007,
+                msg="rank failed because of {}".format(e),
+                data=result
+            )
+            response = {"status": "Rank Fail", "data": result, "code": 1}
+        return response
+        # except:
+        #     result = {"code": 2, "info": "account is not exist"}
+        #     return result
 
     async def rank_v2(self):
         """
@@ -302,7 +295,4 @@ class AccountArticleRank(object):
         if error_params:
             return error_params
         else:
-            for i in self.history_list:
-                i['filterReason'] = "历史已发布文章"
-                self.filter_list.append(i)
             return await self.choose_strategy()

Разница между файлами не показана из-за своего большого размера
+ 0 - 1
test/rank_dev.py


Некоторые файлы не были показаны из-за большого количества измененных файлов