Переглянути джерело

相似度评分增加阅读均值倍数权重

wangyunpeng 7 місяців тому
батько
коміт
a1f4d67ecb
3 змінених файлів з 29 додано та 26 видалено
  1. applications/articleTools.py (+2 −2)
  2. routes/__init__.py (+1 −1)
  3. routes/accountServer.py (+26 −23)

+ 2 - 2
applications/articleTools.py

@@ -212,7 +212,7 @@ class ArticleDBTools(object):
 
 
     async def get_good_bad_articles(self,
     async def get_good_bad_articles(self,
                                     account_name,
                                     account_name,
-                                    method,
+                                    interest_type,
                                     view_count_filter,
                                     view_count_filter,
                                     rate=0.1,
                                     rate=0.1,
                                     index_list=None,
                                     index_list=None,
@@ -234,7 +234,7 @@ class ArticleDBTools(object):
         )
         )
         df_rows = len(article_data_frame)
         df_rows = len(article_data_frame)
         if df_rows > 0:
         if df_rows > 0:
-            match method:
+            match interest_type:
                 case "top":
                 case "top":
                     sorted_df = article_data_frame.sort_values(by='show_view_count', reversed=True)
                     sorted_df = article_data_frame.sort_values(by='show_view_count', reversed=True)
                     topn = max(int(df_rows * rate), 1)
                     topn = max(int(df_rows * rate), 1)

+ 1 - 1
routes/__init__.py

@@ -51,7 +51,7 @@ def AlgRoutes(mysql_client, model):
         return jsonify(response)
         return jsonify(response)
 
 
     @blueprint.route("/score_list", methods=["POST"])
     @blueprint.route("/score_list", methods=["POST"])
-    async def articleAccount():
+    async def article_account():
         """
         """
         公众号文章功能等接口
         公众号文章功能等接口
         :return:
         :return:

+ 26 - 23
routes/accountServer.py

@@ -21,10 +21,11 @@ class AccountServer(object):
         self.max_time = None
         self.max_time = None
         self.rate = None
         self.rate = None
         self.title_list = None
         self.title_list = None
+        self.view_count_filter = None
         self.params = params
         self.params = params
         self.AT = ArticleDBTools(mysql_client)
         self.AT = ArticleDBTools(mysql_client)
 
 
-    async def request_for_nlp(self, title_list, account_interest, account_weight):
+    async def request_for_nlp(self, title_list, account_interest, interest_weight):
         """
         """
         nlp process
         nlp process
         """
         """
@@ -34,7 +35,7 @@ class AccountServer(object):
             "data": {
             "data": {
                 "text_list_a": [i.replace("'", "") for i in title_list],
                 "text_list_a": [i.replace("'", "") for i in title_list],
                 "text_list_b": [i.replace("'", "") for i in account_interest],
                 "text_list_b": [i.replace("'", "") for i in account_interest],
-                "score_list_b": account_weight,
+                "score_list_b": interest_weight,
                 "symbol": 1,
                 "symbol": 1,
             },
             },
             "function": "similarities_cross_mean" if self.sim_type == "mean" else "similarities_cross_avg"
             "function": "similarities_cross_mean" if self.sim_type == "mean" else "similarities_cross_avg"
@@ -49,7 +50,7 @@ class AccountServer(object):
                     print("Received empty response")
                     print("Received empty response")
                     return {}
                     return {}
 
 
-    def checkParams(self):
+    def check_params(self):
         """
         """
         校验传参
         校验传参
         :return:
         :return:
@@ -68,10 +69,10 @@ class AccountServer(object):
             response = {"error": "Params error", "detail": str(e)}
             response = {"error": "Params error", "detail": str(e)}
             return response
             return response
 
 
-    async def getAccountInterest(
+    async def get_account_interest(
         self,
         self,
         account_name,
         account_name,
-        method,
+        interest_type,
         view_count_filter,
         view_count_filter,
         rate=None,
         rate=None,
         msg_type=None,
         msg_type=None,
@@ -86,15 +87,14 @@ class AccountServer(object):
         :param min_time:
         :param min_time:
         :param index_list:
         :param index_list:
         :param msg_type:
         :param msg_type:
-        :param keys_dict:
         :param rate:
         :param rate:
-        :param gh_id:
-        :param method:
+        :param interest_type:
+        :param view_count_filter:
         :return:
         :return:
         """
         """
         good_df, bad_df = await self.AT.get_good_bad_articles(
         good_df, bad_df = await self.AT.get_good_bad_articles(
             account_name=account_name,
             account_name=account_name,
-            method=method,
+            interest_type=interest_type,
             msg_type=msg_type,
             msg_type=msg_type,
             index_list=index_list,
             index_list=index_list,
             min_time=min_time,
             min_time=min_time,
@@ -102,34 +102,37 @@ class AccountServer(object):
             rate=rate,
             rate=rate,
             view_count_filter=view_count_filter,
             view_count_filter=view_count_filter,
         )
         )
-        view_count_list = good_df["show_view_count"].values.tolist()
-        title_list = good_df["title"].values.tolist()
-        print(view_count_list)
-        print(title_list)
-        return title_list, view_count_list
+        if interest_type == "account_avg":
+            interest_weight = (good_df["show_view_count"] / good_df["view_count_avg"]).values.tolist()
+        else:
+            interest_weight = good_df["show_view_count"].values.tolist()
+        account_interest = good_df["title"].values.tolist()
+        print(account_interest)
+        print(interest_weight)
+        return account_interest, interest_weight
 
 
-    async def getEachAccountScoreList(self, account_name):
+    async def get_each_account_score_list(self, account_name):
         """
         """
         获取和单个账号的相关性分数
         获取和单个账号的相关性分数
         :return:
         :return:
         """
         """
         try:
         try:
-            account_interest, account_weight = await self.getAccountInterest(
+            account_interest, interest_weight = await self.get_account_interest(
                 account_name=account_name,
                 account_name=account_name,
-                method=self.interest_type,
+                interest_type=self.interest_type,
                 rate=self.rate,
                 rate=self.rate,
                 view_count_filter=self.view_count_filter,
                 view_count_filter=self.view_count_filter,
                 min_time=self.min_time,
                 min_time=self.min_time,
                 max_time=self.max_time,
                 max_time=self.max_time,
             )
             )
-            sim_key = "score_list_mean" if self.sim_type == "mean" else "score_list_avg"
             response = await self.request_for_nlp(
             response = await self.request_for_nlp(
                 title_list=self.title_list,
                 title_list=self.title_list,
                 account_interest=account_interest,
                 account_interest=account_interest,
-                account_weight=account_weight
+                interest_weight=interest_weight
             )
             )
+            score_list_key = "score_list_mean" if self.sim_type == "mean" else "score_list_avg"
             return {
             return {
-                "score_list": response[sim_key],
+                "score_list": response[score_list_key],
                 "text_list_max": response["text_list_max"],
                 "text_list_max": response["text_list_max"],
             }
             }
         except Exception as e:
         except Exception as e:
@@ -139,7 +142,7 @@ class AccountServer(object):
                 "text_list_max": self.title_list,
                 "text_list_max": self.title_list,
             }
             }
 
 
-    async def getAccountListScoreList(self):
+    async def get_account_list_score_list(self):
         """
         """
         获取AccountList中每一个账号的相关性分数
         获取AccountList中每一个账号的相关性分数
         :return:
         :return:
@@ -149,7 +152,7 @@ class AccountServer(object):
             if response.get(accountName):
             if response.get(accountName):
                 continue
                 continue
             else:
             else:
-                response[accountName] = await self.getEachAccountScoreList(account_name=accountName)
+                response[accountName] = await self.get_each_account_score_list(account_name=accountName)
         return response
         return response
 
 
     async def deal(self):
     async def deal(self):
@@ -158,5 +161,5 @@ class AccountServer(object):
         :return:
         :return:
         """
         """
         return (
         return (
-            self.checkParams() if self.checkParams() else await self.getAccountListScoreList()
+            self.check_params() if self.check_params() else await self.get_account_list_score_list()
         )
         )