Pārlūkot izejas kodu

score_list接口

罗俊辉 10 mēneši atpakaļ
vecāks
revīzija
3479fe4351
5 mainīti faili ar 295 papildinājumiem un 60 dzēšanām
  1. 156 0
      applications/articleTools.py
  2. 15 12
      routes/__init__.py
  3. 83 48
      routes/accountServer.py
  4. 1 0
      test/rank_dev.py
  5. 40 0
      test/score_list_dev.py

+ 156 - 0
applications/articleTools.py

@@ -2,11 +2,73 @@
 @author: luojunhui
 @author: luojunhui
 """
 """
 
 
+import asyncio
+import aiomysql
+from pandas import DataFrame
+
+
+class TaskMySQLClient(object):
+    """
+    Thin async wrapper around an aiomysql connection pool.
+
+    Call ``init_pool`` once before running statements and ``close_pool``
+    when the application shuts down.
+    """
+
+    def __init__(self):
+        # Stays None until init_pool() has been awaited.
+        self.mysql_pool = None
+
+    async def init_pool(self):
+        """
+        Create the connection pool and store it on ``self.mysql_pool``.
+
+        :return: None
+        """
+        # SECURITY NOTE(review): the host and credentials below are committed
+        # to source control. They should be loaded from configuration or the
+        # environment, and the exposed password rotated.
+        self.mysql_pool = await aiomysql.create_pool(
+            host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",
+            port=3306,
+            user="crawler",
+            password="crawler123456@",
+            db="piaoquan-crawler",
+            charset="utf8mb4",
+            connect_timeout=120,
+        )
+        print("mysql init successfully")
+
+    async def close_pool(self):
+        """
+        Close the pool and wait until its connections are released.
+
+        Does nothing when the pool was never initialised, so it is safe to
+        call from shutdown hooks unconditionally.
+
+        :return: None
+        """
+        if self.mysql_pool is None:
+            return
+        self.mysql_pool.close()
+        await self.mysql_pool.wait_closed()
+        # Drop the reference so a second close_pool() call is a no-op.
+        self.mysql_pool = None
+
+    async def async_select(self, sql, params=None):
+        """
+        Run a read statement and return every row.
+
+        :param sql: statement text; use ``%s`` placeholders for values
+        :param params: optional values bound to the placeholders by the
+            driver; prefer this over formatting values into ``sql``
+        :return: all rows returned by ``cursor.fetchall()``
+        """
+        async with self.mysql_pool.acquire() as conn:
+            async with conn.cursor() as cursor:
+                await cursor.execute(sql, params)
+                return await cursor.fetchall()
+
+    async def async_insert(self, sql, params):
+        """
+        Run a write statement (insert or update) and commit it.
+
+        :param sql: statement text with ``%s`` placeholders
+        :param params: values bound to the placeholders
+        :return: None
+        """
+        async with self.mysql_pool.acquire() as conn:
+            async with conn.cursor() as cursor:
+                await cursor.execute(sql, params)
+                await conn.commit()
+
 
 
 class ArticleDBTools(object):
 class ArticleDBTools(object):
     """
     """
     长文数据库相关功能
     长文数据库相关功能
     """
     """
+
     def __init__(self, mysql_client):
     def __init__(self, mysql_client):
         """
         """
         init mysql
         init mysql
@@ -14,4 +76,98 @@ class ArticleDBTools(object):
         """
         """
         self.mysql_client = mysql_client
         self.mysql_client = mysql_client
 
 
+    async def getSingleAccountArticles(self, account_name):
+        """
+        Fetch every stored article of a single account.
+
+        :param account_name: value matched against the ``accountName`` column
+        :return: DataFrame with the columns appMsgId, title, Type, updateTime,
+            ItemIndex, ContentUrl, show_view_count, show_like_count
+        """
+        keys = [
+            "appMsgId",
+            "title",
+            "Type",
+            "updateTime",
+            "ItemIndex",
+            "ContentUrl",
+            "show_view_count",
+            "show_like_count",
+        ]
+        # The account name is caller-supplied text, so it must not be spliced
+        # into the statement verbatim: escape it as a MySQL string literal.
+        # NOTE(review): backslash escaping assumes the server is not running
+        # with NO_BACKSLASH_ESCAPES - confirm.
+        safe_account_name = aiomysql.escape_string(str(account_name))
+        sql = f"""
+            SELECT {", ".join(keys)}
+            FROM official_articles
+            WHERE accountName = '{safe_account_name}';"""
+        result = await self.mysql_client.async_select(sql=sql)
+        return DataFrame(result, columns=keys)
+
+    async def getArticleByFilter(
+            self,
+            account_name,
+            index_list=None,
+            min_time=None,
+            max_time=None,
+            msg_type=None,
+    ):
+        """
+        Load one account's articles and keep only the rows matching the filters.
+
+        :param account_name: account whose articles are loaded
+        :param index_list: ItemIndex values to keep (the original note says
+            the index ranges from 1 to 8); falls back to [1]
+        :param min_time: exclusive lower bound on updateTime; falls back to 0
+        :param max_time: exclusive upper bound on updateTime; falls back to
+            4088051123
+        :param msg_type: value the Type column must equal; falls back to "9"
+        :return: DataFrame holding the matching rows
+        """
+        # Any falsy argument (None, 0, "", []) is replaced by its default.
+        index_list = index_list or [1]
+        msg_type = msg_type or "9"
+        min_time = min_time or 0
+        # Timestamp in the year 2099, used as "no upper bound".
+        max_time = max_time or 4088051123
+        articles = await self.getSingleAccountArticles(account_name=account_name)
+        type_matches = articles["Type"] == msg_type
+        after_min = min_time < articles["updateTime"]
+        before_max = articles["updateTime"] < max_time
+        index_matches = articles["ItemIndex"].isin(index_list)
+        return articles[type_matches & after_min & before_max & index_matches]
+
+    async def get_good_bad_articles(self,
+                                    account_name,
+                                    method,
+                                    rate=0.1,
+                                    index_list=None,
+                                    min_time=None,
+                                    max_time=None,
+                                    msg_type=None
+                                    ):
+        """
+        Split an account's filtered articles into a well-performing and a
+        poorly-performing group, judged by show_view_count.
+
+        :param account_name: account whose articles are analysed
+        :param method: "top" takes the highest / lowest ``rate`` share of the
+            articles (at least one row each); "avg" takes the articles whose
+            views are more than ``rate`` above / below the mean
+        :param rate: share or tolerance used by ``method``; None falls back
+            to 0.1
+        :param index_list: forwarded to getArticleByFilter
+        :param min_time: forwarded to getArticleByFilter
+        :param max_time: forwarded to getArticleByFilter
+        :param msg_type: forwarded to getArticleByFilter
+        :return: (good_df, bad_df), or (None, None) when no article matches
+            the filters
+        :raises ValueError: if ``method`` is neither "top" nor "avg"
+        """
+        # Callers may forward rate=None explicitly; None cannot be used in
+        # the arithmetic below, so restore the documented default.
+        if rate is None:
+            rate = 0.1
+        article_data_frame = await self.getArticleByFilter(
+            account_name=account_name,
+            index_list=index_list,
+            min_time=min_time,
+            max_time=max_time,
+            msg_type=msg_type
+        )
+        # A DataFrame has no "nrows" attribute; len() gives the row count.
+        df_rows = len(article_data_frame)
+        if df_rows == 0:
+            return None, None
+        if method == "top":
+            # sort_values takes ascending=False; "reversed" is not a keyword.
+            sorted_df = article_data_frame.sort_values(by='show_view_count', ascending=False)
+            topn = max(int(df_rows * rate), 1)
+            return sorted_df.head(topn), sorted_df.tail(topn)
+        if method == "avg":
+            view_counts = article_data_frame['show_view_count']
+            avg_view = view_counts.mean()
+            good_df = article_data_frame[view_counts > avg_view * (1.0 + rate)]
+            # Poor performers sit BELOW the lower threshold.
+            bad_df = article_data_frame[view_counts < avg_view * (1.0 - rate)]
+            return good_df, bad_df
+        # Fail loudly instead of returning a bare None that callers cannot
+        # unpack into two values.
+        raise ValueError(f"unsupported method: {method!r}")
 
 

+ 15 - 12
routes/__init__.py

@@ -6,7 +6,8 @@ from quart import Blueprint, jsonify, request
 
 
 from .accountArticleRank import AccountArticleRank
 from .accountArticleRank import AccountArticleRank
 from .nlpServer import NLPServer
 from .nlpServer import NLPServer
-from .articleDBServer import ArticleDB
+# from .articleDBServer import ArticleDB
+from .accountServer import AccountServer
 
 
 
 
 def AlgRoutes(mysql_client, model):
 def AlgRoutes(mysql_client, model):
@@ -55,17 +56,19 @@ def AlgRoutes(mysql_client, model):
         :return:
         :return:
         """
         """
         params = await request.get_json()
         params = await request.get_json()
-        return jsonify(params)
-
-    @blueprint.route("/article_db", methods=["POST"])
-    async def articleMysql():
-        """
-        长文数据库相关接口
-        :return:
-        """
-        params = await request.get_json()
-        ADB = ArticleDB(params=params, mysql_client=mysql_client)
-        response = await ADB.deal()
+        AS = AccountServer(mysql_client=mysql_client, params=params)
+        response = await AS.deal()
         return jsonify(response)
         return jsonify(response)
 
 
+    # @blueprint.route("/article_db", methods=["POST"])
+    # async def articleMysql():
+    #     """
+    #     长文数据库相关接口
+    #     :return:
+    #     """
+    #     params = await request.get_json()
+    #     ADB = ArticleDB(params=params, mysql_client=mysql_client)
+    #     response = await ADB.deal()
+    #     return jsonify(response)
+
     return blueprint
     return blueprint

+ 83 - 48
routes/accountServer.py

@@ -2,21 +2,8 @@
 @author: luojunhui
 @author: luojunhui
 """
 """
 
 
-
-def get_account_interest_by_top(account_nickname, min_time, max_time, rate):
-    return 1, 2
-
-
-def get_account_interest_by_avg(account_nickname, min_time, max_time, rate):
-    return 1, 2
-
-
-def get_sim_score_cross_mean(a, b, c):
-    return {"1": "2"}
-
-
-def get_sim_score_cross_avg(a, b, c):
-    return {"1": "2"}
+import requests
+from applications.articleTools import ArticleDBTools
 
 
 
 
 class AccountServer(object):
 class AccountServer(object):
@@ -24,7 +11,7 @@ class AccountServer(object):
     获取标题和公众号文章的相关性
     获取标题和公众号文章的相关性
     """
     """
 
 
-    def __init__(self, params):
+    def __init__(self, mysql_client, params):
         self.account_name_list = None
         self.account_name_list = None
         self.sim_type = None
         self.sim_type = None
         self.interest_type = None
         self.interest_type = None
@@ -33,6 +20,29 @@ class AccountServer(object):
         self.rate = None
         self.rate = None
         self.title_list = None
         self.title_list = None
         self.params = params
         self.params = params
+        self.AT = ArticleDBTools(mysql_client)
+
+    async def request_for_nlp(self, title_list, account_interest, account_weight):
+        """
+        Ask the local NLP service to score titles against an account's
+        interest articles.
+
+        :param title_list: texts sent as ``text_list_a``
+        :param account_interest: texts sent as ``text_list_b``
+        :param account_weight: weights sent as ``score_list_b``
+        :return: the ``requests.Response`` returned by the NLP service
+        """
+        import asyncio
+
+        def as_list(values):
+            # pandas Series / numpy arrays cannot be JSON-encoded; tolist()
+            # turns them into plain Python values. Other inputs pass through.
+            return values.tolist() if hasattr(values, "tolist") else values
+
+        headers = {"Content-Type": "application/json"}
+        url = "http://localhost:6060/nlp"
+        body = {
+            "data": {
+                "text_list_a": as_list(title_list),
+                "text_list_b": as_list(account_interest),
+                "score_list_b": as_list(account_weight),
+                "symbol": 1,
+            },
+            "function": (
+                "similarities_cross_mean"
+                if self.sim_type == "mean"
+                else "similarities_cross_avg"
+            ),
+        }
+        # requests is a blocking client: run it in a worker thread so the
+        # event loop keeps serving other requests, and bound the wait so a
+        # hung NLP service cannot stall this request forever.
+        response = await asyncio.to_thread(
+            requests.post, url=url, headers=headers, json=body, timeout=120
+        )
+        return response
 
 
     def checkParams(self):
     def checkParams(self):
         """
         """
@@ -40,47 +50,70 @@ class AccountServer(object):
         :return:
         :return:
         """
         """
         try:
         try:
-            self.title_list = self.params['text_list']
+            self.title_list = self.params["text_list"]
             self.account_name_list = self.params.get("account_nickname_list", [])
             self.account_name_list = self.params.get("account_nickname_list", [])
             self.rate = self.params.get("rate", 0.1)
             self.rate = self.params.get("rate", 0.1)
-            self.max_time = self.params.get("max_time", 0.1)
-            self.min_time = self.params.get("min_time", 0.1)
-            self.interest_type = self.params.get("interest_type", "by_top")
+            self.max_time = self.params.get("max_time")
+            self.min_time = self.params.get("min_time")
+            self.interest_type = self.params.get("interest_type", "top")
             self.sim_type = self.params.get("sim_type", "mean")
             self.sim_type = self.params.get("sim_type", "mean")
             return None
             return None
         except Exception as e:
         except Exception as e:
-            response = {
-                "error": "Params error",
-                "detail": str(e)
-            }
+            response = {"error": "Params error", "detail": str(e)}
             return response
             return response
 
 
-    def getEachAccountScoreList(self, account_name):
+    async def getAccountInterest(
+        self,
+        account_name,
+        method,
+        rate=None,
+        msg_type=None,
+        index_list=None,
+        min_time=None,
+        max_time=None,
+    ):
+        """
+        Collect the titles and view counts of an account's well-performing
+        articles.
+
+        :param account_name: account whose articles are analysed
+        :param method: selection strategy forwarded to get_good_bad_articles
+        :param rate: selection ratio; None falls back to 0.1
+        :param msg_type: forwarded to get_good_bad_articles
+        :param index_list: forwarded to get_good_bad_articles
+        :param min_time: forwarded to get_good_bad_articles
+        :param max_time: forwarded to get_good_bad_articles
+        :return: (title_list, view_count_list) as plain Python lists
+        :raises ValueError: if no article is available for the account
+        """
+        # Forwarding rate=None would override the callee's own 0.1 default
+        # and break its arithmetic, so apply that default here.
+        if rate is None:
+            rate = 0.1
+        good_df, _bad_df = await self.AT.get_good_bad_articles(
+            account_name=account_name,
+            method=method,
+            msg_type=msg_type,
+            index_list=index_list,
+            min_time=min_time,
+            max_time=max_time,
+            rate=rate,
+        )
+        # (None, None) signals "no matching articles"; say so explicitly
+        # instead of failing on a None subscript.
+        if good_df is None:
+            raise ValueError(f"no articles found for account: {account_name}")
+        # Plain lists keep the values JSON serialisable for the NLP request.
+        view_count_list = good_df["show_view_count"].tolist()
+        title_list = good_df["title"].tolist()
+        return title_list, view_count_list
+
+    async def getEachAccountScoreList(self, account_name):
         """
         """
         获取和单个账号的相关性分数
         获取和单个账号的相关性分数
         :return:
         :return:
         """
         """
         try:
         try:
-            account_interest, account_weight = (
-                get_account_interest_by_top(
-                    account_nickname=account_name,
-                    min_time=self.min_time,
-                    max_time=self.max_time,
-                    rate=self.rate,
-                )
-                if self.interest_type == "by_top"
-                else get_account_interest_by_avg(
-                    account_nickname=account_name,
-                    min_time=self.min_time,
-                    max_time=self.max_time,
-                    rate=self.rate,
-                )
+            account_interest, account_weight = await self.getAccountInterest(
+                account_name=account_name,
+                method=self.interest_type,
             )
             )
-            res = (
-                get_sim_score_cross_mean(self.title_list, account_interest, account_weight)
-                if self.sim_type == "mean"
-                else get_sim_score_cross_avg(self.title_list, account_interest, account_weight)
+            response = await self.request_for_nlp(
+                title_list=self.title_list,
+                account_interest=account_interest,
+                account_weight=account_weight,
             )
             )
+            res = response.json()
             sim_key = "score_list_mean" if self.sim_type == "mean" else "score_list_avg"
             sim_key = "score_list_mean" if self.sim_type == "mean" else "score_list_avg"
             return {
             return {
                 "score_list": res[sim_key],
                 "score_list": res[sim_key],
@@ -90,10 +123,10 @@ class AccountServer(object):
             print(e)
             print(e)
             return {
             return {
                 "score_list": [0] * len(self.title_list),
                 "score_list": [0] * len(self.title_list),
-                "text_list_max": self.title_list
+                "text_list_max": self.title_list,
             }
             }
 
 
-    def getAccountListScoreList(self):
+    async def getAccountListScoreList(self):
         """
         """
         获取AccountList中每一个账号的相关性分数
         获取AccountList中每一个账号的相关性分数
         :return:
         :return:
@@ -103,12 +136,14 @@ class AccountServer(object):
             if response.get(accountName):
             if response.get(accountName):
                 continue
                 continue
             else:
             else:
-                response[accountName] = self.getEachAccountScoreList(account_name=accountName)
+                response[accountName] = await self.getEachAccountScoreList(account_name=accountName)
         return response
         return response
 
 
-    def deal(self):
+    async def deal(self):
         """
         """
         Deal Function
         Deal Function
         :return:
         :return:
         """
         """
-        return self.checkParams() if self.checkParams() else self.getAccountListScoreList()
+        # Validate once: checkParams() returns an error dict on failure and
+        # None on success, so there is no need to run it a second time.
+        error_response = self.checkParams()
+        if error_response:
+            return error_response
+        return await self.getAccountListScoreList()

+ 1 - 0
test/rank_dev.py

@@ -3,6 +3,7 @@ import requests
 import time
 import time
 
 
 url = "http://47.98.154.124:6060/articleRank"
 url = "http://47.98.154.124:6060/articleRank"
+
 with open("body.json") as f:
 with open("body.json") as f:
     data = json.loads(f.read())
     data = json.loads(f.read())
 body = json.loads(data['data'])
 body = json.loads(data['data'])

+ 40 - 0
test/score_list_dev.py

@@ -0,0 +1,40 @@
+"""
+@author: luojunhui
+"""
+import json
+
+import requests
+
+
+class ArticleRank(object):
+    """
+    Small client used to exercise the score_list endpoint (account ranking).
+    """
+    url = "http://192.168.100.31:8179/score_list"
+    url1 = "http://192.168.100.31:8179/score_list"
+
+    @classmethod
+    def rank(cls, account_list, text_list):
+        """
+        POST the accounts and titles to the endpoint stored in ``cls.url1``.
+
+        :param account_list: sent as ``account_nickname_list``
+        :param text_list: sent as ``text_list``
+        :return: decoded JSON body of the response
+        """
+        payload = dict(
+            account_nickname_list=account_list,
+            text_list=text_list,
+            max_time=None,
+            min_time=None,
+            interest_type="avg",
+            sim_type="mean",
+            rate=0.1,
+        )
+        reply = requests.post(url=cls.url1, headers={}, json=payload)
+        return reply.json()
+
+
+if __name__ == '__main__':
+    # Manual smoke test: one account, the same title repeated ten times.
+    sample_title = '保姆为300万拆迁款,嫁给大24岁老头,丈夫去世后,她发现房产证没有丈夫名字'
+    AR = ArticleRank()
+    response = AR.rank(account_list=['生活良读'], text_list=[sample_title] * 10)
+    print(json.dumps(response, ensure_ascii=False, indent=4))