Selaa lähdekoodia

好文章判断增加account_avg
增加最低阅读量过滤

wangyunpeng 7 kuukautta sitten
vanhempi
commit
66b2789035
2 muutettua tiedostoa jossa 73 lisäystä ja 1 poistoa
  1. 66 0
      applications/articleTools.py
  2. 7 1
      routes/accountServer.py

+ 66 - 0
applications/articleTools.py

@@ -5,6 +5,7 @@
 import asyncio
 import aiomysql
 from pandas import DataFrame
+from datetime import datetime
 
 
 class TaskMySQLClient(object):
@@ -76,6 +77,50 @@ class ArticleDBTools(object):
         """
         self.mysql_client = mysql_client
 
+    async def getAccountAvgInfo(self, account_name):
+        """
+        Fetch the historical-average rows of a single account (only rows with position = 1).
+        """
+        keys = [  # SELECT column order; presumably callers index rows by position - verify
+            "gh_id",
+            "position",
+            "update_time",
+            "account_name",
+            "fans",
+            "read_avg",
+            "like_avg",
+            "status",
+            "account_type",
+            "account_mode",
+            "account_source",
+            "account_status",
+            "business_type",
+            "read_rate_avg"
+        ]
+        sql = f"""
+            SELECT {", ".join(keys)}
+            FROM account_avg_info_v3
+            WHERE account_name = '{account_name}'
+            and position = 1;"""
+        result = await self.mysql_client.async_select(sql=sql)  # NOTE(review): account_name is interpolated into the SQL text unescaped - confirm it is never untrusted
+        return result
+
+    async def get_account_avg_info(self, account_avg_info_map, timestamp):
+        target_date = datetime.fromtimestamp(timestamp).date()  # naive conversion, i.e. local time
+        # Collect all available date keys in sorted order
+        available_dates = sorted(account_avg_info_map.keys())
+        # Try an exact match on the target date first
+        info = account_avg_info_map.get(target_date.isoformat())
+        if info is not None:
+            return info
+        # No exact match: fall back to the key whose date is nearest to the target
+        closest_date = None
+        for date in reversed(available_dates):  # last-sorted key first; strict "<" keeps it on ties
+            if (closest_date is None or abs((datetime.fromisoformat(date).date() - target_date).days) <
+                    abs((datetime.fromisoformat(closest_date).date() - target_date).days)):
+                closest_date = date
+        return account_avg_info_map.get(closest_date) if closest_date else None  # None when the map is empty
+
     async def getSingleAccountArticles(self, account_name):
         """
         获取单个账号的历史文章
@@ -103,6 +148,7 @@ class ArticleDBTools(object):
     async def getArticleByFilter(
             self,
             account_name,
+            view_count_filter=None,
             index_list=None,
             min_time=None,
             max_time=None,
@@ -132,11 +178,14 @@ class ArticleDBTools(object):
             & (articleDataFrame["updateTime"] < max_time)
             & (articleDataFrame["ItemIndex"].isin(index_list))
             ]
+        if view_count_filter:
+            filterDataFrame = filterDataFrame[(articleDataFrame["show_view_count"] > view_count_filter)]
         return filterDataFrame
 
     async def get_good_bad_articles(self,
                                     account_name,
                                     method,
+                                    view_count_filter,
                                     rate=0.1,
                                     index_list=None,
                                     min_time=None,
@@ -149,6 +198,7 @@ class ArticleDBTools(object):
         """
         article_data_frame = await self.getArticleByFilter(
             account_name=account_name,
+            view_count_filter=view_count_filter,
             index_list=index_list,
             min_time=min_time,
             max_time=max_time,
@@ -168,6 +218,22 @@ class ArticleDBTools(object):
                     good_df = article_data_frame[(article_data_frame['show_view_count']) > avg_view * (1.0 + rate)]
                     bad_df = article_data_frame[(article_data_frame['show_view_count']) > avg_view * (1.0 - rate)]
                     return good_df, bad_df
+                case "account_avg":
+                    account_read_avg_list = await self.getAccountAvgInfo(
+                        account_name=account_name
+                    )
+                    account_avg_info_map = {info[2]: info for info in account_read_avg_list}
+                    view_count_avg_list = []
+                    for index, row in article_data_frame.iterrows():
+                        update_time = row['updateTime']
+                        info = await self.get_account_avg_info(account_avg_info_map, update_time)
+                        view_count_avg_list.append(info[5])
+                    article_data_frame['view_count_avg'] = view_count_avg_list
+                    good_df = article_data_frame[(article_data_frame['show_view_count']) >
+                                                 (article_data_frame['view_count_avg']) * (1.0 + rate)]
+                    bad_df = article_data_frame[(article_data_frame['show_view_count']) >
+                                                (article_data_frame['view_count_avg']) * (1.0 - rate)]
+                    return good_df, bad_df
         else:
             return None, None
 

+ 7 - 1
routes/accountServer.py

@@ -62,6 +62,7 @@ class AccountServer(object):
             self.min_time = self.params.get("min_time")
             self.interest_type = self.params.get("interest_type", "top")
             self.sim_type = self.params.get("sim_type", "mean")
+            self.view_count_filter = self.params.get("view_count_filter", None)
             return None
         except Exception as e:
             response = {"error": "Params error", "detail": str(e)}
@@ -71,6 +72,7 @@ class AccountServer(object):
         self,
         account_name,
         method,
+        view_count_filter,
         rate=None,
         msg_type=None,
         index_list=None,
@@ -98,6 +100,7 @@ class AccountServer(object):
             min_time=min_time,
             max_time=max_time,
             rate=rate,
+            view_count_filter=view_count_filter,
         )
         view_count_list = good_df["show_view_count"].values.tolist()
         title_list = good_df["title"].values.tolist()
@@ -114,7 +117,10 @@ class AccountServer(object):
             account_interest, account_weight = await self.getAccountInterest(
                 account_name=account_name,
                 method=self.interest_type,
-                rate=self.rate
+                rate=self.rate,
+                view_count_filter=self.view_count_filter,
+                min_time=self.min_time,
+                max_time=self.max_time,
             )
             sim_key = "score_list_mean" if self.sim_type == "mean" else "score_list_avg"
             response = await self.request_for_nlp(