|
@@ -5,6 +5,7 @@
|
|
|
import asyncio
|
|
|
import aiomysql
|
|
|
from pandas import DataFrame
|
|
|
+from datetime import datetime
|
|
|
|
|
|
|
|
|
class TaskMySQLClient(object):
|
|
@@ -76,6 +77,50 @@ class ArticleDBTools(object):
|
|
|
"""
|
|
|
self.mysql_client = mysql_client
|
|
|
|
|
|
+ async def getAccountAvgInfo(self, account_name):
|
|
|
+ """
|
|
|
+ 获取单个账号历史均值
|
|
|
+ """
|
|
|
+ keys = [
|
|
|
+ "gh_id",
|
|
|
+ "position",
|
|
|
+ "update_time",
|
|
|
+ "account_name",
|
|
|
+ "fans",
|
|
|
+ "read_avg",
|
|
|
+ "like_avg",
|
|
|
+ "status",
|
|
|
+ "account_type",
|
|
|
+ "account_mode",
|
|
|
+ "account_source",
|
|
|
+ "account_status",
|
|
|
+ "business_type",
|
|
|
+ "read_rate_avg"
|
|
|
+ ]
|
|
|
+ sql = f"""
|
|
|
+ SELECT {", ".join(keys)}
|
|
|
+ FROM account_avg_info_v3
|
|
|
+ WHERE account_name = '{account_name}'
|
|
|
+ and position = 1;"""
|
|
|
+ result = await self.mysql_client.async_select(sql=sql)
|
|
|
+ return result
|
|
|
+
|
|
|
+ async def get_account_avg_info(self, account_avg_info_map, timestamp):
|
|
|
+ target_date = datetime.fromtimestamp(timestamp).date()
|
|
|
+ # 获取所有可用日期并排序
|
|
|
+ available_dates = sorted(account_avg_info_map.keys())
|
|
|
+ # 尝试获取指定日期
|
|
|
+ info = account_avg_info_map.get(target_date.isoformat())
|
|
|
+ if info is not None:
|
|
|
+ return info
|
|
|
+ # 如果指定日期不存在,寻找最近日期
|
|
|
+ closest_date = None
|
|
|
+ for date in reversed(available_dates):
|
|
|
+ if (closest_date is None or abs((datetime.fromisoformat(date).date() - target_date).days) <
|
|
|
+ abs((datetime.fromisoformat(closest_date).date() - target_date).days)):
|
|
|
+ closest_date = date
|
|
|
+ return account_avg_info_map.get(closest_date) if closest_date else None
|
|
|
+
|
|
|
async def getSingleAccountArticles(self, account_name):
|
|
|
"""
|
|
|
获取单个账号的历史文章
|
|
@@ -103,6 +148,7 @@ class ArticleDBTools(object):
|
|
|
async def getArticleByFilter(
|
|
|
self,
|
|
|
account_name,
|
|
|
+ view_count_filter=None,
|
|
|
index_list=None,
|
|
|
min_time=None,
|
|
|
max_time=None,
|
|
@@ -132,11 +178,14 @@ class ArticleDBTools(object):
|
|
|
& (articleDataFrame["updateTime"] < max_time)
|
|
|
& (articleDataFrame["ItemIndex"].isin(index_list))
|
|
|
]
|
|
|
+ if view_count_filter:
|
|
|
+ filterDataFrame = filterDataFrame[(filterDataFrame["show_view_count"] > view_count_filter)]
|
|
|
return filterDataFrame
|
|
|
|
|
|
async def get_good_bad_articles(self,
|
|
|
account_name,
|
|
|
method,
|
|
|
+ view_count_filter=None,
|
|
|
rate=0.1,
|
|
|
index_list=None,
|
|
|
min_time=None,
|
|
@@ -149,6 +198,7 @@ class ArticleDBTools(object):
|
|
|
"""
|
|
|
article_data_frame = await self.getArticleByFilter(
|
|
|
account_name=account_name,
|
|
|
+ view_count_filter=view_count_filter,
|
|
|
index_list=index_list,
|
|
|
min_time=min_time,
|
|
|
max_time=max_time,
|
|
@@ -168,6 +218,22 @@ class ArticleDBTools(object):
|
|
|
good_df = article_data_frame[(article_data_frame['show_view_count']) > avg_view * (1.0 + rate)]
|
|
|
bad_df = article_data_frame[(article_data_frame['show_view_count']) > avg_view * (1.0 - rate)]
|
|
|
return good_df, bad_df
|
|
|
+ case "account_avg":
|
|
|
+ account_read_avg_list = await self.getAccountAvgInfo(
|
|
|
+ account_name=account_name
|
|
|
+ )
|
|
|
+ account_avg_info_map = {info[2]: info for info in account_read_avg_list}
|
|
|
+ view_count_avg_list = []
|
|
|
+ for index, row in article_data_frame.iterrows():
|
|
|
+ update_time = row['updateTime']
|
|
|
+ info = await self.get_account_avg_info(account_avg_info_map, update_time)
|
|
|
+ view_count_avg_list.append(info[5])
|
|
|
+ article_data_frame['view_count_avg'] = view_count_avg_list
|
|
|
+ good_df = article_data_frame[(article_data_frame['show_view_count']) >
|
|
|
+ (article_data_frame['view_count_avg']) * (1.0 + rate)]
|
|
|
+ bad_df = article_data_frame[(article_data_frame['show_view_count']) <
|
|
|
+ (article_data_frame['view_count_avg']) * (1.0 - rate)]
|
|
|
+ return good_df, bad_df
|
|
|
else:
|
|
|
return None, None
|
|
|
|