|  | @@ -5,6 +5,7 @@
 | 
	
		
			
				|  |  |  import asyncio
 | 
	
		
			
				|  |  |  import aiomysql
 | 
	
		
			
				|  |  |  from pandas import DataFrame
 | 
	
		
			
				|  |  | +from datetime import datetime
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  class TaskMySQLClient(object):
 | 
	
	
		
			
				|  | @@ -76,6 +77,50 @@ class ArticleDBTools(object):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          self.mysql_client = mysql_client
 | 
	
		
			
				|  |  |  
 | 
	
		
			
    async def getAccountAvgInfo(self, account_name):
        """
        Fetch the historical average statistics rows for a single account.

        Selects the columns listed in ``keys`` from ``account_avg_info_v3``
        for rows whose ``account_name`` matches and whose ``position`` is 1.

        :param account_name: account name to look up. It is embedded in the
            SQL text as a quoted string literal, so it is escaped first.
        :return: whatever ``self.mysql_client.async_select`` returns for the
            query; the selected columns follow the order of ``keys``.
        """
        keys = [
            "gh_id",
            "position",
            "update_time",
            "account_name",
            "fans",
            "read_avg",
            "like_avg",
            "status",
            "account_type",
            "account_mode",
            "account_source",
            "account_status",
            "business_type",
            "read_rate_avg"
        ]
        # The value ends up inside a single-quoted SQL literal. Doubling
        # backslashes and single quotes keeps a name such as "o'brien" from
        # terminating the literal early (broken query / SQL injection).
        # NOTE(review): if async_select supports bound parameters, prefer
        # passing account_name that way instead of escaping by hand.
        safe_account_name = str(account_name).replace("\\", "\\\\").replace("'", "''")
        sql = f"""
            SELECT {", ".join(keys)}
            FROM account_avg_info_v3
            WHERE account_name = '{safe_account_name}'
            and position = 1;"""
        return await self.mysql_client.async_select(sql=sql)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
    async def get_account_avg_info(self, account_avg_info_map, timestamp):
        """
        Pick the average-info entry dated on, or nearest to, a timestamp.

        :param account_avg_info_map: mapping whose keys are ISO-format date
            strings (they are parsed with ``datetime.fromisoformat``).
        :param timestamp: POSIX timestamp; converted to a calendar date with
            ``datetime.fromtimestamp`` (local time).
        :return: the entry stored under the timestamp's own date when there
            is one; otherwise the entry whose date is the fewest days away
            (the later date wins a tie); ``None`` when the map is empty.
        """
        target_date = datetime.fromtimestamp(timestamp).date()

        # Fast path: an entry exists for exactly this date, so no sorting or
        # parsing of the other keys is needed.
        exact_info = account_avg_info_map.get(target_date.isoformat())
        if exact_info is not None:
            return exact_info

        if not account_avg_info_map:
            return None

        def day_gap(date_key):
            # Absolute distance in whole days between a key and the target.
            return abs((datetime.fromisoformat(date_key).date() - target_date).days)

        # min() keeps the first of equally-distant candidates; scanning the
        # keys in descending order therefore prefers the later date on ties.
        closest_key = min(sorted(account_avg_info_map, reverse=True), key=day_gap)
        return account_avg_info_map.get(closest_key)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      async def getSingleAccountArticles(self, account_name):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          获取单个账号的历史文章
 | 
	
	
		
			
				|  | @@ -103,6 +148,7 @@ class ArticleDBTools(object):
 | 
	
		
			
				|  |  |      async def getArticleByFilter(
 | 
	
		
			
				|  |  |              self,
 | 
	
		
			
				|  |  |              account_name,
 | 
	
		
			
				|  |  | +            view_count_filter=None,
 | 
	
		
			
				|  |  |              index_list=None,
 | 
	
		
			
				|  |  |              min_time=None,
 | 
	
		
			
				|  |  |              max_time=None,
 | 
	
	
		
			
				|  | @@ -132,11 +178,14 @@ class ArticleDBTools(object):
 | 
	
		
			
				|  |  |              & (articleDataFrame["updateTime"] < max_time)
 | 
	
		
			
				|  |  |              & (articleDataFrame["ItemIndex"].isin(index_list))
 | 
	
		
			
				|  |  |              ]
 | 
	
		
			
				|  |  | +        if view_count_filter:
 | 
	
		
			
				|  |  | +            filterDataFrame = filterDataFrame[(articleDataFrame["show_view_count"] > view_count_filter)]
 | 
	
		
			
				|  |  |          return filterDataFrame
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      async def get_good_bad_articles(self,
 | 
	
		
			
				|  |  |                                      account_name,
 | 
	
		
			
				|  |  |                                      method,
 | 
	
		
			
				|  |  | +                                    view_count_filter,
 | 
	
		
			
				|  |  |                                      rate=0.1,
 | 
	
		
			
				|  |  |                                      index_list=None,
 | 
	
		
			
				|  |  |                                      min_time=None,
 | 
	
	
		
			
				|  | @@ -149,6 +198,7 @@ class ArticleDBTools(object):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          article_data_frame = await self.getArticleByFilter(
 | 
	
		
			
				|  |  |              account_name=account_name,
 | 
	
		
			
				|  |  | +            view_count_filter=view_count_filter,
 | 
	
		
			
				|  |  |              index_list=index_list,
 | 
	
		
			
				|  |  |              min_time=min_time,
 | 
	
		
			
				|  |  |              max_time=max_time,
 | 
	
	
		
			
				|  | @@ -168,6 +218,22 @@ class ArticleDBTools(object):
 | 
	
		
			
				|  |  |                      good_df = article_data_frame[(article_data_frame['show_view_count']) > avg_view * (1.0 + rate)]
 | 
	
		
			
				|  |  |                      bad_df = article_data_frame[(article_data_frame['show_view_count']) > avg_view * (1.0 - rate)]
 | 
	
		
			
				|  |  |                      return good_df, bad_df
 | 
	
		
			
				|  |  | +                case "account_avg":
 | 
	
		
			
				|  |  | +                    account_read_avg_list = await self.getAccountAvgInfo(
 | 
	
		
			
				|  |  | +                        account_name=account_name
 | 
	
		
			
				|  |  | +                    )
 | 
	
		
			
				|  |  | +                    account_avg_info_map = {info[2]: info for info in account_read_avg_list}
 | 
	
		
			
				|  |  | +                    view_count_avg_list = []
 | 
	
		
			
				|  |  | +                    for index, row in article_data_frame.iterrows():
 | 
	
		
			
				|  |  | +                        update_time = row['updateTime']
 | 
	
		
			
				|  |  | +                        info = await self.get_account_avg_info(account_avg_info_map, update_time)
 | 
	
		
			
				|  |  | +                        view_count_avg_list.append(info[5])
 | 
	
		
			
				|  |  | +                    article_data_frame['view_count_avg'] = view_count_avg_list
 | 
	
		
			
				|  |  | +                    good_df = article_data_frame[(article_data_frame['show_view_count']) >
 | 
	
		
			
				|  |  | +                                                 (article_data_frame['view_count_avg']) * (1.0 + rate)]
 | 
	
		
			
				|  |  | +                    bad_df = article_data_frame[(article_data_frame['show_view_count']) >
 | 
	
		
			
				|  |  | +                                                (article_data_frame['view_count_avg']) * (1.0 - rate)]
 | 
	
		
			
				|  |  | +                    return good_df, bad_df
 | 
	
		
			
				|  |  |          else:
 | 
	
		
			
				|  |  |              return None, None
 | 
	
		
			
				|  |  |  
 |