|  | @@ -3,7 +3,7 @@
 | 
	
		
			
				|  |  |  """
 | 
	
		
			
				|  |  |  import traceback
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -import pandas as pd
 | 
	
		
			
				|  |  | +from datetime import datetime, timedelta
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  from applications import PQMySQL, longArticlesMySQL, bot, log
 | 
	
		
			
				|  |  |  from applications.aiditApi import get_generated_article_list
 | 
	
	
		
			
				|  | @@ -84,6 +84,25 @@ class ArticleTitleStatusManager(object):
 | 
	
		
			
				|  |  |          articles = self.lam_client.select(sql)
 | 
	
		
			
				|  |  |          return [i[0] for i in articles]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
    def get_bad_articles_v2(self, publish_date_threshold, discovery_times_threshold) -> list[str]:
        """
        Return titles that have been published often enough and whose first
        recorded publish date is older than a cut-off.

        Rows of ``datastat_sort_strategy`` with ``position > 2`` and
        ``fans > 10000`` are grouped by title; a title is returned when its
        row count is ``>= discovery_times_threshold`` and its earliest
        ``date_str`` is ``< publish_date_threshold``.

        :param publish_date_threshold: cut-off date made of digits only
            (the caller in this file passes a ``%Y%m%d`` string); not a timestamp
        :param discovery_times_threshold: minimum number of rows (publishes)
            a title must have; must be convertible to ``int``
        :return: list of matching titles (first column of every result row)
        :raises ValueError: if either threshold is not purely numeric
        """
        # Both values are spliced into the SQL text below, so make sure they
        # are plain numbers before building the statement.
        times_threshold = int(discovery_times_threshold)
        date_threshold = str(publish_date_threshold).strip()
        if not (date_threshold.isascii() and date_threshold.isdigit()):
            raise ValueError(
                f"publish_date_threshold must contain digits only, got {publish_date_threshold!r}"
            )

        # NOTE(review): the date is intentionally left unquoted, exactly as
        # before, so the comparison semantics against date_str are unchanged.
        sql = f"""
            SELECT
                title, count(1) as title_count, min(date_str) as min_date
            FROM
                datastat_sort_strategy
            WHERE position > 2 and fans > 10000
            GROUP BY title
            HAVING title_count >= {times_threshold} and min_date < {date_threshold};
        """
        articles = self.lam_client.select(sql)
        return [row[0] for row in articles]
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      def save_titles(self, title_list, status) -> int:
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          修改标题状态
 | 
	
	
		
			
				|  | @@ -149,6 +168,9 @@ def main():
 | 
	
		
			
				|  |  |      ARTICLE_EXIT_STATUS = -1
 | 
	
		
			
				|  |  |      READ_TIMES_ON_AVG_THRESHOLD = 0.5
 | 
	
		
			
				|  |  |      DISCOVERY_TIMES_THRESHOLD = 10
 | 
	
		
			
				|  |  | +    PUBLISH_TIMES_THRESHOLD = 5
 | 
	
		
			
				|  |  | +    DAYS_THRESHOLD = 30
 | 
	
		
			
				|  |  | +    FIRST_PUBLISH_DATE_THRESHOLD = (datetime.now() - timedelta(days=DAYS_THRESHOLD)).strftime('%Y%m%d')
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      article_title_manager = ArticleTitleStatusManager()
 | 
	
		
			
				|  |  |      article_title_manager.init_database()
 | 
	
	
		
			
				|  | @@ -160,7 +182,7 @@ def main():
 | 
	
		
			
				|  |  |          status=UP_LEVEL_STATUS
 | 
	
		
			
				|  |  |      )
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    # 处理退场标题
 | 
	
		
			
				|  |  | +    # 处理退场标题V1
 | 
	
		
			
				|  |  |      exit_article_list = article_title_manager.get_bad_articles(
 | 
	
		
			
				|  |  |          read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
 | 
	
		
			
				|  |  |          discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
 | 
	
	
		
			
				|  | @@ -169,13 +191,25 @@ def main():
 | 
	
		
			
				|  |  |          title_list=exit_article_list,
 | 
	
		
			
				|  |  |          status=ARTICLE_EXIT_STATUS)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    # 处理退场标题v2
 | 
	
		
			
				|  |  | +    exit_article_list_v2 = article_title_manager.get_bad_articles_v2(
 | 
	
		
			
				|  |  | +        publish_date_threshold=FIRST_PUBLISH_DATE_THRESHOLD,
 | 
	
		
			
				|  |  | +        discovery_times_threshold=PUBLISH_TIMES_THRESHOLD
 | 
	
		
			
				|  |  | +    )
 | 
	
		
			
				|  |  | +    exit_success_count_v2 = article_title_manager.save_titles(
 | 
	
		
			
				|  |  | +        title_list=exit_article_list_v2,
 | 
	
		
			
				|  |  | +        status=ARTICLE_EXIT_STATUS)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      bot(
 | 
	
		
			
				|  |  |          title="冷启动文章晋级/退场完成",
 | 
	
		
			
				|  |  |          detail={
 | 
	
		
			
				|  |  |              "晋级文章数量": up_level_success_count,
 | 
	
		
			
				|  |  | -            "退场文章数量": exit_success_count,
 | 
	
		
			
				|  |  | -            "阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
 | 
	
		
			
				|  |  | -            "探索次数阈值": DISCOVERY_TIMES_THRESHOLD
 | 
	
		
			
				|  |  | +            "策略1:退场文章数量": exit_success_count,
 | 
	
		
			
				|  |  | +            "策略2:退场文章数量": exit_success_count_v2,
 | 
	
		
			
				|  |  | +            "策略1:阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
 | 
	
		
			
				|  |  | +            "策略1:探索次数阈值": DISCOVERY_TIMES_THRESHOLD,
 | 
	
		
			
				|  |  | +            "策略2:发布次数阈值": PUBLISH_TIMES_THRESHOLD,
 | 
	
		
			
				|  |  | +            "策略2:发布天数阈值": DAYS_THRESHOLD
 | 
	
		
			
				|  |  |          },
 | 
	
		
			
				|  |  |          mention=False
 | 
	
		
			
				|  |  |      )
 |