|  | @@ -85,7 +85,7 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |          FROM
 | 
	
		
			
				|  |  |              crawler_meta_article
 | 
	
		
			
				|  |  |          WHERE 
 | 
	
		
			
				|  |  | -            category = "{category}" and platform = "{article_source}" and status = {self.INIT_STATUS};
 | 
	
		
			
				|  |  | +            category = "{category}" and platform = "{article_source}";
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          article_list = self.db_client.select(sql)
 | 
	
		
			
				|  |  |          log(
 | 
	
	
		
			
				|  | @@ -97,7 +97,8 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |                  "category": category
 | 
	
		
			
				|  |  |              }
 | 
	
		
			
				|  |  |          )
 | 
	
		
			
				|  |  | -        article_df = DataFrame(article_list, columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status'])
 | 
	
		
			
				|  |  | +        article_df = DataFrame(article_list,
 | 
	
		
			
				|  |  | +                               columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status'])
 | 
	
		
			
				|  |  |          return article_df
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def change_article_status(self, category):
 | 
	
	
		
			
				|  | @@ -228,6 +229,26 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |          )
 | 
	
		
			
				|  |  |          return filter_df
 | 
	
		
			
				|  |  |  
 | 
	
		
			
    def filter_toutiao_articles(self, articles_df, category):
        """
        Toutiao article filtering funnel.

        Keeps only the rows whose ``status`` column equals ``self.INIT_STATUS``
        and passes the before/after row counts to ``bot``.

        :param articles_df: DataFrame of candidate articles; it is indexed by
            a ``status`` column here.
        :param category: category value forwarded unchanged in the ``bot`` detail.
        :return: the subset of ``articles_df`` whose ``status`` equals
            ``self.INIT_STATUS``.
        """
        total_count = len(articles_df)

        # Funnel stage 0: filter by status.
        has_init_status = articles_df['status'] == self.INIT_STATUS
        remaining_df = articles_df[has_init_status]
        remaining_count = len(remaining_df)
        removed_count = total_count - remaining_count

        # Runtime strings below are emitted as-is; do not alter them.
        status_filter_summary = "过滤数量: {}    剩余数量: {}".format(removed_count, remaining_count)
        bot(
            title="账号冷启动---头条推荐流发布",
            detail={
                "category": category,
                "总文章数量": total_count,
                "通过已经发布状态过滤": status_filter_summary,
            },
            mention=False
        )
        return remaining_df
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      def publish_filter_articles(self, category, articles_df, article_source):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          过滤文章
 | 
	
	
		
			
				|  | @@ -241,7 +262,7 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |                  filtered_articles_df = self.filter_weixin_articles(articles_df, category)
 | 
	
		
			
				|  |  |                  input_source_channel = 5
 | 
	
		
			
				|  |  |              case "toutiao":
 | 
	
		
			
				|  |  | -                filtered_articles_df = articles_df
 | 
	
		
			
				|  |  | +                filtered_articles_df = self.filter_toutiao_articles(articles_df, category)
 | 
	
		
			
				|  |  |                  input_source_channel = 6
 | 
	
		
			
				|  |  |              case _:
 | 
	
		
			
				|  |  |                  return
 | 
	
	
		
			
				|  | @@ -293,7 +314,7 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |              )
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |              # change article status
 | 
	
		
			
				|  |  | -            article_id_list = articles_df['article_id'].values.tolist()
 | 
	
		
			
				|  |  | +            article_id_list = filtered_articles_df['article_id'].values.tolist()
 | 
	
		
			
				|  |  |              self.change_article_status_while_publishing(article_id_list=article_id_list)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def do_job(self, article_source, category_list=None):
 | 
	
	
		
			
				|  | @@ -329,4 +350,4 @@ class CategoryColdStartTask(object):
 | 
	
		
			
				|  |  |                          "function": "do_job",
 | 
	
		
			
				|  |  |                          "traceback": traceback.format_exc()
 | 
	
		
			
				|  |  |                      }
 | 
	
		
			
				|  |  | -                )
 | 
	
		
			
				|  |  | +                )
 |