luojunhui 3 месяца назад
Родитель
Коммит
8f45d6a207
1 изменённый файл: 26 добавлений и 5 удалений
  1. 26 5
      coldStartTasks/publish/publishCategoryArticles.py

+ 26 - 5
coldStartTasks/publish/publishCategoryArticles.py

@@ -85,7 +85,7 @@ class CategoryColdStartTask(object):
         FROM
             crawler_meta_article
         WHERE 
-            category = "{category}" and platform = "{article_source}" and status = {self.INIT_STATUS};
+            category = "{category}" and platform = "{article_source}";
         """
         article_list = self.db_client.select(sql)
         log(
@@ -97,7 +97,8 @@ class CategoryColdStartTask(object):
                 "category": category
             }
         )
-        article_df = DataFrame(article_list, columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status'])
+        article_df = DataFrame(article_list,
+                               columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status'])
         return article_df
 
     def change_article_status(self, category):
@@ -228,6 +229,26 @@ class CategoryColdStartTask(object):
         )
         return filter_df
 
+    def filter_toutiao_articles(self, articles_df, category):
+        """
+        Toutiao article filter funnel: keep rows with status == INIT_STATUS, report counts via bot(), return them.
+        """
+        total_length = articles_df.shape[0]  # row count before any filtering
+        # First funnel layer: filter by status
+        zero_level_funnel_df = articles_df[articles_df['status'] == self.INIT_STATUS]
+        zero_level_funnel_length = zero_level_funnel_df.shape[0]  # row count after the status filter
+        bot(
+            title="账号冷启动---头条推荐流发布",
+            detail={
+                "category": category,
+                "总文章数量": total_length,
+                "通过已经发布状态过滤": "过滤数量: {}    剩余数量: {}".format(total_length - zero_level_funnel_length,
+                                                                              zero_level_funnel_length),
+            },
+            mention=False
+        )
+        return zero_level_funnel_df
+
     def publish_filter_articles(self, category, articles_df, article_source):
         """
         过滤文章
@@ -241,7 +262,7 @@ class CategoryColdStartTask(object):
                 filtered_articles_df = self.filter_weixin_articles(articles_df, category)
                 input_source_channel = 5
             case "toutiao":
-                filtered_articles_df = articles_df
+                filtered_articles_df = self.filter_toutiao_articles(articles_df, category)
                 input_source_channel = 6
             case _:
                 return
@@ -293,7 +314,7 @@ class CategoryColdStartTask(object):
             )
 
             # change article status
-            article_id_list = articles_df['article_id'].values.tolist()
+            article_id_list = filtered_articles_df['article_id'].values.tolist()
             self.change_article_status_while_publishing(article_id_list=article_id_list)
 
     def do_job(self, article_source, category_list=None):
@@ -329,4 +350,4 @@ class CategoryColdStartTask(object):
                         "function": "do_job",
                         "traceback": traceback.format_exc()
                     }
-                )
+                )