|
@@ -85,7 +85,7 @@ class CategoryColdStartTask(object):
|
|
|
FROM
|
|
|
crawler_meta_article
|
|
|
WHERE
|
|
|
- category = "{category}" and platform = "{article_source}" and status = {self.INIT_STATUS};
|
|
|
+ category = "{category}" and platform = "{article_source}";
|
|
|
"""
|
|
|
article_list = self.db_client.select(sql)
|
|
|
log(
|
|
@@ -97,7 +97,8 @@ class CategoryColdStartTask(object):
|
|
|
"category": category
|
|
|
}
|
|
|
)
|
|
|
- article_df = DataFrame(article_list, columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status'])
|
|
|
+ article_df = DataFrame(article_list,
|
|
|
+ columns=['article_id', 'gh_id', 'position', 'title', 'link', 'read_cnt', 'status'])
|
|
|
return article_df
|
|
|
|
|
|
def change_article_status(self, category):
|
|
@@ -228,6 +229,26 @@ class CategoryColdStartTask(object):
|
|
|
)
|
|
|
return filter_df
|
|
|
|
|
|
def filter_toutiao_articles(self, articles_df, category):
    """
    Filtering funnel for Toutiao articles.

    Keeps only the rows of ``articles_df`` whose ``status`` column equals
    ``self.INIT_STATUS`` and reports the before/after counts through ``bot``.

    :param articles_df: DataFrame of candidate articles; a ``status`` column is read.
    :param category: category label, forwarded unchanged in the notification detail.
    :return: the subset of ``articles_df`` whose ``status`` equals ``self.INIT_STATUS``.
    """
    total_count = articles_df.shape[0]

    # First funnel layer: filter by status.
    has_init_status = articles_df['status'] == self.INIT_STATUS
    remaining_df = articles_df[has_init_status]
    remaining_count = remaining_df.shape[0]
    removed_count = total_count - remaining_count

    # Summary line for this layer: how many rows were dropped and how many remain.
    status_layer_summary = "过滤数量: {} 剩余数量: {}".format(removed_count, remaining_count)
    notification_detail = {
        "category": category,
        "总文章数量": total_count,
        "通过已经发布状态过滤": status_layer_summary,
    }
    bot(
        title="账号冷启动---头条推荐流发布",
        detail=notification_detail,
        mention=False
    )
    return remaining_df
|
|
|
+
|
|
|
def publish_filter_articles(self, category, articles_df, article_source):
|
|
|
"""
|
|
|
过滤文章
|
|
@@ -241,7 +262,7 @@ class CategoryColdStartTask(object):
|
|
|
filtered_articles_df = self.filter_weixin_articles(articles_df, category)
|
|
|
input_source_channel = 5
|
|
|
case "toutiao":
|
|
|
- filtered_articles_df = articles_df
|
|
|
+ filtered_articles_df = self.filter_toutiao_articles(articles_df, category)
|
|
|
input_source_channel = 6
|
|
|
case _:
|
|
|
return
|
|
@@ -293,7 +314,7 @@ class CategoryColdStartTask(object):
|
|
|
)
|
|
|
|
|
|
# change article status
|
|
|
- article_id_list = articles_df['article_id'].values.tolist()
|
|
|
+ article_id_list = filtered_articles_df['article_id'].values.tolist()
|
|
|
self.change_article_status_while_publishing(article_id_list=article_id_list)
|
|
|
|
|
|
def do_job(self, article_source, category_list=None):
|
|
@@ -329,4 +350,4 @@ class CategoryColdStartTask(object):
|
|
|
"function": "do_job",
|
|
|
"traceback": traceback.format_exc()
|
|
|
}
|
|
|
- )
|
|
|
+ )
|