|
@@ -83,21 +83,40 @@ class CategoryColdStartTask(object):
|
|
|
"""
|
|
|
从长文 meta 库中获取冷启文章
|
|
|
:return:
|
|
|
-
|
|
|
"""
|
|
|
- sql = f"""
|
|
|
- select
|
|
|
- article_id, title, link, llm_sensitivity, score, category_by_ai
|
|
|
- from crawler_meta_article t1
|
|
|
- join crawler_meta_article_accounts_read_avg t2 on t1.out_account_id = t2.gh_id and t1.article_index = t2.position
|
|
|
- where category = '{category}'
|
|
|
- and platform = '{article_source}'
|
|
|
- and title_sensitivity = {TITLE_NOT_SENSITIVE}
|
|
|
- and t1.status = {self.INIT_STATUS}
|
|
|
- and t1.read_cnt / t2.read_avg >= {self.READ_TIMES_THRESHOLD}
|
|
|
- and t1.read_cnt >= {self.READ_THRESHOLD}
|
|
|
- ORDER BY score DESC;
|
|
|
- """
|
|
|
+ match article_source:
|
|
|
+ case 'weixin':
|
|
|
+ sql = f"""
|
|
|
+ select
|
|
|
+ article_id, title, link, llm_sensitivity, score, category_by_ai
|
|
|
+ from crawler_meta_article t1
|
|
|
+ join crawler_meta_article_accounts_read_avg t2 on t1.out_account_id = t2.gh_id and t1.article_index = t2.position
|
|
|
+ where category = '{category}'
|
|
|
+ and platform = '{article_source}'
|
|
|
+ and title_sensitivity = {TITLE_NOT_SENSITIVE}
|
|
|
+ and t1.status = {self.INIT_STATUS}
|
|
|
+ and t1.read_cnt / t2.read_avg >= {self.READ_TIMES_THRESHOLD}
|
|
|
+ and t1.read_cnt >= {self.READ_THRESHOLD}
|
|
|
+ ORDER BY score DESC;
|
|
|
+ """
|
|
|
+ case 'toutiao':
|
|
|
+ sql = f"""
|
|
|
+ select article_id, title, link, llm_sensitivity, score, category_by_ai
|
|
|
+ from crawler_meta_article
|
|
|
+ where category = '{category}'
|
|
|
+ and platform = '{article_source}'
|
|
|
+ and status = {self.INIT_STATUS}
|
|
|
+ """
|
|
|
+ case _:
|
|
|
+ log(
|
|
|
+ task="category_publish_task",
|
|
|
+ function="get_articles_from_meta_table",
|
|
|
+ message="不支持的文章来源",
|
|
|
+ data={
|
|
|
+ "article_source": article_source
|
|
|
+ }
|
|
|
+ )
|
|
|
+ return None
|
|
|
article_list = self.db_client.select(sql)
|
|
|
log(
|
|
|
task="category_publish_task",
|
|
@@ -256,20 +275,18 @@ class CategoryColdStartTask(object):
|
|
|
头条文章过滤漏斗
|
|
|
"""
|
|
|
total_length = articles_df.shape[0]
|
|
|
- # 第一层漏斗通过状态过滤
|
|
|
- zero_level_funnel_df = articles_df[articles_df['status'] == self.INIT_STATUS]
|
|
|
- zero_level_funnel_length = zero_level_funnel_df.shape[0]
|
|
|
+ # # 第一层漏斗通过状态过滤
|
|
|
+ # zero_level_funnel_df = articles_df[articles_df['status'] == self.INIT_STATUS]
|
|
|
+ # zero_level_funnel_length = zero_level_funnel_df.shape[0]
|
|
|
bot(
|
|
|
title="账号冷启动---头条推荐流发布",
|
|
|
detail={
|
|
|
"category": category,
|
|
|
"总文章数量": total_length,
|
|
|
- "通过已经发布状态过滤": "过滤数量: {} 剩余数量: {}".format(total_length - zero_level_funnel_length,
|
|
|
- zero_level_funnel_length),
|
|
|
},
|
|
|
mention=False
|
|
|
)
|
|
|
- return zero_level_funnel_df
|
|
|
+ return articles_df
|
|
|
|
|
|
def update_article_sensitive_status(self, article_id, status):
|
|
|
"""
|