|
@@ -68,34 +68,22 @@ class ArticleExitWithTitle(object):
|
|
|
)
|
|
|
return True
|
|
|
|
|
|
- def get_discovery_published_articles(self) -> pd.DataFrame:
|
|
|
def bad_article_manager(self, read_times_on_avg_threshold, discovery_times_threshold) -> list[str]:
    """
    Find the titles of poorly performing articles that are candidates for exit.

    Queries ``datastat_sort_strategy`` for rows with ``position > 2`` and
    ``fans > 10000``, groups them by title, and keeps the titles that occur at
    least ``discovery_times_threshold`` times while their highest ``read_rate``
    is still below ``read_times_on_avg_threshold``.

    This method only runs the query and returns titles; it does not itself
    change any title's status.

    :param read_times_on_avg_threshold: exclusive upper bound on a title's max ``read_rate``
    :param discovery_times_threshold: minimum number of matching rows a title must have
    :return: matching titles (first column of each result row); empty list when nothing matches
    :raises TypeError: if either threshold is not a real number
    """
    import numbers  # local import: the file's import block is not visible from this block

    # Both thresholds are interpolated directly into the SQL text below, so only
    # genuine numbers are allowed through; a stray string would otherwise become
    # part of the statement. bool is rejected explicitly because it is a
    # numbers.Real subclass but renders as "True"/"False".
    for arg_name, arg_value in (
        ("read_times_on_avg_threshold", read_times_on_avg_threshold),
        ("discovery_times_threshold", discovery_times_threshold),
    ):
        if isinstance(arg_value, bool) or not isinstance(arg_value, numbers.Real):
            raise TypeError(
                f"{arg_name} must be a real number, got {type(arg_value).__name__}"
            )

    # NOTE(review): HAVING refers to the SELECT aliases (title_count, max_rate);
    # this relies on the target database accepting aliases in HAVING, as MySQL does.
    sql = f"""
        SELECT
            title, max(read_rate) as max_rate, count(1) as title_count
        FROM
            datastat_sort_strategy
        WHERE position > 2 and fans > 10000
        GROUP BY title
        HAVING title_count >= {discovery_times_threshold} and max_rate < {read_times_on_avg_threshold};
    """
    rows = self.lam_client.select(sql)
    # presumably select() can hand back None when there is no result set — treat
    # that the same as "no rows" instead of failing on iteration (TODO confirm).
    return [row[0] for row in rows or ()]
|
|
|
|
|
|
def record_title_list(self, title_list, status) -> int:
|
|
|
"""
|
|
@@ -161,7 +149,7 @@ def main():
|
|
|
UP_LEVEL_STATUS = 1
|
|
|
ARTICLE_EXIT_STATUS = -1
|
|
|
READ_TIMES_ON_AVG_THRESHOLD = 0.5
|
|
|
- DISCOVERY_TIMES_THRESHOLD = 3
|
|
|
+ DISCOVERY_TIMES_THRESHOLD = 10
|
|
|
|
|
|
article_title_manager = ArticleExitWithTitle()
|
|
|
article_title_manager.init_database()
|
|
@@ -169,6 +157,7 @@ def main():
|
|
|
# 处理晋级标题
|
|
|
up_level_title = get_level_up_articles()
|
|
|
up_level_success_count = article_title_manager.record_title_list(title_list=up_level_title, status=UP_LEVEL_STATUS)
|
|
|
+ # up_level_success_count = 0
|
|
|
|
|
|
# 处理退场标题
|
|
|
exit_article_list = article_title_manager.bad_article_manager(
|
|
@@ -191,4 +180,5 @@ def main():
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
- main()
|
|
|
+ main()
|
|
|
+
|