|
@@ -3,7 +3,7 @@
|
|
"""
|
|
"""
|
|
import traceback
|
|
import traceback
|
|
|
|
|
|
-import pandas as pd
|
|
|
|
|
|
+from datetime import datetime, timedelta
|
|
|
|
|
|
from applications import PQMySQL, longArticlesMySQL, bot, log
|
|
from applications import PQMySQL, longArticlesMySQL, bot, log
|
|
from applications.aiditApi import get_generated_article_list
|
|
from applications.aiditApi import get_generated_article_list
|
|
@@ -84,6 +84,25 @@ class ArticleTitleStatusManager(object):
|
|
articles = self.lam_client.select(sql)
|
|
articles = self.lam_client.select(sql)
|
|
return [i[0] for i in articles]
|
|
return [i[0] for i in articles]
|
|
|
|
|
|
|
|
+ def get_bad_articles_v2(self, publish_date_threshold, discovery_times_threshold) -> list[str]:
|
|
|
|
+ """
|
|
|
|
+ 找出第一次发布在一个月之前,且发布次数大于5次的文章
|
|
|
|
+ :param publish_date_threshold: 发布时间戳阈值
|
|
|
|
+ :param discovery_times_threshold: 发布次数阈值
|
|
|
|
+ :return:
|
|
|
|
+ """
|
|
|
|
+ sql = f"""
|
|
|
|
+ SELECT
|
|
|
|
+ title, count(1) as title_count, min(date_str) as min_date
|
|
|
|
+ FROM
|
|
|
|
+ datastat_sort_strategy
|
|
|
|
+ WHERE position > 2 and fans > 10000
|
|
|
|
+ GROUP BY title
|
|
|
|
+ HAVING title_count >= {discovery_times_threshold} and min_date < {publish_date_threshold};
|
|
|
|
+ """
|
|
|
|
+ articles = self.lam_client.select(sql)
|
|
|
|
+ return [i[0] for i in articles]
|
|
|
|
+
|
|
def save_titles(self, title_list, status) -> int:
|
|
def save_titles(self, title_list, status) -> int:
|
|
"""
|
|
"""
|
|
修改标题状态
|
|
修改标题状态
|
|
@@ -149,6 +168,9 @@ def main():
|
|
ARTICLE_EXIT_STATUS = -1
|
|
ARTICLE_EXIT_STATUS = -1
|
|
READ_TIMES_ON_AVG_THRESHOLD = 0.5
|
|
READ_TIMES_ON_AVG_THRESHOLD = 0.5
|
|
DISCOVERY_TIMES_THRESHOLD = 10
|
|
DISCOVERY_TIMES_THRESHOLD = 10
|
|
|
|
+ PUBLISH_TIMES_THRESHOLD = 5
|
|
|
|
+ DAYS_THRESHOLD = 30
|
|
|
|
+ FIRST_PUBLISH_DATE_THRESHOLD = (datetime.now() - timedelta(days=DAYS_THRESHOLD)).strftime('%Y%m%d')
|
|
|
|
|
|
article_title_manager = ArticleTitleStatusManager()
|
|
article_title_manager = ArticleTitleStatusManager()
|
|
article_title_manager.init_database()
|
|
article_title_manager.init_database()
|
|
@@ -160,7 +182,7 @@ def main():
|
|
status=UP_LEVEL_STATUS
|
|
status=UP_LEVEL_STATUS
|
|
)
|
|
)
|
|
|
|
|
|
- # 处理退场标题
|
|
|
|
|
|
+ # 处理退场标题V1
|
|
exit_article_list = article_title_manager.get_bad_articles(
|
|
exit_article_list = article_title_manager.get_bad_articles(
|
|
read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
|
|
read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
|
|
discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
|
|
discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
|
|
@@ -169,13 +191,25 @@ def main():
|
|
title_list=exit_article_list,
|
|
title_list=exit_article_list,
|
|
status=ARTICLE_EXIT_STATUS)
|
|
status=ARTICLE_EXIT_STATUS)
|
|
|
|
|
|
|
|
+ # 处理退场标题v2
|
|
|
|
+ exit_article_list_v2 = article_title_manager.get_bad_articles_v2(
|
|
|
|
+ publish_date_threshold=FIRST_PUBLISH_DATE_THRESHOLD,
|
|
|
|
+ discovery_times_threshold=PUBLISH_TIMES_THRESHOLD
|
|
|
|
+ )
|
|
|
|
+ exit_success_count_v2 = article_title_manager.save_titles(
|
|
|
|
+ title_list=exit_article_list_v2,
|
|
|
|
+ status=ARTICLE_EXIT_STATUS)
|
|
|
|
+
|
|
bot(
|
|
bot(
|
|
title="冷启动文章晋级/退场完成",
|
|
title="冷启动文章晋级/退场完成",
|
|
detail={
|
|
detail={
|
|
"晋级文章数量": up_level_success_count,
|
|
"晋级文章数量": up_level_success_count,
|
|
- "退场文章数量": exit_success_count,
|
|
|
|
- "阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
|
|
|
|
- "探索次数阈值": DISCOVERY_TIMES_THRESHOLD
|
|
|
|
|
|
+ "策略1:退场文章数量": exit_success_count,
|
|
|
|
+ "策略2:退场文章数量": exit_success_count_v2,
|
|
|
|
+ "策略1:阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
|
|
|
|
+ "策略1:探索次数阈值": DISCOVERY_TIMES_THRESHOLD,
|
|
|
|
+ "策略2:发布次数阈值": PUBLISH_TIMES_THRESHOLD,
|
|
|
|
+ "策略2:发布天数阈值": DAYS_THRESHOLD
|
|
},
|
|
},
|
|
mention=False
|
|
mention=False
|
|
)
|
|
)
|