|
@@ -0,0 +1,194 @@
|
|
|
+"""
|
|
|
+@author: luojunhui
|
|
|
+"""
|
|
|
+import traceback
|
|
|
+
|
|
|
+import pandas as pd
|
|
|
+
|
|
|
+from applications import PQMySQL, longArticlesMySQL, bot, log
|
|
|
+from applications.aiditApi import get_generated_article_list
|
|
|
+
|
|
|
+
|
|
|
def get_level_up_articles() -> set:
    """
    Collect the titles of the generated articles held in the level pools.

    Each pool is fetched with ``get_generated_article_list`` and the second
    field of every returned row is taken as the title.

    :return: set with the titles gathered from all three pools
    """
    # Pools are queried in this order: level 2, level 1, level 0.
    pool_ids = (
        "20240804003153130851174",  # pool level 2
        "20240802171417146947657",  # pool level 1
        "20240802143345289374071",  # pool level 0
    )
    collected_titles = []
    for pool_id in pool_ids:
        pool_rows = get_generated_article_list(pool_id)
        collected_titles.extend([row[1] for row in pool_rows])
    # Duplicated titles across pools collapse into a single entry.
    return set(collected_titles)
|
|
|
+
|
|
|
+
|
|
|
class ArticleExitWithTitle(object):
    """
    Maintain the article exit table.

    Publishing statistics are read through ``pq_client`` and title statuses
    are written to ``cold_start_title_pool`` through ``lam_client``.
    """

    def __init__(self):
        # Status value used as the filter of the UPDATE fallback in
        # record_title_list.
        self.INIT_STATUS = 0
        # Both clients stay None until init_database() creates them.
        self.pq_client = None
        self.lam_client = None

    def init_database(self) -> bool:
        """
        Create both database clients.

        A bot alert is sent for the connection that fails.

        :return: True when both clients were created, False otherwise
        """
        try:
            self.pq_client = PQMySQL()
        except Exception as e:
            bot(
                title="文章退场管理任务,数据库连接失败",
                detail={
                    "e": str(e),
                    "error_msg": traceback.format_exc(),
                    "server": "old server"
                }
            )
            return False

        try:
            self.lam_client = longArticlesMySQL()
        except Exception as e:
            bot(
                title="文章退场管理任务,数据库连接失败",
                detail={
                    "e": str(e),
                    "error_msg": traceback.format_exc(),
                    "server": "new server"
                }
            )
            # Report the failure: lam_client is still None at this point,
            # so the caller must not be told the setup succeeded.
            return False
        return True

    def get_discovery_published_articles(self) -> pd.DataFrame:
        """
        Load per-title statistics from ``datastat_sort_strategy``.

        Only rows with ``position > 2`` and ``fans > 10000`` are aggregated.

        :return: DataFrame with the columns ``title``,
            ``max_read_times_on_avg`` (the query's ``max(read_rate)``) and
            ``articles_count`` (the query's row count per title)
        """
        sql = """
            SELECT
                title, max(read_rate), count(1) as title_count
            FROM
                datastat_sort_strategy
            WHERE position > 2 and fans > 10000
            GROUP BY title;
        """
        articles = self.pq_client.select(sql)
        # Column names are assigned positionally to the three selected fields.
        article_df = pd.DataFrame(
            articles,
            columns=['title', 'max_read_times_on_avg', 'articles_count']
        )
        return article_df

    def bad_article_manager(self, read_times_on_avg_threshold, discovery_times_threshold) -> list[str]:
        """
        Find the titles that qualify for exit.

        :param read_times_on_avg_threshold: exclusive upper bound for
            ``max_read_times_on_avg``
        :param discovery_times_threshold: exclusive upper bound for
            ``articles_count``
        :return: titles whose two statistics are both below their thresholds
        """
        articles_df = self.get_discovery_published_articles()
        low_read_mask = articles_df['max_read_times_on_avg'] < read_times_on_avg_threshold
        # NOTE(review): a title is selected only when it appears in FEWER
        # than discovery_times_threshold rows -- confirm that "<" (rather
        # than ">=") is the intended direction.
        low_count_mask = articles_df['articles_count'] < discovery_times_threshold
        target_bad_dataframe = articles_df[low_read_mask & low_count_mask]
        return target_bad_dataframe['title'].tolist()

    def record_title_list(self, title_list, status) -> int:
        """
        Store ``status`` for every title in ``cold_start_title_pool``.

        Each title is inserted first. When the INSERT raises, the row with
        that title is updated instead, but only if its current status equals
        ``INIT_STATUS``. Titles for which the UPDATE raises as well are
        logged and reported through a single bot alert.

        :param title_list: iterable of titles to record
        :param status: status value to store
        :return: number of successful INSERTs (titles handled by the UPDATE
            fallback are not counted), or -1 when any title failed
        """
        # The statements do not depend on the title, so build them once.
        insert_sql = """
            INSERT INTO cold_start_title_pool
                (title, status)
            values
                (%s, %s)
        """
        update_sql = """
            UPDATE cold_start_title_pool
            SET status = %s
            where title = %s and status = %s;
        """
        fail_list = []
        insert_count = 0
        for title in title_list:
            try:
                self.lam_client.update(
                    sql=insert_sql,
                    params=(title, status)
                )
            except Exception:
                # presumably the INSERT fails because the title already
                # exists -- verify against the table's keys
                try:
                    self.lam_client.update(
                        sql=update_sql,
                        params=(status, title, self.INIT_STATUS)
                    )
                except Exception as e:
                    error_msg = traceback.format_exc()
                    log(
                        task="article_exit_with_title",
                        function="record_title_list",
                        status="fail",
                        data={
                            "e": str(e),
                            "error_msg": error_msg,
                        }
                    )
                    fail_list.append(title)
            else:
                insert_count += 1

        if fail_list:
            bot(
                title="冷启动文章标题退场,sql操作失败",
                detail=fail_list
            )
            return -1
        return insert_count
|
|
|
+
|
|
|
+
|
|
|
def main():
    """
    Record level-up titles and exit titles, then report the result.

    Titles from the level pools are stored with the level-up status and the
    titles returned by ``bad_article_manager`` with the exit status. A
    summary is sent through the bot only when both write steps finished
    without failures.

    :return: None
    """
    UP_LEVEL_STATUS = 1
    ARTICLE_EXIT_STATUS = -1
    READ_TIMES_ON_AVG_THRESHOLD = 0.5
    DISCOVERY_TIMES_THRESHOLD = 3

    article_title_manager = ArticleExitWithTitle()
    article_title_manager.init_database()
    # init_database() already alerts on a failed connection. Every step
    # below needs both clients, so stop instead of crashing on a None client.
    if article_title_manager.pq_client is None or article_title_manager.lam_client is None:
        return

    # Handle the level-up titles.
    up_level_title = get_level_up_articles()
    up_level_success_count = article_title_manager.record_title_list(
        title_list=up_level_title,
        status=UP_LEVEL_STATUS
    )

    # Handle the exit titles.
    exit_article_list = article_title_manager.bad_article_manager(
        read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
        discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
    )
    exit_success_count = article_title_manager.record_title_list(
        title_list=exit_article_list,
        status=ARTICLE_EXIT_STATUS
    )

    # record_title_list returns -1 when any title failed; in that case it
    # has already sent its own alert, so the summary is skipped.
    if exit_success_count >= 0 and up_level_success_count >= 0:
        bot(
            title="冷启动文章晋级, 退场完成",
            detail={
                "已经晋级文章数量": up_level_success_count,
                "已经退场文章数控": exit_success_count,
                "阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
                "探索次数阈值": DISCOVERY_TIMES_THRESHOLD
            },
            mention=False
        )
|
|
|
+
|
|
|
+
|
|
|
# Run the job only when this file is executed as a script.
if __name__ == "__main__":
    main()
|