|
@@ -0,0 +1,186 @@
|
|
|
+"""
|
|
|
+@author: luojunhui
|
|
|
+"""
|
|
|
+import traceback
|
|
|
+
|
|
|
+import pandas as pd
|
|
|
+
|
|
|
+from applications import PQMySQL, longArticlesMySQL, bot, log
|
|
|
+from applications.aiditApi import get_generated_article_list
|
|
|
+
|
|
|
+
|
|
|
def get_level_up_articles(generate_pool_ids=None) -> set:
    """
    Collect the titles of the articles found in the level-up generate pools.

    :param generate_pool_ids: optional iterable of generate-pool ids to query.
        A single id may also be passed as a plain string. When omitted (None),
        the three pool ids listed below are used.
    :return: set built from element ``[1]`` of every record returned by
        ``get_generated_article_list`` for each pool (presumably the article
        title -- TODO confirm against that helper's record layout).
    """
    if generate_pool_ids is None:
        generate_pool_ids = (
            "20240804003153130851174",
            "20240802171417146947657",
            "20240802143345289374071",
        )
    elif isinstance(generate_pool_ids, str):
        # A bare string would otherwise be iterated character by character.
        generate_pool_ids = (generate_pool_ids,)

    good_title_set = set()
    for pool_id in generate_pool_ids:
        articles = get_generated_article_list(pool_id)
        good_title_set.update(article[1] for article in articles)
    return good_title_set
|
|
|
+
|
|
|
+
|
|
|
class ArticleTitleStatusManager(object):
    """
    Maintains the article exit table (``cold_start_title_pool``).

    Typical use: call ``init_database()`` first, then ``get_bad_articles()``
    and/or ``save_titles()``. Both of those go through ``lam_client``.
    """

    # Status a row must currently hold for the fallback UPDATE in
    # save_titles() to overwrite it.
    INIT_STATUS = 0

    def __init__(self):
        # Both clients stay None until init_database() succeeds.
        self.pq_client = None
        self.lam_client = None

    @staticmethod
    def _alert_connect_failure(error, server):
        """Send the connection-failure alert; must be called from an except block."""
        bot(
            title="文章退场管理任务,数据库连接失败",
            detail={
                "e": str(error),
                # format_exc() reads the exception currently being handled.
                "error_msg": traceback.format_exc(),
                "server": server,
            },
        )

    def init_database(self) -> bool:
        """
        Create both database clients.

        :return: True only when both ``PQMySQL()`` and ``longArticlesMySQL()``
            were constructed; False (after sending an alert through ``bot``)
            as soon as either constructor raises.
        """
        try:
            self.pq_client = PQMySQL()
        except Exception as e:
            self._alert_connect_failure(e, server="old server")
            return False

        try:
            self.lam_client = longArticlesMySQL()
        except Exception as e:
            self._alert_connect_failure(e, server="new server")
            # Without this return the method used to report success although
            # lam_client was still None.
            return False
        return True

    def get_bad_articles(self, read_times_on_avg_threshold, discovery_times_threshold) -> list[str]:
        """
        Find poorly performing article titles, i.e. the candidates for exit.

        Queries ``datastat_sort_strategy`` for rows with ``position > 2`` and
        ``fans > 10000``, groups them by title and keeps the titles that occur
        at least ``discovery_times_threshold`` times while their highest
        ``read_rate`` is still below ``read_times_on_avg_threshold``.

        :param read_times_on_avg_threshold: exclusive upper bound on max(read_rate).
        :param discovery_times_threshold: inclusive lower bound on the row count per title.
        :return: list with the first column (title) of every matching row.
        """
        # NOTE: both thresholds are interpolated straight into the SQL text,
        # so callers must only pass trusted numeric values.
        sql = f"""
            SELECT
                title, max(read_rate) as max_rate, count(1) as title_count
            FROM
                datastat_sort_strategy
            WHERE position > 2 and fans > 10000
            GROUP BY title
            HAVING title_count >= {discovery_times_threshold} and max_rate < {read_times_on_avg_threshold};
        """
        articles = self.lam_client.select(sql)
        return [row[0] for row in articles]

    def save_titles(self, title_list, status) -> int:
        """
        Store ``status`` for every title in ``cold_start_title_pool``.

        Each title is first inserted. If the insert raises, the row is updated
        instead, but only when its current status equals ``INIT_STATUS``.
        A title whose fallback update raises as well is logged and collected.

        :param title_list: iterable of titles to write.
        :param status: status value to store for each title.
        :return: number of titles that were inserted successfully, or -1 when
            at least one title failed both the insert and the fallback update
            (in that case the failed titles are also reported through ``bot``).
        """
        # The statements are loop invariant and contain only driver
        # placeholders, so they are plain strings built once.
        insert_sql = """
            INSERT INTO cold_start_title_pool
                (title, status)
            values
                (%s, %s)
        """
        update_sql = """
            UPDATE cold_start_title_pool
            SET status = %s
            where title = %s and status = %s;
        """
        fail_list = []
        insert_count = 0
        for title in title_list:
            try:
                self.lam_client.update(
                    sql=insert_sql,
                    params=(title, status)
                )
                insert_count += 1
            except Exception:
                # Any insert failure falls back to an update (presumably the
                # title already exists -- TODO confirm the table's unique key).
                try:
                    self.lam_client.update(
                        sql=update_sql,
                        params=(status, title, self.INIT_STATUS)
                    )
                except Exception as e:
                    # Still nested in the outer handler, so the traceback
                    # covers the failed insert as well as the failed update.
                    error_msg = traceback.format_exc()
                    log(
                        task="article_exit_with_title",
                        function="save_titles",
                        status="fail",
                        data={
                            "e": str(e),
                            "error_msg": error_msg,
                        }
                    )
                    fail_list.append(title)

        if fail_list:
            bot(
                title="冷启动文章标题退场,sql操作失败",
                detail=fail_list
            )
            return -1
        return insert_count
|
|
|
+
|
|
|
+
|
|
|
def main():
    """
    Run one pass of the cold-start title maintenance job.

    Steps: connect to the databases, mark the level-up titles with
    ``UP_LEVEL_STATUS``, mark the poorly performing titles with
    ``ARTICLE_EXIT_STATUS``, then send a summary through ``bot``.

    :return: None. Returns early, without sending the summary, when the
        databases could not be initialised.
    """
    UP_LEVEL_STATUS = 1
    ARTICLE_EXIT_STATUS = -1
    READ_TIMES_ON_AVG_THRESHOLD = 0.5
    DISCOVERY_TIMES_THRESHOLD = 10

    article_title_manager = ArticleTitleStatusManager()
    # init_database() sends its own alert on failure. Every step below goes
    # through lam_client, so also stop if that client was never created;
    # carrying on would only fail once per title and report a bogus summary.
    if not article_title_manager.init_database() or article_title_manager.lam_client is None:
        return

    # Handle the level-up titles.
    up_level_title = get_level_up_articles()
    up_level_success_count = article_title_manager.save_titles(
        title_list=up_level_title,
        status=UP_LEVEL_STATUS
    )

    # Handle the titles that should exit.
    exit_article_list = article_title_manager.get_bad_articles(
        read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
        discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
    )
    exit_success_count = article_title_manager.save_titles(
        title_list=exit_article_list,
        status=ARTICLE_EXIT_STATUS
    )

    # Note: save_titles() returns -1 instead of a count when any title failed.
    bot(
        title="冷启动文章晋级/退场完成",
        detail={
            "晋级文章数量": up_level_success_count,
            "退场文章数量": exit_success_count,
            "阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
            "探索次数阈值": DISCOVERY_TIMES_THRESHOLD
        },
        mention=False
    )
|
|
|
+
|
|
|
+
|
|
|
# Script entry point: run the job only when this file is executed directly.
if __name__ == "__main__":
    main()
|
|
|
+
|