Pārlūkot izejas kodu

article_exit_with_title.py

冷启动文章晋级&&退场管理
luojunhui 11 mēneši atpakaļ
vecāks
revīzija
6c08b71fd4
1 mainītis faili ar 194 papildinājumiem un 0 dzēšanām
  1. 194 0
      flow_pool/article_exit_with_title.py

+ 194 - 0
flow_pool/article_exit_with_title.py

@@ -0,0 +1,194 @@
+"""
+@author: luojunhui
+"""
+import traceback
+
+import pandas as pd
+
+from applications import PQMySQL, longArticlesMySQL, bot, log
+from applications.aiditApi import get_generated_article_list
+
+
+def get_level_up_articles() -> set:
+    """
+    Collect the distinct titles held in the three generated-article pools.
+
+    Every pool id is passed to ``get_generated_article_list`` and element
+    ``[1]`` of each returned row is taken as the title.
+    :return: set of titles found across all three pools
+    """
+    # Pools are queried in the same order as before: level2, level1, level0.
+    pool_ids = (
+        "20240804003153130851174",  # pool_level2
+        "20240802171417146947657",  # pool_level1
+        "20240802143345289374071",  # pool_level0
+    )
+    good_titles = set()
+    for pool_id in pool_ids:
+        for row in get_generated_article_list(pool_id):
+            good_titles.add(row[1])
+    return good_titles
+
+
+class ArticleExitWithTitle(object):
+    """
+    Maintains the article exit table, keyed by title.
+
+    Two database clients are used: ``pq_client`` (``PQMySQL``) reads
+    publishing statistics from ``datastat_sort_strategy`` and ``lam_client``
+    (``longArticlesMySQL``) writes title statuses into
+    ``cold_start_title_pool``.  ``init_database`` must be called before any
+    other method, because both clients start out as ``None``.
+    """
+
+    def __init__(self):
+        # Status a row must currently hold for the fallback UPDATE in
+        # record_title_list to overwrite it.
+        self.INIT_STATUS = 0
+        # Both clients are created by init_database().
+        self.pq_client = None
+        self.lam_client = None
+
+    def init_database(self) -> bool:
+        """
+        Create both database clients.
+
+        A bot alert is sent for the connection that fails.
+        :return: True only when both clients were created, otherwise False
+        """
+        try:
+            self.pq_client = PQMySQL()
+        except Exception as e:
+            bot(
+                title="文章退场管理任务,数据库连接失败",
+                detail={
+                    "e": str(e),
+                    "error_msg": traceback.format_exc(),
+                    "server": "old server"
+                }
+            )
+            return False
+
+        try:
+            self.lam_client = longArticlesMySQL()
+        except Exception as e:
+            bot(
+                title="文章退场管理任务,数据库连接失败",
+                detail={
+                    "e": str(e),
+                    "error_msg": traceback.format_exc(),
+                    "server": "new server"
+                }
+            )
+            # Report the failure: previously this branch fell through to
+            # ``return True`` while lam_client was still None.
+            return False
+        return True
+
+    def get_discovery_published_articles(self) -> pd.DataFrame:
+        """
+        Aggregate ``datastat_sort_strategy`` rows by title.
+
+        Only rows with ``position > 2`` and ``fans > 10000`` are considered.
+        :return: DataFrame with columns ``title``, ``max_read_times_on_avg``
+                 (``max(read_rate)`` per title) and ``articles_count``
+                 (number of rows per title)
+        """
+        sql = """
+            SELECT
+                title, max(read_rate), count(1) as title_count
+            FROM
+                datastat_sort_strategy
+            WHERE position > 2 and fans > 10000
+            GROUP BY title;
+        """
+        articles = self.pq_client.select(sql)
+        article_df = pd.DataFrame(articles, columns=['title', 'max_read_times_on_avg', 'articles_count'])
+        return article_df
+
+    def bad_article_manager(self, read_times_on_avg_threshold: float, discovery_times_threshold: int) -> list[str]:
+        """
+        Find the titles of poorly performing articles that should exit.
+
+        A title is selected when its ``max_read_times_on_avg`` is below
+        ``read_times_on_avg_threshold`` and its ``articles_count`` is below
+        ``discovery_times_threshold``.
+        :param read_times_on_avg_threshold: exclusive upper bound on the best read rate
+        :param discovery_times_threshold: exclusive upper bound on the publish count
+        :return: list of matching titles
+        """
+        stats_df = self.get_discovery_published_articles()
+        low_read_rate = stats_df['max_read_times_on_avg'] < read_times_on_avg_threshold
+        # NOTE(review): this keeps titles published FEWER times than the
+        # threshold; if the intent is "explored enough times and still bad"
+        # the comparison should be >=.  Left unchanged - confirm with owner.
+        few_publications = stats_df['articles_count'] < discovery_times_threshold
+        return stats_df[low_read_rate & few_publications]['title'].tolist()
+
+    def record_title_list(self, title_list, status) -> int:
+        """
+        Record ``status`` for every title in ``title_list``.
+
+        Each title is first inserted as a new row.  If the insert raises, the
+        title is updated instead, restricted to rows whose current status is
+        ``INIT_STATUS``.  A title counts as failed only when that UPDATE
+        raises as well; each such failure is logged.
+        :param title_list: iterable of titles to record
+        :param status: status value to store
+        :return: number of successful inserts (fallback updates are not
+                 counted), or -1 when at least one title failed, in which
+                 case a bot alert listing the failed titles is sent
+        """
+        fail_list = []
+        insert_count = 0
+        for title in title_list:
+            insert_sql = """
+                INSERT INTO cold_start_title_pool
+                (title, status)
+                values
+                (%s, %s)
+            """
+            try:
+                self.lam_client.update(
+                    sql=insert_sql,
+                    params=(title, status)
+                )
+                insert_count += 1
+            except Exception:
+                # Presumably the title already exists (unique key) - TODO
+                # confirm; fall back to updating the existing row.
+                update_sql = """
+                    UPDATE cold_start_title_pool
+                    SET status = %s
+                    where title = %s and status = %s;
+                """
+                try:
+                    self.lam_client.update(
+                        sql=update_sql,
+                        params=(status, title, self.INIT_STATUS)
+                    )
+                except Exception as e:
+                    error_msg = traceback.format_exc()
+                    log(
+                        task="article_exit_with_title",
+                        function="record_title_list",
+                        status="fail",
+                        data={
+                            "e": str(e),
+                            "error_msg": error_msg,
+                        }
+                    )
+                    fail_list.append(title)
+
+        if fail_list:
+            bot(
+                title="冷启动文章标题退场,sql操作失败",
+                detail=fail_list
+            )
+            return -1
+        return insert_count
+
+
+def main():
+    """
+    Entry point: record promoted titles, then record exited titles.
+
+    Promoted titles come from ``get_level_up_articles`` and are stored with
+    status 1; exited titles come from ``bad_article_manager`` and are stored
+    with status -1.  A completion message is sent through ``bot`` only when
+    both recording steps report a non-negative count.
+    :return: None
+    """
+    UP_LEVEL_STATUS = 1
+    ARTICLE_EXIT_STATUS = -1
+    READ_TIMES_ON_AVG_THRESHOLD = 0.5
+    DISCOVERY_TIMES_THRESHOLD = 3
+
+    article_title_manager = ArticleExitWithTitle()
+    # init_database() sends a bot alert for the failed connection;
+    # carrying on would only dereference a client that is still None.
+    if not article_title_manager.init_database():
+        return
+
+    # Handle promoted (level-up) titles
+    up_level_title = get_level_up_articles()
+    up_level_success_count = article_title_manager.record_title_list(title_list=up_level_title, status=UP_LEVEL_STATUS)
+
+    # Handle exited titles
+    exit_article_list = article_title_manager.bad_article_manager(
+        read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
+        discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
+    )
+    exit_success_count = article_title_manager.record_title_list(title_list=exit_article_list, status=ARTICLE_EXIT_STATUS)
+
+    # record_title_list returns -1 after a failure and sends its own alert,
+    # so the completion message is skipped in that case.
+    if exit_success_count >= 0 and up_level_success_count >= 0:
+        bot(
+            title="冷启动文章晋级, 退场完成",
+            detail={
+                "已经晋级文章数量": up_level_success_count,
+                # NOTE(review): "数控" looks like a typo for "数量"; the key
+                # is left unchanged in case a consumer matches on it.
+                "已经退场文章数控": exit_success_count,
+                "阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
+                "探索次数阈值": DISCOVERY_TIMES_THRESHOLD
+            },
+            mention=False
+        )
+
+
+# Run the promotion / exit job only when this file is executed as a script.
+if __name__ == '__main__':
+    main()