Bläddra i källkod

Merge branch '2024-12-17-add-apollo-to-article-exit' of luojunhui/LongArticlesJob into master

luojunhui 4 månader sedan
förälder
incheckning
eac657785e
1 ändrade filer med 22 tillägg och 15 borttagningar
  1. 22 15
      flow_pool/exit_article_with_title.py

+ 22 - 15
flow_pool/exit_article_with_title.py

@@ -1,12 +1,17 @@
 """
 """
 @author: luojunhui
 @author: luojunhui
 """
 """
+import json
 import traceback
 import traceback
 
 
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
 
 
 from applications import PQMySQL, longArticlesMySQL, bot, log
 from applications import PQMySQL, longArticlesMySQL, bot, log
 from applications.aiditApi import get_generated_article_list
 from applications.aiditApi import get_generated_article_list
+from config import apolloConfig
+
+config = apolloConfig()
+article_exit_threshold = json.loads(config.getConfigValue("article_exit_threshold"))
 
 
 
 
 def get_level_up_articles() -> set:
 def get_level_up_articles() -> set:
@@ -166,11 +171,14 @@ def main():
     """
     """
     UP_LEVEL_STATUS = 1
     UP_LEVEL_STATUS = 1
     ARTICLE_EXIT_STATUS = -1
     ARTICLE_EXIT_STATUS = -1
-    READ_TIMES_ON_AVG_THRESHOLD = 0.5
-    DISCOVERY_TIMES_THRESHOLD = 10
-    PUBLISH_TIMES_THRESHOLD = 5
-    DAYS_THRESHOLD = 30
-    FIRST_PUBLISH_DATE_THRESHOLD = (datetime.now() - timedelta(days=DAYS_THRESHOLD)).strftime('%Y%m%d')
+    # 策略一:
+    read_times_on_avg_threshold = article_exit_threshold['strategy_1']['read_times_on_avg']
+    explore_times_threshold = article_exit_threshold['strategy_1']['explore_times_threshold']
+
+    # 策略二:
+    publish_times_threshold = article_exit_threshold['strategy_2']['publish_times_threshold']
+    days_threshold = article_exit_threshold['strategy_2']['days_threshold']
+    first_publish_date_threshold = (datetime.now() - timedelta(days=days_threshold)).strftime('%Y%m%d')
 
 
     article_title_manager = ArticleTitleStatusManager()
     article_title_manager = ArticleTitleStatusManager()
     article_title_manager.init_database()
     article_title_manager.init_database()
@@ -184,8 +192,8 @@ def main():
 
 
     # 处理退场标题V1
     # 处理退场标题V1
     exit_article_list = article_title_manager.get_bad_articles(
     exit_article_list = article_title_manager.get_bad_articles(
-        read_times_on_avg_threshold=READ_TIMES_ON_AVG_THRESHOLD,
-        discovery_times_threshold=DISCOVERY_TIMES_THRESHOLD
+        read_times_on_avg_threshold=read_times_on_avg_threshold,
+        discovery_times_threshold=explore_times_threshold
     )
     )
     exit_success_count = article_title_manager.save_titles(
     exit_success_count = article_title_manager.save_titles(
         title_list=exit_article_list,
         title_list=exit_article_list,
@@ -193,8 +201,8 @@ def main():
 
 
     # 处理退场标题v2
     # 处理退场标题v2
     exit_article_list_v2 = article_title_manager.get_bad_articles_v2(
     exit_article_list_v2 = article_title_manager.get_bad_articles_v2(
-        publish_date_threshold=FIRST_PUBLISH_DATE_THRESHOLD,
-        discovery_times_threshold=PUBLISH_TIMES_THRESHOLD
+        publish_date_threshold=first_publish_date_threshold,
+        discovery_times_threshold=publish_times_threshold
     )
     )
     exit_success_count_v2 = article_title_manager.save_titles(
     exit_success_count_v2 = article_title_manager.save_titles(
         title_list=exit_article_list_v2,
         title_list=exit_article_list_v2,
@@ -206,15 +214,14 @@ def main():
             "晋级文章数量": up_level_success_count,
             "晋级文章数量": up_level_success_count,
             "策略1:退场文章数量": exit_success_count,
             "策略1:退场文章数量": exit_success_count,
             "策略2:退场文章数量": exit_success_count_v2,
             "策略2:退场文章数量": exit_success_count_v2,
-            "策略1:阅读均值倍数阈值": READ_TIMES_ON_AVG_THRESHOLD,
-            "策略1:探索次数阈值": DISCOVERY_TIMES_THRESHOLD,
-            "策略2:发布次数阈值": PUBLISH_TIMES_THRESHOLD,
-            "策略2:发布天数阈值": DAYS_THRESHOLD
+            "策略1:阅读均值倍数阈值": read_times_on_avg_threshold,
+            "策略1:探索次数阈值": explore_times_threshold,
+            "策略2:发布次数阈值": publish_times_threshold,
+            "策略2:发布天数阈值": days_threshold
         },
         },
         mention=False
         mention=False
     )
     )
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    main()
-
+    main()