Ver código fonte

2025-12-30-新增冷启动策略
使用外部阅读优先替换相关性优先

luojunhui 2 meses atrás
pai
commit
2e3f562758

+ 18 - 1
applications/tasks/cold_start_tasks/article_pool/article_pool_filter_strategy.py

@@ -79,6 +79,7 @@ class ArticlePoolFilterStrategy(ArticlePoolColdStartConst):
                     mention=False,
                 )
                 daily_article_num = self.DAILY_ARTICLE_NUM
+
             case "strategy_v2":
                 self.cold_start_records.append(
                     {
@@ -87,7 +88,23 @@ class ArticlePoolFilterStrategy(ArticlePoolColdStartConst):
                         "total_length": total_length,
                         "filter_by_title_length": length_level1,
                         "filter_by_sensitivity": length_level2,
-                        "filter_by_llm_sensitity": length_level3,
+                        "filter_by_llm_sensitivity": length_level3,
+                        "filter_by_score": length_level4,
+                        "read_avg_threshold": self.READ_TIMES_THRESHOLD,
+                        "read_threshold": self.READ_THRESHOLD,
+                        "title_length_threshold": self.TITLE_LENGTH_LIMIT,
+                    }
+                )
+
+            case "strategy_v3":
+                self.cold_start_records.append(
+                    {
+                        "category": category,
+                        "cold_start_num": min(daily_article_num, len(filter_df)),
+                        "total_length": total_length,
+                        "filter_by_title_length": length_level1,
+                        "filter_by_sensitivity": length_level2,
+                        "filter_by_llm_sensitivity": length_level3,
                         "filter_by_score": length_level4,
                         "read_avg_threshold": self.READ_TIMES_THRESHOLD,
                         "read_threshold": self.READ_THRESHOLD,

+ 11 - 0
applications/tasks/cold_start_tasks/article_pool_cold_start.py

@@ -228,6 +228,17 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
                     article_id_list=article_id_list
                 )
 
+            case "strategy_v3":
+                url_list = filter_article_df["link"].values.tolist()
+                await self.create_crawler_plan_and_bind_to_produce_plan(
+                    strategy, crawl_method, category, platform, url_list, plan_id
+                )
+                # change article status
+                article_id_list = filter_article_df["article_id"].values.tolist()
+                await self.change_article_status_while_publishing(
+                    article_id_list=article_id_list
+                )
+
     async def cold_start_by_category(self, category_list, platform, strategy):
         if not category_list:
             category_list = list(cold_start_category_map.keys())