Explorar o código

冷启动优化

luojunhui hai 2 meses
pai
achega
2e153daf8f

+ 1 - 1
applications/tasks/cold_start_tasks/article_pool/article_pool_cold_start_const.py

@@ -17,7 +17,7 @@ class ArticlePoolColdStartConst:
     TITLE_LENGTH_LIMIT = 12
     TITLE_LENGTH_MAX = 40
 
-    DEFAULT_CRAWLER_METHODS = ["1030-手动挑号", "account_association"]
+    DEFAULT_CRAWLER_METHODS = ["1030-手动挑号", "cooperate_account"]
 
     CATEGORY_CONFIG_MAP = {
         "知识科普": {

+ 1 - 1
applications/tasks/cold_start_tasks/article_pool/article_pool_filter_strategy.py

@@ -52,7 +52,7 @@ class ArticlePoolFilterStrategy(ArticlePoolColdStartConst):
         # 第4层通过相关性分数过滤
         filter_df = filter_df[filter_df["score"] > self.SIMILARITY_SCORE_THRESHOLD]
         length_level4 = filter_df.shape[0]
-        daily_article_num = self.CATEGORY_CONFIG_MAP.get(category, 120).get("num", 120)
+        daily_article_num = self.CATEGORY_CONFIG_MAP.get(category, {}).get("num", 120)
         match strategy:
             case "strategy_v1":
                 await feishu_robot.bot(

+ 1 - 1
applications/tasks/cold_start_tasks/article_pool_cold_start.py

@@ -122,7 +122,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
         # create_crawler_plan
         crawler_plan_response = await auto_create_crawler_task(
             plan_id=None,
-            plan_name=f"冷启动-{strategy}-{category}-{datetime.date.today().__str__()}-{len(url_list)}",
+            plan_name=f"冷启动-{strategy}-{crawl_method}-{category}-{datetime.date.today().__str__()}-{len(url_list)}",
             plan_tag="品类冷启动",
             platform=platform,
             url_list=url_list,

+ 2 - 2
applications/tasks/monitor_tasks/gzh_article_monitor.py

@@ -17,7 +17,7 @@ class MonitorConst:
     INIT_STATUS = 0
 
     # 监测周期
-    MONITOR_CYCLE = 3 * 24 * 3600
+    MONITOR_CYCLE = 2 * 24 * 3600
 
     # article code
     ARTICLE_ILLEGAL_CODE = 25012
@@ -324,7 +324,7 @@ class InnerGzhArticlesMonitor(MonitorConst):
                     await delete_illegal_gzh_articles(gh_id, title)
 
         except Exception as e:
-            print(f"crawler failed: {account_name}, error: {e}")
+            print(f"crawler failed: {account_name}-{url}, error: {e}")
 
     async def deal(self):
         article_list = await self.fetch_article_list_to_check()