浏览代码

新增服务号发文回收

luojunhui 3 周之前
父节点
当前提交
0a61ac1589

+ 1 - 0
applications/tasks/cold_start_tasks/article_pool/article_pool_cold_start_const.py

@@ -1,6 +1,7 @@
 class ArticlePoolColdStartConst:
     # article
     DAILY_ARTICLE_NUM = 1000
+    DAILY_CATEGORY_ARTICLE_NUM = 120
     SIMILARITY_SCORE_THRESHOLD = 0.5
 
     TITLE_NOT_SENSITIVE = 0

+ 11 - 1
applications/tasks/cold_start_tasks/article_pool/article_pool_cold_start_strategy.py

@@ -83,7 +83,7 @@ class ArticlePoolColdStartStrategy(ArticlePoolColdStartConst):
     ) -> List[Dict]:
         match strategy:
             case "strategy_v1":
-                query = f"""
+                query = """
                     select article_id, title, link,  llm_sensitivity, score, category_by_ai
                     from crawler_meta_article
                     where category = %s 
@@ -94,5 +94,15 @@ class ArticlePoolColdStartStrategy(ArticlePoolColdStartConst):
                     query=query, params=(crawl_method, "toutiao", self.INIT_STATUS)
                 )
                 return article_list
+            case "strategy_v2":
+                query = """
+                    select article_id, title, link, llm_sensitivity, score, category_by_ai
+                    from crawler_meta_article
+                    where platform = %s and category_by_ai = %s and status = %s;
+                """
+                article_list = await self.pool.async_fetch(
+                    query=query, params=("toutiao", category, self.INIT_STATUS)
+                )
+                return article_list
             case _:
                 raise ValueError("Invalid strategy")

+ 25 - 11
applications/tasks/cold_start_tasks/article_pool/article_pool_filter_strategy.py

@@ -93,7 +93,6 @@ class ArticlePoolFilterStrategy(ArticlePoolColdStartConst):
                     }
                 )
 
-
             case _:
                 raise ValueError("Invalid strategy")
 
@@ -105,16 +104,31 @@ class ArticlePoolFilterStrategy(ArticlePoolColdStartConst):
         total_length = dataframe.shape[0]
         filter_df = dataframe[dataframe["score"] > self.SIMILARITY_SCORE_THRESHOLD]
 
-        await feishu_robot.bot(
-            title="冷启动创建抓取计划",
-            detail={
-                "渠道": crawl_method,
-                "总文章数量": total_length,
-                "相关性分数过滤剩余": filter_df.shape[0],
-            },
-            mention=False,
-        )
-        return filter_df[: self.DAILY_ARTICLE_NUM]
+        match strategy:
+            case "strategy_v1":
+                daily_article_num = self.DAILY_ARTICLE_NUM
+                await feishu_robot.bot(
+                    title="冷启动创建抓取计划",
+                    detail={
+                        "渠道": crawl_method,
+                        "总文章数量": total_length,
+                        "相关性分数过滤剩余": filter_df.shape[0],
+                    },
+                    mention=False,
+                )
+            case "strategy_v2":
+                daily_article_num = self.DAILY_CATEGORY_ARTICLE_NUM
+                self.cold_start_records.append(
+                    {
+                        "category": category,
+                        "cold_start_num": min(daily_article_num, len(filter_df))
+                    }
+                )
+
+            case _:
+                raise ValueError("Invalid strategy")
+
+        return filter_df.head(daily_article_num)
 
     async def article_pool_filter(
         self, strategy, platform, dataframe, crawl_method, category

+ 2 - 1
applications/tasks/cold_start_tasks/article_pool_cold_start.py

@@ -59,6 +59,8 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
         过滤已添加至aigc中的标题
         """
         published_title_tuple = await get_titles_from_produce_plan(self.pool, plan_id)
+        if not published_title_tuple:
+            return 0
         update_query = """
             update crawler_meta_article set status = %s where title in %s and status = %s;
         """
@@ -303,7 +305,6 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
                             plan_id=plan_id,
                             category=category,
                         )
-                        # todo add bot notify
                     except Exception as e:
                         await feishu_robot.bot(
                             title="文章冷启动异常",