|
@@ -228,6 +228,118 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
|
|
|
article_id_list=article_id_list
|
|
article_id_list=article_id_list
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+ case "strategy_v3":
|
|
|
|
|
+ url_list = filter_article_df["link"].values.tolist()
|
|
|
|
|
+ await self.create_crawler_plan_and_bind_to_produce_plan(
|
|
|
|
|
+ strategy, crawl_method, category, platform, url_list, plan_id
|
|
|
|
|
+ )
|
|
|
|
|
+ # change article status
|
|
|
|
|
+ article_id_list = filter_article_df["article_id"].values.tolist()
|
|
|
|
|
+ await self.change_article_status_while_publishing(
|
|
|
|
|
+ article_id_list=article_id_list
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
async def cold_start_by_category(self, category_list, platform, strategy):
    """Run the cold-start flow once per category, then post a summary table.

    Args:
        category_list: Categories to process. A falsy value (``None`` or an
            empty list) selects every key of ``cold_start_category_map``.
        platform: Forwarded to ``create_cold_start_plan`` and recorded in
            the per-category log entry.
        strategy: Forwarded to ``create_cold_start_plan`` and included in
            the failure alert.

    Returns:
        None.

    Any exception raised while handling a single category is reported
    through ``feishu_robot.bot`` and the loop moves on to the next category.
    """
    categories = category_list or list(cold_start_category_map.keys())

    for category in tqdm(categories):
        try:
            plan_id = cold_start_category_map[category]
            affected_rows = await self.filter_published_titles(plan_id)

            log_entry = {
                "task": "article_pool_cold_start",
                "platform": platform,
                "category": category,
                "status": "success",
                "trace_id": self.trace_id,
                "message": "通过已抓取标题修改文章状态",
                "data": {"affected_rows": affected_rows},
            }
            await self.log_client.log(contents=log_entry)

            await self.create_cold_start_plan(
                platform=platform,
                strategy=strategy,
                plan_id=plan_id,
                category=category,
            )
            # Wait two minutes after each successfully handled category;
            # a failed category skips this pause.
            await asyncio.sleep(120)
        except Exception as exc:
            failure_detail = {
                "category": category,
                "strategy": strategy,
                "error": str(exc),
                "function": "deal",
                "traceback": traceback.format_exc(),
            }
            await feishu_robot.bot(title="文章冷启动异常", detail=failure_detail)

    # Nothing was recorded during this run, so there is no summary to send.
    if not self.cold_start_records:
        return

    # (sheet_type, sheet_name, display_name) for every column of the summary
    # table, in display order.
    column_specs = (
        ("plain_text", "category", "文章品类"),
        ("number", "cold_start_num", "本次冷启数量"),
        ("number", "total_length", "总文章剩余数量"),
        ("number", "filter_by_title_length", "标题长度过滤"),
        ("number", "filter_by_sensitivity", "敏感词过滤"),
        ("number", "filter_by_llm_sensitivity", "经过大模型判断敏感过滤"),
        ("number", "filter_by_score", "经过相关性分过滤"),
        ("number", "read_avg_threshold", "阅读均值倍数阈值"),
        ("number", "read_threshold", "阅读量阈值"),
        ("number", "title_length_threshold", "标题长度阈值"),
    )
    columns = [
        feishu_robot.create_feishu_columns_sheet(
            sheet_type=sheet_type,
            sheet_name=sheet_name,
            display_name=display_name,
        )
        for sheet_type, sheet_name, display_name in column_specs
    ]

    summary = {
        "columns": columns,
        "rows": self.cold_start_records,
    }
    await feishu_robot.bot(
        title="长文文章路冷启动发布",
        detail=summary,
        table=True,
        mention=False,
    )
|
|
|
|
|
+
|
|
|
async def deal(
|
|
async def deal(
|
|
|
self,
|
|
self,
|
|
|
platform: str,
|
|
platform: str,
|
|
@@ -288,105 +400,18 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
case "strategy_v2":
|
|
case "strategy_v2":
|
|
|
- if not category_list:
|
|
|
|
|
- category_list = list(cold_start_category_map.keys())
|
|
|
|
|
-
|
|
|
|
|
- for category in tqdm(category_list):
|
|
|
|
|
- try:
|
|
|
|
|
- plan_id = cold_start_category_map[category]
|
|
|
|
|
- affected_rows = await self.filter_published_titles(plan_id)
|
|
|
|
|
- await self.log_client.log(
|
|
|
|
|
- contents={
|
|
|
|
|
- "task": "article_pool_cold_start",
|
|
|
|
|
- "platform": platform,
|
|
|
|
|
- "category": category,
|
|
|
|
|
- "status": "success",
|
|
|
|
|
- "trace_id": self.trace_id,
|
|
|
|
|
- "message": "通过已抓取标题修改文章状态",
|
|
|
|
|
- "data": {"affected_rows": affected_rows},
|
|
|
|
|
- }
|
|
|
|
|
- )
|
|
|
|
|
- await self.create_cold_start_plan(
|
|
|
|
|
- platform=platform,
|
|
|
|
|
- strategy=strategy,
|
|
|
|
|
- plan_id=plan_id,
|
|
|
|
|
- category=category,
|
|
|
|
|
- )
|
|
|
|
|
- await asyncio.sleep(120)
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- await feishu_robot.bot(
|
|
|
|
|
- title="文章冷启动异常",
|
|
|
|
|
- detail={
|
|
|
|
|
- "category": category,
|
|
|
|
|
- "strategy": strategy,
|
|
|
|
|
- "error": str(e),
|
|
|
|
|
- "function": "deal",
|
|
|
|
|
- "traceback": traceback.format_exc(),
|
|
|
|
|
- },
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ await self.cold_start_by_category(
|
|
|
|
|
+ category_list=category_list,
|
|
|
|
|
+ platform=platform,
|
|
|
|
|
+ strategy=strategy,
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
- if self.cold_start_records:
|
|
|
|
|
- columns = [
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="plain_text",
|
|
|
|
|
- sheet_name="category",
|
|
|
|
|
- display_name="文章品类",
|
|
|
|
|
- ),
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="number",
|
|
|
|
|
- sheet_name="cold_start_num",
|
|
|
|
|
- display_name="本次冷启数量",
|
|
|
|
|
- ),
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="number",
|
|
|
|
|
- sheet_name="total_length",
|
|
|
|
|
- display_name="总文章剩余数量",
|
|
|
|
|
- ),
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="number",
|
|
|
|
|
- sheet_name="filter_by_title_length",
|
|
|
|
|
- display_name="标题长度过滤",
|
|
|
|
|
- ),
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="number",
|
|
|
|
|
- sheet_name="filter_by_sensitivity",
|
|
|
|
|
- display_name="敏感词过滤",
|
|
|
|
|
- ),
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="number",
|
|
|
|
|
- sheet_name="filter_by_llm_sensitity",
|
|
|
|
|
- display_name="经过大模型判断敏感过滤",
|
|
|
|
|
- ),
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="number",
|
|
|
|
|
- sheet_name="filter_by_score",
|
|
|
|
|
- display_name="经过相关性分过滤",
|
|
|
|
|
- ),
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="number",
|
|
|
|
|
- sheet_name="read_avg_threshold",
|
|
|
|
|
- display_name="阅读均值倍数阈值",
|
|
|
|
|
- ),
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="number",
|
|
|
|
|
- sheet_name="read_threshold",
|
|
|
|
|
- display_name="阅读量阈值",
|
|
|
|
|
- ),
|
|
|
|
|
- feishu_robot.create_feishu_columns_sheet(
|
|
|
|
|
- sheet_type="number",
|
|
|
|
|
- sheet_name="title_length_threshold",
|
|
|
|
|
- display_name="标题长度阈值",
|
|
|
|
|
- ),
|
|
|
|
|
- ]
|
|
|
|
|
- await feishu_robot.bot(
|
|
|
|
|
- title="长文文章路冷启动发布",
|
|
|
|
|
- detail={
|
|
|
|
|
- "columns": columns,
|
|
|
|
|
- "rows": self.cold_start_records,
|
|
|
|
|
- },
|
|
|
|
|
- table=True,
|
|
|
|
|
- mention=False,
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ case "strategy_v3":
|
|
|
|
|
+ await self.cold_start_by_category(
|
|
|
|
|
+ category_list=category_list,
|
|
|
|
|
+ platform=platform,
|
|
|
|
|
+ strategy=strategy,
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
case _:
|
|
case _:
|
|
|
raise Exception(f"error strategy {strategy}")
|
|
raise Exception(f"error strategy {strategy}")
|