|
|
@@ -9,20 +9,23 @@ from typing import List
|
|
|
from pandas import DataFrame
|
|
|
from tqdm.asyncio import tqdm
|
|
|
|
|
|
-from applications.api import task_apollo, feishu_robot
|
|
|
-from applications.api import auto_create_crawler_task
|
|
|
-from applications.api import auto_bind_crawler_task_to_generate_task
|
|
|
-from applications.config import cold_start_category_map, input_source_map
|
|
|
-from applications.utils import get_titles_from_produce_plan
|
|
|
-from applications.tasks.cold_start_tasks.article_pool import (
|
|
|
+from app.infra.external import feishu_robot
|
|
|
+from app.infra.internal import auto_create_crawler_task
|
|
|
+from app.infra.internal import auto_bind_crawler_task_to_generate_task
|
|
|
+from app.infra.internal import get_titles_from_produce_plan
|
|
|
+from app.core.dependency import apollo_client
|
|
|
+from app.core.config.settings import ColdStartConfig
|
|
|
+
|
|
|
+from app.domains.cold_start_tasks.article_pool import (
|
|
|
ArticlePoolColdStartStrategy,
|
|
|
ArticlePoolFilterStrategy,
|
|
|
)
|
|
|
|
|
|
|
|
|
class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrategy):
|
|
|
- def __init__(self, pool, log_client, trace_id):
|
|
|
+ def __init__(self, pool, log_client, trace_id, config: ColdStartConfig):
|
|
|
super().__init__(pool, log_client, trace_id)
|
|
|
+ self.config = config
|
|
|
|
|
|
async def get_article_from_meta_table(
|
|
|
self, platform: str, crawl_method: str, strategy: str, category: str | None
|
|
|
@@ -145,7 +148,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
|
|
|
"inputSourceValue": crawler_plan_id,
|
|
|
"inputSourceLabel": crawler_plan_name,
|
|
|
"inputSourceModal": 3,
|
|
|
- "inputSourceChannel": input_source_map[platform],
|
|
|
+ "inputSourceChannel": self.config.input_source_map[platform],
|
|
|
}
|
|
|
]
|
|
|
generate_plan_response = await auto_bind_crawler_task_to_generate_task(
|
|
|
@@ -194,7 +197,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
|
|
|
match strategy:
|
|
|
case "strategy_v1":
|
|
|
# split article into each category
|
|
|
- category_list = await task_apollo.get_config_value(key="category_list")
|
|
|
+ category_list = await apollo_client.get_config_value(key="category_list")
|
|
|
for ai_category in category_list:
|
|
|
filter_category_df = filter_article_df[
|
|
|
filter_article_df["category_by_ai"] == ai_category
|
|
|
@@ -241,11 +244,11 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
|
|
|
|
|
|
async def cold_start_by_category(self, category_list, platform, strategy):
|
|
|
if not category_list:
|
|
|
- category_list = list(cold_start_category_map.keys())
|
|
|
+ category_list = list(self.config.cold_start_category_map.keys())
|
|
|
|
|
|
for category in tqdm(category_list):
|
|
|
try:
|
|
|
- plan_id = cold_start_category_map[category]
|
|
|
+ plan_id = self.config.cold_start_category_map[category]
|
|
|
affected_rows = await self.filter_published_titles(plan_id)
|
|
|
await self.log_client.log(
|
|
|
contents={
|
|
|
@@ -363,7 +366,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
|
|
|
}
|
|
|
)
|
|
|
|
|
|
- crawl_methods_map = await task_apollo.get_config_value(
|
|
|
+ crawl_methods_map = await apollo_client.get_config_value(
|
|
|
key="category_cold_start_map"
|
|
|
)
|
|
|
|