luojunhui 1 mese fa
parent
commit
3223a9fc0b
100 ha cambiato i file con 772 aggiunte e 168 eliminazioni
  1. 0 0
      app/__init__.py
  2. 0 0
      app/ab_test/__init__.py
  3. 0 0
      app/ab_test/ab_accounts.py
  4. 4 4
      app/ab_test/get_cover.py
  5. 0 0
      app/api/__init__.py
  6. 0 0
      app/api/middleware/auth.py
  7. 0 0
      app/api/middleware/error_handler.py
  8. 0 0
      app/api/middleware/rate_limiter.py
  9. 0 1
      app/api/service/__init__.py
  10. 1 1
      app/api/service/gzh_cookie_manager.py
  11. 36 3
      app/api/service/task_manager_service.py
  12. 1 0
      app/api/v1/__init__.py
  13. 4 4
      app/api/v1/routes.py
  14. 0 0
      app/core/__init__.py
  15. 1 0
      app/core/config/__init__.py
  16. 0 0
      app/core/config/cert/es_certs.crt
  17. 68 0
      app/core/config/global_settings.py
  18. 28 0
      app/core/config/settings/__init__.py
  19. 25 0
      app/core/config/settings/aliyun.py
  20. 25 0
      app/core/config/settings/apollo.py
  21. 54 0
      app/core/config/settings/category.py
  22. 37 0
      app/core/config/settings/cold_start.py
  23. 25 0
      app/core/config/settings/deepseek.py
  24. 20 0
      app/core/config/settings/elasticsearch.py
  25. 105 0
      app/core/config/settings/mysql.py
  26. 17 2
      app/core/config/settings/task_chinese_name.py
  27. 1 0
      app/core/database/__init__.py
  28. 50 28
      app/core/database/mysql_pools.py
  29. 2 0
      app/core/dependency/__init__.py
  30. 37 0
      app/core/dependency/dependencies.py
  31. 1 0
      app/core/observability/__init__.py
  32. 1 0
      app/core/observability/logging/__init__.py
  33. 8 3
      app/core/observability/logging/log_service.py
  34. 0 0
      app/core/pipeline/__init__.py
  35. 8 10
      app/core/pipeline/crawler_pipeline.py
  36. 1 1
      app/core/pipeline/data_recycle_pipeline.py
  37. 0 0
      app/domains/__init__.py
  38. 0 0
      app/domains/algorithm_tasks/__init__.py
  39. 0 0
      app/domains/algorithm_tasks/account_category_analysis.py
  40. 6 1
      app/domains/algorithm_tasks/models.py
  41. 0 0
      app/domains/analysis_task/__init__.py
  42. 0 0
      app/domains/analysis_task/account_position_info.py
  43. 1 1
      app/domains/analysis_task/crawler_detail.py
  44. 0 0
      app/domains/cold_start_tasks/__init__.py
  45. 0 0
      app/domains/cold_start_tasks/article_pool/__init__.py
  46. 0 0
      app/domains/cold_start_tasks/article_pool/article_pool_cold_start_const.py
  47. 1 1
      app/domains/cold_start_tasks/article_pool/article_pool_cold_start_strategy.py
  48. 2 2
      app/domains/cold_start_tasks/article_pool/article_pool_filter_strategy.py
  49. 15 12
      app/domains/cold_start_tasks/article_pool_cold_start.py
  50. 0 0
      app/domains/cold_start_tasks/video_pool/__init__.py
  51. 2 2
      app/domains/cold_start_tasks/video_pool/video_pool_audit_strategy.py
  52. 0 0
      app/domains/cold_start_tasks/video_pool/video_pool_const.py
  53. 0 0
      app/domains/cold_start_tasks/video_pool_cold_start.py
  54. 0 0
      app/domains/crawler_tasks/__init__.py
  55. 0 0
      app/domains/crawler_tasks/crawler_account_manager.py
  56. 8 8
      app/domains/crawler_tasks/crawler_gzh.py
  57. 3 3
      app/domains/crawler_tasks/crawler_gzh_fans.py
  58. 10 7
      app/domains/crawler_tasks/crawler_toutiao.py
  59. 0 0
      app/domains/data_recycle_tasks/__init__.py
  60. 3 6
      app/domains/data_recycle_tasks/article_detail_stat.py
  61. 5 5
      app/domains/data_recycle_tasks/recycle_daily_publish_articles.py
  62. 3 3
      app/domains/data_recycle_tasks/recycle_mini_program_detail.py
  63. 4 4
      app/domains/data_recycle_tasks/recycle_outside_account_articles.py
  64. 0 0
      app/domains/llm_tasks/__init__.py
  65. 3 3
      app/domains/llm_tasks/candidate_account_process.py
  66. 4 4
      app/domains/llm_tasks/process_title.py
  67. 0 0
      app/domains/llm_tasks/prompts.py
  68. 0 0
      app/domains/monitor_tasks/__init__.py
  69. 5 5
      app/domains/monitor_tasks/auto_reply_cards_monitor.py
  70. 3 3
      app/domains/monitor_tasks/cooperate_accounts_monitor.py
  71. 3 3
      app/domains/monitor_tasks/get_off_videos.py
  72. 5 5
      app/domains/monitor_tasks/gzh_article_monitor.py
  73. 2 2
      app/domains/monitor_tasks/kimi_balance.py
  74. 1 3
      app/domains/monitor_tasks/limited_account_analysis.py
  75. 2 2
      app/domains/monitor_tasks/task_processing_monitor.py
  76. 0 0
      app/infra/__init__.py
  77. 0 0
      app/infra/crawler/__init__.py
  78. 0 0
      app/infra/crawler/toutiao/__init__.py
  79. 1 1
      app/infra/crawler/toutiao/blogger.py
  80. 1 1
      app/infra/crawler/toutiao/detail_recommend.py
  81. 0 0
      app/infra/crawler/toutiao/main_page_recomend.py
  82. 1 3
      app/infra/crawler/toutiao/search.py
  83. 0 0
      app/infra/crawler/toutiao/toutiao.js
  84. 1 1
      app/infra/crawler/toutiao/use_js.py
  85. 0 0
      app/infra/crawler/wechat/__init__.py
  86. 1 1
      app/infra/crawler/wechat/gzh_article_stat.py
  87. 1 1
      app/infra/crawler/wechat/gzh_fans.py
  88. 3 3
      app/infra/crawler/wechat/gzh_spider.py
  89. 19 0
      app/infra/external/__init__.py
  90. 0 0
      app/infra/external/aliyun.py
  91. 33 2
      app/infra/external/apollo.py
  92. 10 4
      app/infra/external/deepseek_official.py
  93. 2 4
      app/infra/external/elastic_search.py
  94. 1 3
      app/infra/external/feishu.py
  95. 23 0
      app/infra/internal/__init__.py
  96. 21 1
      app/infra/internal/aigc_system.py
  97. 1 1
      app/infra/internal/piaoquan.py
  98. 2 0
      app/infra/shared/__init__.py
  99. 0 0
      app/infra/shared/async_tasks.py
  100. 0 0
      app/infra/shared/http_client.py

+ 0 - 0
applications/__init__.py → app/__init__.py


+ 0 - 0
applications/ab_test/__init__.py → app/ab_test/__init__.py


+ 0 - 0
applications/ab_test/ab_accounts.py → app/ab_test/ab_accounts.py


+ 4 - 4
applications/ab_test/get_cover.py → app/ab_test/get_cover.py

@@ -1,7 +1,7 @@
-from applications.utils.response import Response
-from applications.utils import fetch_channel_info
-from applications.utils import fetch_aigc_cover
-from applications.utils import fetch_long_video_cover
+from app.infra.utils.response import Response
+from app.infra.utils import fetch_channel_info
+from app.infra.utils import fetch_aigc_cover
+from app.infra.utils import fetch_long_video_cover
 
 
 class GetCoverService(Response):

+ 0 - 0
applications/tasks/cold_start_tasks/video_pool/__init__.py → app/api/__init__.py


+ 0 - 0
applications/crawler/toutiao/main_page_recomend.py → app/api/middleware/auth.py


+ 0 - 0
app/api/middleware/error_handler.py


+ 0 - 0
app/api/middleware/rate_limiter.py


+ 0 - 1
applications/service/__init__.py → app/api/service/__init__.py

@@ -1,5 +1,4 @@
 # 日志服务
-from .log_service import LogService
 
 # 前端交互
 from .task_manager_service import TaskManagerService

+ 1 - 1
applications/service/gzh_cookie_manager.py → app/api/service/gzh_cookie_manager.py

@@ -1,4 +1,4 @@
-from applications.tasks.crawler_tasks.crawler_gzh_fans import CrawlerGzhFansBase
+from app.domains.crawler_tasks.crawler_gzh_fans import CrawlerGzhFansBase
 
 
 class GzhCookieManager(CrawlerGzhFansBase):

+ 36 - 3
applications/service/task_manager_service.py → app/api/service/task_manager_service.py

@@ -1,8 +1,6 @@
 import json
 from typing import Optional
-
-from applications.utils import get_task_chinese_name
-
+from app.core.config import GlobalConfigSettings
 
 def _build_where(id_eq=None, date_string=None, trace_id=None, task_status=None):
     conds, params = [], []
@@ -48,6 +46,41 @@ class TaskConst:
     DEFAULT_SIZE = 50
 
 
+class TaskManagerUtils(TaskConst):
+    def __init__(self, config: GlobalConfigSettings):
+        self.config = config
+
+    def get_task_chinese_name(self, data):
+        """
+        通过输入任务详情信息获取任务名称
+        """
+        task_name = data["task_name"]
+        task_name_chinese = self.config.task_chinese_name.get(task_name, task_name)
+
+        # account_method
+        if task_name == "crawler_gzh_articles":
+            account_method = data.get("account_method", "")
+            account_method = account_method.replace(
+                "account_association", "账号联想"
+            ).replace("search", "")
+            crawl_mode = data.get("crawl_mode", "")
+            crawl_mode = crawl_mode.replace("search", "搜索").replace("account", "抓账号")
+            strategy = data.get("strategy", "")
+            return f"{task_name_chinese}\t{crawl_mode}\t{account_method}\t{strategy}"
+        elif task_name == "article_pool_cold_start":
+            platform = data.get("platform", "")
+            platform = platform.replace("toutiao", "今日头条").replace("weixin", "微信")
+            strategy = data.get("strategy", "")
+            strategy = strategy.replace("strategy", "策略")
+            category_list = data.get("category_list", [])
+            category_list = "、".join(category_list)
+            crawler_methods = data.get("crawler_methods", [])
+            crawler_methods = "、".join(crawler_methods)
+            return f"{task_name_chinese}\t{platform}\t{crawler_methods}\t{category_list}\t{strategy}"
+        else:
+            return task_name_chinese
+
+
 class TaskManagerService(TaskConst):
     def __init__(self, pool, data):
         self.pool = pool

+ 1 - 0
app/api/v1/__init__.py

@@ -0,0 +1 @@
+from .routes import server_routes

+ 4 - 4
routes/blueprint.py → app/api/v1/routes.py

@@ -1,9 +1,9 @@
 from quart import Blueprint, jsonify, request
-from applications.ab_test import GetCoverService
-from applications.utils import generate_task_trace_id
+from app.ab_test import GetCoverService
+from app.infra.shared.tools import generate_task_trace_id
 
-from applications.tasks import TaskScheduler
-from applications.service import TaskManagerService, GzhCookieManager
+from app.jobs import TaskScheduler
+from app.api.service import TaskManagerService, GzhCookieManager
 
 
 server_blueprint = Blueprint("api", __name__, url_prefix="/api")

+ 0 - 0
app/core/__init__.py


+ 1 - 0
app/core/config/__init__.py

@@ -0,0 +1 @@
+from .global_settings import GlobalConfigSettings

+ 0 - 0
applications/config/es_certs.crt → app/core/config/cert/es_certs.crt


+ 68 - 0
app/core/config/global_settings.py

@@ -0,0 +1,68 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from .settings import *
+
+
+class GlobalConfigSettings(BaseSettings):
+    """应用全局配置"""
+
+    # ============ 应用基础配置 ============
+    app_name: str = Field(default="LongArticleTaskServer", description="应用名称")
+    environment: str = Field(
+        default="development", description="运行环境: development/pre/production"
+    )
+    debug: bool = Field(default=False, description="调试模式")
+
+    # ============ 数据库配置 ============
+    aigc_db: AigcDatabaseConfig = Field(default_factory=AigcDatabaseConfig)
+    long_video_db: LongVideoDatabaseConfig = Field(
+        default_factory=LongVideoDatabaseConfig
+    )
+    long_articles_db: LongArticlesDatabaseConfig = Field(
+        default_factory=LongArticlesDatabaseConfig
+    )
+    piaoquan_crawler_db: PiaoquanCrawlerDatabaseConfig = Field(
+        default_factory=PiaoquanCrawlerDatabaseConfig
+    )
+    growth_db: GrowthDatabaseConfig = Field(default_factory=GrowthDatabaseConfig)
+
+    # ============ 外部服务配置 ============
+    deepseek: DeepSeekConfig = Field(default_factory=DeepSeekConfig)
+
+    aliyun_log: AliyunLogConfig = Field(default_factory=AliyunLogConfig)
+    elasticsearch: ElasticsearchConfig = Field(default_factory=ElasticsearchConfig)
+    apollo: ApolloConfig = Field(default_factory=ApolloConfig)
+
+    # ============ 业务配置 ============
+    cold_start: ColdStartConfig = Field(default_factory=ColdStartConfig)
+    category: CategoryConfig = Field(default_factory=CategoryConfig)
+    task_chinese_name: TaskChineseNameConfig = Field(default_factory=TaskChineseNameConfig)
+
+    model_config = SettingsConfigDict(
+        env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="ignore"
+    )
+
+
+# # ============ 全局配置实例 ============
+# global_settings = GlobalConfigSettings()
+#
+#
+# # ============ 兼容旧代码的导出 ============
+# # 这些变量保持向后兼容,让旧代码可以继续使用
+# aigc_db_config = global_settings.aigc_db.to_dict()
+# long_video_db_config = global_settings.long_video_db.to_dict()
+# long_articles_db_config = global_settings.long_articles_db.to_dict()
+# piaoquan_crawler_db_config = global_settings.piaoquan_crawler_db.to_dict()
+# growth_db_config = global_settings.growth_db.to_dict()
+#
+# deep_seek_official_api_key = global_settings.deepseek.api_key
+# deep_seek_official_model = global_settings.deepseek.get_model_map()
+#
+# aliyun_log_config = global_settings.aliyun_log.to_dict()
+#
+# cold_start_category_map = global_settings.cold_start.category_map
+# input_source_map = global_settings.cold_start.input_source_map
+#
+# CATEGORY_FEATURES = global_settings.category.features
+# CATEGORY_MAP = global_settings.category.category_map

+ 28 - 0
app/core/config/settings/__init__.py

@@ -0,0 +1,28 @@
+from .apollo import ApolloConfig
+from .aliyun import AliyunLogConfig
+from .category import CategoryConfig
+from .cold_start import ColdStartConfig
+from .deepseek import DeepSeekConfig
+from .elasticsearch import ElasticsearchConfig
+from .mysql import AigcDatabaseConfig
+from .mysql import GrowthDatabaseConfig
+from .mysql import LongArticlesDatabaseConfig
+from .mysql import LongVideoDatabaseConfig
+from .mysql import PiaoquanCrawlerDatabaseConfig
+from .task_chinese_name import TaskChineseNameConfig
+
+
+__ALL__ = [
+    "ApolloConfig",
+    "AliyunLogConfig",
+    "CategoryConfig",
+    "ColdStartConfig",
+    "DeepSeekConfig",
+    "ElasticsearchConfig",
+    "AigcDatabaseConfig",
+    "GrowthDatabaseConfig",
+    "LongArticlesDatabaseConfig",
+    "LongVideoDatabaseConfig",
+    "PiaoquanCrawlerDatabaseConfig",
+    "TaskChineseNameConfig",
+]

+ 25 - 0
app/core/config/settings/aliyun.py

@@ -0,0 +1,25 @@
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class AliyunLogConfig(BaseSettings):
+    """阿里云日志配置"""
+
+    endpoint: str = "cn-hangzhou.log.aliyuncs.com"
+    access_key_id: str = "LTAIP6x1l3DXfSxm"
+    access_key_secret: str = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
+    project: str = "changwen-alg"
+    logstore: str = "long_articles_job"
+
+    model_config = SettingsConfigDict(
+        env_prefix="ALIYUN_LOG_", env_file=".env", case_sensitive=False, extra="ignore"
+    )
+
+    def to_dict(self) -> dict:
+        """转换为字典格式,用于兼容旧代码"""
+        return {
+            "endpoint": self.endpoint,
+            "access_key_id": self.access_key_id,
+            "access_key_secret": self.access_key_secret,
+            "project": self.project,
+            "logstore": self.logstore,
+        }

+ 25 - 0
app/core/config/settings/apollo.py

@@ -0,0 +1,25 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import Dict
+
+
+class ApolloConfig(BaseSettings):
+    """Apollo 配置中心配置"""
+
+    app_id: str = Field(default="LongArticlesJob", description="Apollo 应用 ID")
+    env: str = Field(default="pre", description="Apollo 环境: dev/pre/prod")
+
+    # apollo 配置中心地址映射
+    apollo_map: Dict[str, Dict[str, str]] = Field(
+        default_factory=lambda: {
+            "LongArticlesJob": {
+                "pre": "http://preapolloconfig-internal.piaoquantv.com/",
+                "dev": "https://devapolloconfig-internal.piaoquantv.com/",
+                "prod": "https://apolloconfig-internal.piaoquantv.com/",
+            }
+        }
+    )
+
+    model_config = SettingsConfigDict(
+        env_prefix="APOLLO_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 54 - 0
app/core/config/settings/category.py

@@ -0,0 +1,54 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import Dict, List
+
+
+class CategoryConfig(BaseSettings):
+    """分类配置"""
+
+    # 分类特征列表
+    features: List[str] = Field(
+        default_factory=lambda: [
+            "CateSciencePop",
+            "CateMilitaryHistory",
+            "CateFamily",
+            "CateSocialRule",
+            "CateOddities",
+            "CateGossip",
+            "CateHealth",
+            "CateEmotional",
+            "CateNational",
+            "CateModernFigure",
+            "CateNostalgic",
+            "CatePolitics",
+            "CateHistoryFigure",
+            "CateSocialPhenomena",
+            "CateFinTech",
+            "view_count_rate",
+        ]
+    )
+
+    # 分类映射
+    category_map: Dict[str, str] = Field(
+        default_factory=lambda: {
+            "知识科普": "CateSciencePop",
+            "军事历史": "CateMilitaryHistory",
+            "家长里短": "CateFamily",
+            "社会法治": "CateSocialRule",
+            "奇闻趣事": "CateOddities",
+            "名人八卦": "CateGossip",
+            "健康养生": "CateHealth",
+            "情感故事": "CateEmotional",
+            "国家大事": "CateNational",
+            "现代人物": "CateModernFigure",
+            "怀旧时光": "CateNostalgic",
+            "政治新闻": "CatePolitics",
+            "历史人物": "CateHistoryFigure",
+            "社会现象": "CateSocialPhenomena",
+            "财经科技": "CateFinTech",
+        }
+    )
+
+    model_config = SettingsConfigDict(
+        env_prefix="CATEGORY_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 37 - 0
app/core/config/settings/cold_start.py

@@ -0,0 +1,37 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import Dict
+
+
+class ColdStartConfig(BaseSettings):
+    """冷启动配置"""
+
+    # 分类映射
+    category_map: Dict[str, str] = Field(
+        default_factory=lambda: {
+            "知识科普": "20250813032110801233225",
+            "国家大事": "20250813032845706844854",
+            "历史人物": "20250813033415138644740",
+            "奇闻趣事": "20250813033056703155233",
+            "名人八卦": "20250813033257335290696",
+            "怀旧时光": "20250813033536622149424",
+            "情感故事": "20250813033605574986309",
+            "社会法治": "20250813033829272898432",
+            "现代人物": "20250813034015084388155",
+            "社会现象": "20250813034056506771309",
+            "健康养生": "20250813034120792523588",
+            "家长里短": "20250813034159621236902",
+            "军事历史": "20250813034227997109122",
+            "财经科技": "20250813034253336624837",
+            "政治新闻": "20250813034320561348119",
+        }
+    )
+
+    # 输入源映射
+    input_source_map: Dict[str, int] = Field(
+        default_factory=lambda: {"weixin": 5, "toutiao": 6}
+    )
+
+    model_config = SettingsConfigDict(
+        env_prefix="COLD_START_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 25 - 0
app/core/config/settings/deepseek.py

@@ -0,0 +1,25 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class DeepSeekConfig(BaseSettings):
+    """DeepSeek API 配置"""
+
+    api_key: str = Field(
+        default="sk-cfd2df92c8864ab999d66a615ee812c5", description="DeepSeek API Key"
+    )
+    reasoner_model: str = Field(
+        default="deepseek-reasoner", description="DeepSeek 推理模型"
+    )
+    chat_model: str = Field(default="deepseek-chat", description="DeepSeek 对话模型")
+
+    model_config = SettingsConfigDict(
+        env_prefix="DEEPSEEK_", env_file=".env", case_sensitive=False, extra="ignore"
+    )
+
+    def get_model_map(self) -> dict:
+        """获取模型映射字典,兼容旧代码"""
+        return {
+            "DeepSeek-R1": self.reasoner_model,
+            "DeepSeek-V3": self.chat_model,
+        }

+ 20 - 0
app/core/config/settings/elasticsearch.py

@@ -0,0 +1,20 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import List
+
+
+class ElasticsearchConfig(BaseSettings):
+    """Elasticsearch 配置"""
+
+    hosts: List[str] = Field(default_factory=lambda: ["http://localhost:9200"])
+    username: str = ""
+    password: str = ""
+    index: str = "articles"
+
+    # ES mappings 和 settings(从 elastic_search_mappings.py 迁移)
+    mappings: dict = Field(default_factory=dict)
+    settings: dict = Field(default_factory=dict)
+
+    model_config = SettingsConfigDict(
+        env_prefix="ES_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 105 - 0
app/core/config/settings/mysql.py

@@ -0,0 +1,105 @@
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class DatabaseConfig(BaseSettings):
+    """数据库配置基类"""
+
+    host: str
+    port: int = 3306
+    user: str
+    password: str
+    db: str
+    charset: str = "utf8mb4"
+    minsize: int = 5
+    maxsize: int = 20
+
+    model_config = SettingsConfigDict(
+        env_prefix="", case_sensitive=False, extra="ignore"
+    )
+
+    def to_dict(self) -> dict:
+        """转换为字典格式,用于兼容旧代码"""
+        return {
+            "host": self.host,
+            "port": self.port,
+            "user": self.user,
+            "password": self.password,
+            "db": self.db,
+            "charset": self.charset,
+            "minsize": self.minsize,
+            "maxsize": self.maxsize,
+        }
+
+
+class AigcDatabaseConfig(DatabaseConfig):
+    """AIGC 数据库配置"""
+
+    host: str = "rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com"
+    user: str = "crawler_admin"
+    password: str = "cyber#crawler_2023"
+    db: str = "aigc-admin-prod"
+
+    model_config = SettingsConfigDict(
+        env_prefix="AIGC_DB_", env_file=".env", case_sensitive=False, extra="ignore"
+    )
+
+
+class LongVideoDatabaseConfig(DatabaseConfig):
+    """长视频数据库配置"""
+
+    host: str = "rr-bp1x9785e8h5452bi157.mysql.rds.aliyuncs.com"
+    user: str = "wx2016_longvideo"
+    password: str = "wx2016_longvideoP@assword1234"
+    db: str = "longvideo"
+
+    model_config = SettingsConfigDict(
+        env_prefix="LONG_VIDEO_DB_",
+        env_file=".env",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+
+class LongArticlesDatabaseConfig(DatabaseConfig):
+    """长文数据库配置"""
+
+    host: str = "rm-bp14529nwwcw75yr1ko.mysql.rds.aliyuncs.com"
+    user: str = "changwen_admin"
+    password: str = "changwen@123456"
+    db: str = "long_articles"
+
+    model_config = SettingsConfigDict(
+        env_prefix="LONG_ARTICLES_DB_",
+        env_file=".env",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+
+class PiaoquanCrawlerDatabaseConfig(DatabaseConfig):
+    """票圈爬虫数据库配置"""
+
+    host: str = "rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com"
+    user: str = "crawler"
+    password: str = "crawler123456@"
+    db: str = "piaoquan-crawler"
+
+    model_config = SettingsConfigDict(
+        env_prefix="PIAOQUAN_CRAWLER_DB_",
+        env_file=".env",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+
+class GrowthDatabaseConfig(DatabaseConfig):
+    """增长数据库配置"""
+
+    host: str = "rm-bp17q95335a99272b.mysql.rds.aliyuncs.com"
+    user: str = "crawler"
+    password: str = "crawler123456@"
+    db: str = "growth"
+
+    model_config = SettingsConfigDict(
+        env_prefix="GROWTH_DB_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 17 - 2
applications/config/task_chinese_name.py → app/core/config/settings/task_chinese_name.py

@@ -1,4 +1,13 @@
-name_map = {
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import Dict
+
+
+class TaskChineseNameConfig(BaseSettings):
+    """冷启动配置"""
+    # 分类映射
+    category_map: Dict[str, str] = Field(
+        default_factory=lambda: {
     "title_rewrite": "标题重写",
     "crawler_gzh_articles": "抓取公众号文章",
     "crawler_account_manager": "抓取账号管理",
@@ -24,4 +33,10 @@ name_map = {
     "update_account_read_avg": "更新账号平均阅读率",
     "get_follow_result": "获取自动关注回复",
     "extract_reply_result": "解析自动回复结果",
-}
+    }
+
+    )
+
+    model_config = SettingsConfigDict(
+        env_prefix="TASK_CHINESE_NAME_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 1 - 0
app/core/database/__init__.py

@@ -0,0 +1 @@
+from .mysql_pools import DatabaseManager

+ 50 - 28
applications/database/mysql_pools.py → app/core/database/mysql_pools.py

@@ -1,44 +1,48 @@
-import logging
-
 from aiomysql import create_pool
 from aiomysql.cursors import DictCursor
-from applications.config import *
 
-logging.basicConfig(level=logging.INFO)
+from app.core.config import GlobalConfigSettings
+from app.core.observability import LogService
 
 
-class DatabaseManager:
-    def __init__(self):
-        self.databases = None
+class DatabaseManager(LogService):
+    def __init__(self, config: GlobalConfigSettings):
+        super().__init__(config.aliyun_log)
+        self.database_mapper = {
+            "aigc": config.aigc_db,
+            "growth": config.growth_db,
+            "long_video": config.long_video_db,
+            "long_articles": config.long_articles_db,
+            "piaoquan_crawler": config.piaoquan_crawler_db,
+        }
         self.pools = {}
 
     async def init_pools(self):
         # 从配置获取数据库配置,也可以直接在这里配置
-        self.databases = {
-            "aigc": aigc_db_config,
-            "long_video": long_video_db_config,
-            "long_articles": long_articles_db_config,
-            "piaoquan_crawler": piaoquan_crawler_db_config,
-            "growth": growth_db_config,
-        }
-
-        for db_name, config in self.databases.items():
+        for db_name, config in self.database_mapper.items():
             try:
                 pool = await create_pool(
-                    host=config["host"],
-                    port=config["port"],
-                    user=config["user"],
-                    password=config["password"],
-                    db=config["db"],
-                    minsize=config["minsize"],
-                    maxsize=config["maxsize"],
+                    host=config.host,
+                    port=config.port,
+                    user=config.user,
+                    password=config.password,
+                    db=config.db,
+                    minsize=config.minsize,
+                    maxsize=config.maxsize,
                     cursorclass=DictCursor,
                     autocommit=True,
                 )
                 self.pools[db_name] = pool
-                logging.info(f"Created connection pool for {db_name}")
+                print(f"Pool for {db_name} created successfully")
+
             except Exception as e:
-                logging.error(f"Failed to create pool for {db_name}: {str(e)}")
+                await self.log(
+                    contents={
+                        "db_name": db_name,
+                        "error": str(e),
+                        "message": f"Failed to create pool for {db_name}",
+                    }
+                )
                 self.pools[db_name] = None
 
     async def close_pools(self):
@@ -46,7 +50,6 @@ class DatabaseManager:
             if pool:
                 pool.close()
                 await pool.wait_closed()
-                logging.info(f"Closed connection pool for {name}")
 
     async def async_fetch(
         self, query, db_name="long_articles", params=None, cursor_type=DictCursor
@@ -63,7 +66,16 @@ class DatabaseManager:
 
             return fetch_response
         except Exception as e:
-            logging.error(f"Failed to fetch {query}: {str(e)}")
+            await self.log(
+                contents={
+                    "task": "async_fetch",
+                    "db_name": db_name,
+                    "error": str(e),
+                    "message": f"Failed to fetch data from {db_name}",
+                    "query": query,
+                    "params": params,
+                }
+            )
             return None
 
     async def async_save(
@@ -85,10 +97,20 @@ class DatabaseManager:
                     return affected_rows
                 except Exception as e:
                     await connection.rollback()
+                    await self.log(
+                        contents={
+                            "task": "async_save",
+                            "db_name": db_name,
+                            "error": str(e),
+                            "message": f"Failed to save data to {db_name}",
+                            "query": query,
+                            "params": params,
+                        }
+                    )
                     raise e
 
     def get_pool(self, db_name):
         return self.pools.get(db_name)
 
     def list_databases(self):
-        return list(self.databases.keys())
+        return list(self.database_mapper.keys())

+ 2 - 0
app/core/dependency/__init__.py

@@ -0,0 +1,2 @@
+from .dependencies import log_service, mysql_manager
+from .dependencies import apollo_client, es_client

+ 37 - 0
app/core/dependency/dependencies.py

@@ -0,0 +1,37 @@
+import logging
+
+from app.core.config import GlobalConfigSettings
+from app.core.database import DatabaseManager
+from app.core.observability import LogService
+from app.infra.external import AsyncApolloApi
+from app.infra.external import AsyncElasticSearchClient
+
+
+logging.basicConfig(level=logging.INFO)
+
+logging.info("开始加载全局配置")
+config = GlobalConfigSettings()
+logging.info("全局配置加载完成")
+
+logging.info("开始加载日志服务")
+log_service = LogService(config.aliyun_log)
+logging.info("日志服务加载完成")
+
+logging.info("开始创建数据库连接池")
+mysql_manager = DatabaseManager(config)
+logging.info("数据库连接池创建完成")
+
+logging.info("开始创建Apollo客户端")
+apollo_client = AsyncApolloApi(config.apollo, None, None)
+logging.info("Apollo客户端创建完成")
+
+logging.info("开始创建ElasticSearch客户端")
+es_client = AsyncElasticSearchClient(index_=config.elasticsearch.index)
+logging.info("ElasticSearch客户端创建完成")
+
+__ALL__ = [
+    "log_service",
+    "mysql_manager",
+    "apollo_client",
+    "es_client",
+]

+ 1 - 0
app/core/observability/__init__.py

@@ -0,0 +1 @@
+from .logging import LogService

+ 1 - 0
app/core/observability/logging/__init__.py

@@ -0,0 +1 @@
+from .log_service import LogService

+ 8 - 3
applications/service/log_service.py → app/core/observability/logging/log_service.py

@@ -5,12 +5,17 @@ import json
 import datetime
 from aliyun.log import LogClient, PutLogsRequest, LogItem
 
+from app.core.config.settings import AliyunLogConfig
+
 
 class LogService:
-    def __init__(self, endpoint, access_key_id, access_key_secret, project, logstore):
+    def __init__(self, log_config: AliyunLogConfig):
+        endpoint = log_config.endpoint
+        access_key_id = log_config.access_key_id
+        access_key_secret = log_config.access_key_secret
         self.client = LogClient(endpoint, access_key_id, access_key_secret)
-        self.project = project
-        self.logstore = logstore
+        self.project = log_config.project
+        self.logstore = log_config.logstore
         self.queue = asyncio.Queue()
         self.running = False
 

+ 0 - 0
applications/pipeline/__init__.py → app/core/pipeline/__init__.py


+ 8 - 10
applications/pipeline/crawler_pipeline.py → app/core/pipeline/crawler_pipeline.py

@@ -1,15 +1,13 @@
-import time
-
-from typing import Any, Dict, Tuple, Callable
+from typing import Dict, Tuple
 
 from pydantic import BaseModel
 
-from applications.api import AsyncApolloApi
-from applications.utils import CrawlerMetaArticle
-from applications.utils import CrawlerMetaAccount
+from app.infra.utils import CrawlerMetaArticle
+from app.infra.utils import CrawlerMetaAccount
+from app.core.dependency import apollo_client
 
 
-class CrawlerPipeline(AsyncApolloApi):
+class CrawlerPipeline:
     MODEL_TABLE_MAP: Dict[str, Tuple[type[BaseModel], str]] = {
         "article": (CrawlerMetaArticle, "crawler_meta_article"),
         "account": (CrawlerMetaAccount, "crawler_candidate_account_pool"),
@@ -17,12 +15,12 @@ class CrawlerPipeline(AsyncApolloApi):
     }
 
     def __init__(self, pool, log_client):
-        super().__init__()
         self.pool = pool
         self.log_client = log_client
 
-    async def whether_title_sensitive(self, title: str) -> bool:
-        sensitive_word_list = await self.get_config_value("sensitive_word_list")
+    @staticmethod
+    async def whether_title_sensitive(title: str) -> bool:
+        sensitive_word_list = await apollo_client.get_config_value("sensitive_word_list")
         for word in sensitive_word_list:
             if word in title:
                 return True

+ 1 - 1
applications/pipeline/data_recycle_pipeline.py → app/core/pipeline/data_recycle_pipeline.py

@@ -1,7 +1,7 @@
 import json
 from typing import List, Dict
 
-from applications.utils import show_desc_to_sta, str_to_md5
+from app.infra.shared.tools import show_desc_to_sta, str_to_md5
 
 
 insert_outside_article_query = """

+ 0 - 0
app/domains/__init__.py


+ 0 - 0
applications/tasks/algorithm_tasks/__init__.py → app/domains/algorithm_tasks/__init__.py


+ 0 - 0
applications/tasks/algorithm_tasks/account_category_analysis.py → app/domains/algorithm_tasks/account_category_analysis.py


+ 6 - 1
applications/tasks/algorithm_tasks/models.py → app/domains/algorithm_tasks/models.py

@@ -6,7 +6,12 @@ import statsmodels.api as sm
 
 from pandas import DataFrame
 
-from applications.config import CATEGORY_FEATURES, CATEGORY_MAP
+from app.core.config import GlobalConfigSettings
+
+config = GlobalConfigSettings()
+
+CATEGORY_FEATURES = config.category.features
+CATEGORY_MAP = config.category.category_map
 
 
 class CategoryRegression:

+ 0 - 0
applications/tasks/analysis_task/__init__.py → app/domains/analysis_task/__init__.py


+ 0 - 0
applications/tasks/analysis_task/account_position_info.py → app/domains/analysis_task/account_position_info.py


+ 1 - 1
applications/tasks/analysis_task/crawler_detail.py → app/domains/analysis_task/crawler_detail.py

@@ -1,4 +1,4 @@
-from applications.api import feishu_robot
+from app.infra.external import feishu_robot
 
 
 class CrawlerDetailAnalysisConst:

+ 0 - 0
applications/tasks/cold_start_tasks/__init__.py → app/domains/cold_start_tasks/__init__.py


+ 0 - 0
applications/tasks/cold_start_tasks/article_pool/__init__.py → app/domains/cold_start_tasks/article_pool/__init__.py


+ 0 - 0
applications/tasks/cold_start_tasks/article_pool/article_pool_cold_start_const.py → app/domains/cold_start_tasks/article_pool/article_pool_cold_start_const.py


+ 1 - 1
applications/tasks/cold_start_tasks/article_pool/article_pool_cold_start_strategy.py → app/domains/cold_start_tasks/article_pool/article_pool_cold_start_strategy.py

@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from typing import List, Dict
-from applications.tasks.cold_start_tasks.article_pool.article_pool_cold_start_const import (
+from app.domains.cold_start_tasks.article_pool.article_pool_cold_start_const import (
     ArticlePoolColdStartConst,
 )
 

+ 2 - 2
applications/tasks/cold_start_tasks/article_pool/article_pool_filter_strategy.py → app/domains/cold_start_tasks/article_pool/article_pool_filter_strategy.py

@@ -3,8 +3,8 @@ from __future__ import annotations
 from typing import Optional
 from pandas import DataFrame
 
-from applications.api import feishu_robot
-from applications.tasks.cold_start_tasks.article_pool.article_pool_cold_start_const import (
+from app.infra.external import feishu_robot
+from app.domains.cold_start_tasks.article_pool.article_pool_cold_start_const import (
     ArticlePoolColdStartConst,
 )
 

+ 15 - 12
applications/tasks/cold_start_tasks/article_pool_cold_start.py → app/domains/cold_start_tasks/article_pool_cold_start.py

@@ -9,20 +9,23 @@ from typing import List
 from pandas import DataFrame
 from tqdm.asyncio import tqdm
 
-from applications.api import task_apollo, feishu_robot
-from applications.api import auto_create_crawler_task
-from applications.api import auto_bind_crawler_task_to_generate_task
-from applications.config import cold_start_category_map, input_source_map
-from applications.utils import get_titles_from_produce_plan
-from applications.tasks.cold_start_tasks.article_pool import (
+from app.infra.external import feishu_robot
+from app.infra.internal import auto_create_crawler_task
+from app.infra.internal import auto_bind_crawler_task_to_generate_task
+from app.infra.internal import get_titles_from_produce_plan
+from app.core.dependency import apollo_client
+from app.core.config.settings import ColdStartConfig
+
+from app.domains.cold_start_tasks.article_pool import (
     ArticlePoolColdStartStrategy,
     ArticlePoolFilterStrategy,
 )
 
 
 class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrategy):
-    def __init__(self, pool, log_client, trace_id):
+    def __init__(self, pool, log_client, trace_id, config: ColdStartConfig):
         super().__init__(pool, log_client, trace_id)
+        self.config = config
 
     async def get_article_from_meta_table(
         self, platform: str, crawl_method: str, strategy: str, category: str | None
@@ -145,7 +148,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
                 "inputSourceValue": crawler_plan_id,
                 "inputSourceLabel": crawler_plan_name,
                 "inputSourceModal": 3,
-                "inputSourceChannel": input_source_map[platform],
+                "inputSourceChannel": self.config.input_source_map[platform],
             }
         ]
         generate_plan_response = await auto_bind_crawler_task_to_generate_task(
@@ -194,7 +197,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
         match strategy:
             case "strategy_v1":
                 # split article into each category
-                category_list = await task_apollo.get_config_value(key="category_list")
+                category_list = await apollo_client.get_config_value(key="category_list")
                 for ai_category in category_list:
                     filter_category_df = filter_article_df[
                         filter_article_df["category_by_ai"] == ai_category
@@ -241,11 +244,11 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
 
     async def cold_start_by_category(self, category_list, platform, strategy):
         if not category_list:
-            category_list = list(cold_start_category_map.keys())
+            category_list = list(self.config.cold_start_category_map.keys())
 
         for category in tqdm(category_list):
             try:
-                plan_id = cold_start_category_map[category]
+                plan_id = self.config.cold_start_category_map[category]
                 affected_rows = await self.filter_published_titles(plan_id)
                 await self.log_client.log(
                     contents={
@@ -363,7 +366,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
                     }
                 )
 
-                crawl_methods_map = await task_apollo.get_config_value(
+                crawl_methods_map = await apollo_client.get_config_value(
                     key="category_cold_start_map"
                 )
 

+ 0 - 0
app/domains/cold_start_tasks/video_pool/__init__.py


+ 2 - 2
applications/tasks/cold_start_tasks/video_pool/video_pool_audit_strategy.py → app/domains/cold_start_tasks/video_pool/video_pool_audit_strategy.py

@@ -1,5 +1,5 @@
-from applications.api import fetch_piaoquan_video_list_detail
-from applications.api import insert_crawler_relation_to_aigc_system
+from app.infra.internal import fetch_piaoquan_video_list_detail
+from app.infra.internal import insert_crawler_relation_to_aigc_system
 
 from .video_pool_const import VideoPoolConst
 

+ 0 - 0
applications/tasks/cold_start_tasks/video_pool/video_pool_const.py → app/domains/cold_start_tasks/video_pool/video_pool_const.py


+ 0 - 0
applications/tasks/cold_start_tasks/video_pool_cold_start.py → app/domains/cold_start_tasks/video_pool_cold_start.py


+ 0 - 0
applications/tasks/crawler_tasks/__init__.py → app/domains/crawler_tasks/__init__.py


+ 0 - 0
applications/tasks/crawler_tasks/crawler_account_manager.py → app/domains/crawler_tasks/crawler_account_manager.py


+ 8 - 8
applications/tasks/crawler_tasks/crawler_gzh.py → app/domains/crawler_tasks/crawler_gzh.py

@@ -1,19 +1,19 @@
 from __future__ import annotations
 
 import asyncio
-import time, json
+import time
 import traceback
 from datetime import datetime, date, timedelta
 from typing import List, Dict
 from tqdm.asyncio import tqdm
 
-from applications.api import feishu_robot
-from applications.crawler.wechat import weixin_search
-from applications.crawler.wechat import get_article_detail
-from applications.crawler.wechat import get_article_list_from_account
-from applications.pipeline import CrawlerPipeline
-from applications.utils import timestamp_to_str, show_desc_to_sta
-from applications.utils import get_hot_titles, generate_gzh_id
+from app.infra.external import feishu_robot
+from app.infra.crawler.wechat import weixin_search
+from app.infra.crawler.wechat import get_article_detail
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.core.pipeline import CrawlerPipeline
+from app.infra.shared.tools import timestamp_to_str, show_desc_to_sta, generate_gzh_id
+from app.infra.utils import get_hot_titles
 
 
 class CrawlerGzhConst:

+ 3 - 3
applications/tasks/crawler_tasks/crawler_gzh_fans.py → app/domains/crawler_tasks/crawler_gzh_fans.py

@@ -3,13 +3,13 @@ import json
 import time
 from datetime import datetime
 
-from applications.crawler.wechat import (
+from app.infra.crawler.wechat import (
     get_gzh_fans,
     get_access_token,
     get_union_id_batch,
 )
-from applications.api import feishu_robot
-from applications.utils import run_tasks_with_asyncio_task_group
+from app.infra.external import feishu_robot
+from app.infra.shared import run_tasks_with_asyncio_task_group
 
 
 class CrawlerGzhFansConst:

+ 10 - 7
applications/tasks/crawler_tasks/crawler_toutiao.py → app/domains/crawler_tasks/crawler_toutiao.py

@@ -6,16 +6,19 @@ import time
 import aiohttp
 import traceback
 from datetime import datetime
-from typing import List, Dict
+from typing import List
 
 from tqdm import tqdm
 
-from applications.api import feishu_robot
-from applications.crawler.toutiao import get_toutiao_account_info_list
-from applications.crawler.toutiao import search_in_toutiao
-from applications.crawler.toutiao import get_toutiao_detail
-from applications.pipeline import CrawlerPipeline
-from applications.utils import async_proxy, get_top_article_title_list
+from app.infra.external import feishu_robot
+from app.infra.crawler.toutiao import get_toutiao_account_info_list
+from app.infra.crawler.toutiao import search_in_toutiao
+from app.infra.crawler.toutiao import get_toutiao_detail
+from app.infra.shared.tools import async_proxy
+from app.infra.utils import get_top_article_title_list
+
+from app.core.pipeline import CrawlerPipeline
+
 
 
 class CrawlerToutiaoConst:

+ 0 - 0
applications/tasks/data_recycle_tasks/__init__.py → app/domains/data_recycle_tasks/__init__.py


+ 3 - 6
applications/tasks/data_recycle_tasks/article_detail_stat.py → app/domains/data_recycle_tasks/article_detail_stat.py

@@ -1,14 +1,11 @@
-import json
 import time
 import traceback
 from datetime import datetime, timedelta
 
-from applications.api import feishu_robot
+from app.infra.crawler.wechat import get_gzh_stat_daily
+from app.infra.crawler.wechat import get_access_token
 
-from applications.crawler.wechat import get_gzh_stat_daily
-from applications.crawler.wechat import get_access_token
-
-from applications.utils import run_tasks_with_asyncio_task_group
+from app.infra.shared import run_tasks_with_asyncio_task_group
 
 
 class ArticleDetailStatConst:

+ 5 - 5
applications/tasks/data_recycle_tasks/recycle_daily_publish_articles.py → app/domains/data_recycle_tasks/recycle_daily_publish_articles.py

@@ -7,11 +7,11 @@ import traceback
 
 from tqdm.asyncio import tqdm
 
-from applications.api import feishu_robot
-from applications.crawler.wechat import get_article_list_from_account
-from applications.crawler.wechat import get_article_detail
-from applications.pipeline import insert_article_into_recycle_pool
-from applications.utils import str_to_md5
+from app.infra.external import feishu_robot
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.crawler.wechat import get_article_detail
+from app.core.pipeline import insert_article_into_recycle_pool
+from app.infra.shared.tools import str_to_md5
 
 
 class Const:

+ 3 - 3
applications/tasks/data_recycle_tasks/recycle_mini_program_detail.py → app/domains/data_recycle_tasks/recycle_mini_program_detail.py

@@ -4,9 +4,9 @@ from typing import Any
 from datetime import datetime, timedelta
 
 
-from applications.crawler.wechat import get_article_detail
-from applications.utils import extract_root_source_id
-from applications.utils import run_tasks_with_asyncio_task_group
+from app.infra.crawler.wechat import get_article_detail
+from app.infra.shared.tools import extract_root_source_id
+from app.infra.shared import run_tasks_with_asyncio_task_group
 
 
 class MiniProgramConst:

+ 4 - 4
applications/tasks/data_recycle_tasks/recycle_outside_account_articles.py → app/domains/data_recycle_tasks/recycle_outside_account_articles.py

@@ -6,10 +6,10 @@ from tqdm.asyncio import tqdm
 
 from .recycle_daily_publish_articles import UpdateRootSourceIdAndUpdateTimeTask
 from .recycle_daily_publish_articles import Const
-from applications.crawler.wechat import get_article_list_from_account
-from applications.crawler.wechat import get_article_detail
-from applications.pipeline import insert_outside_article_into_recycle_pool
-from applications.api import feishu_robot
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.crawler.wechat import get_article_detail
+from app.core.pipeline import insert_outside_article_into_recycle_pool
+from app.infra.external import feishu_robot
 
 account_name_set = {
     "念念私语",

+ 0 - 0
applications/tasks/llm_tasks/__init__.py → app/domains/llm_tasks/__init__.py


+ 3 - 3
applications/tasks/llm_tasks/candidate_account_process.py → app/domains/llm_tasks/candidate_account_process.py

@@ -3,9 +3,9 @@ import traceback
 from typing import List, Dict, Optional
 from tqdm.asyncio import tqdm
 
-from applications.api import fetch_deepseek_completion
-from applications.api import feishu_robot
-from applications.utils import ci_lower
+from app.infra.external import fetch_deepseek_completion
+from app.infra.external import feishu_robot
+from app.infra.shared.tools import ci_lower
 
 
 class CandidateAccountProcessConst:

+ 4 - 4
applications/tasks/llm_tasks/process_title.py → app/domains/llm_tasks/process_title.py

@@ -4,10 +4,10 @@ import traceback
 
 from typing import Optional, List, Dict, Tuple
 
-from applications.api import fetch_deepseek_completion
-from applications.utils import yield_batch
-from applications.tasks.llm_tasks.prompts import extract_article_features
-from applications.tasks.llm_tasks.prompts import extract_article_category
+from app.infra.external import fetch_deepseek_completion
+from app.infra.shared.tools import yield_batch
+from app.domains.llm_tasks.prompts import extract_article_features
+from app.domains.llm_tasks.prompts import extract_article_category
 from tqdm.asyncio import tqdm
 
 

+ 0 - 0
applications/tasks/llm_tasks/prompts.py → app/domains/llm_tasks/prompts.py


+ 0 - 0
applications/tasks/monitor_tasks/__init__.py → app/domains/monitor_tasks/__init__.py


+ 5 - 5
applications/tasks/monitor_tasks/auto_reply_cards_monitor.py → app/domains/monitor_tasks/auto_reply_cards_monitor.py

@@ -13,11 +13,11 @@ from urllib.parse import unquote, parse_qs, urlparse
 import requests
 from requests.exceptions import RequestException
 
-from applications.utils import upload_to_oss
-from applications.utils import fetch_from_odps
-from applications.utils import AsyncHttpClient
-from applications.crawler.wechat import get_article_list_from_account
-from applications.crawler.wechat import get_article_detail
+from app.infra.shared.tools import upload_to_oss
+from app.infra.shared.tools import fetch_from_odps
+from app.infra.shared import AsyncHttpClient
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.crawler.wechat import get_article_detail
 
 
 class AutoReplyCardsMonitorConst:

+ 3 - 3
applications/tasks/monitor_tasks/cooperate_accounts_monitor.py → app/domains/monitor_tasks/cooperate_accounts_monitor.py

@@ -7,9 +7,9 @@ from tqdm import tqdm
 from datetime import datetime, timedelta
 from urllib.parse import unquote, parse_qs, urlparse
 
-from applications.utils import fetch_from_odps, show_desc_to_sta
-from applications.crawler.wechat import get_article_list_from_account
-from applications.crawler.wechat import get_article_detail
+from app.infra.shared.tools import fetch_from_odps, show_desc_to_sta
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.crawler.wechat import get_article_detail
 
 
 class CooperateAccountsMonitorTaskConst:

+ 3 - 3
applications/tasks/monitor_tasks/get_off_videos.py → app/domains/monitor_tasks/get_off_videos.py

@@ -4,9 +4,9 @@ from typing import List, Optional
 
 from tqdm import tqdm
 
-from applications.api import change_video_audit_status
-from applications.api import fetch_piaoquan_video_list_detail
-from applications.api import feishu_robot
+from app.infra.internal import change_video_audit_status
+from app.infra.internal import fetch_piaoquan_video_list_detail
+from app.infra.external import feishu_robot
 
 
 class GetOffVideosConst:

+ 5 - 5
applications/tasks/monitor_tasks/gzh_article_monitor.py → app/domains/monitor_tasks/gzh_article_monitor.py

@@ -4,11 +4,11 @@ from typing import Optional, List
 
 from tqdm import tqdm
 
-from applications.api import feishu_robot
-from applications.api import delete_illegal_gzh_articles
-from applications.crawler.wechat import get_article_detail
-from applications.crawler.wechat import get_article_list_from_account
-from applications.utils import str_to_md5
+from app.infra.external import feishu_robot
+from app.infra.internal import delete_illegal_gzh_articles
+from app.infra.crawler.wechat import get_article_detail
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.shared.tools import str_to_md5
 
 
 class MonitorConst:

+ 2 - 2
applications/tasks/monitor_tasks/kimi_balance.py → app/domains/monitor_tasks/kimi_balance.py

@@ -1,7 +1,7 @@
 import traceback
 from typing import Dict
-from applications.api import feishu_robot
-from applications.utils import AsyncHttpClient
+from app.infra.external import feishu_robot
+from app.infra.shared import AsyncHttpClient
 
 # const
 BALANCE_LIMIT_THRESHOLD = 100.0

+ 1 - 3
applications/tasks/monitor_tasks/limited_account_analysis.py → app/domains/monitor_tasks/limited_account_analysis.py

@@ -1,7 +1,5 @@
-import json
-
 from datetime import datetime, timedelta
-from applications.api import feishu_sheet, feishu_robot
+from app.infra.external import feishu_sheet
 
 
 class LimitedAccountAnalysisConst:

+ 2 - 2
applications/tasks/monitor_tasks/task_processing_monitor.py → app/domains/monitor_tasks/task_processing_monitor.py

@@ -1,7 +1,7 @@
 import time
 
-from applications.api import feishu_robot
-from applications.tasks.task_mapper import TaskMapper
+from app.infra.external import feishu_robot
+from app.jobs.task_mapper import TaskMapper
 
 
 class TaskProcessingMonitor(TaskMapper):

+ 0 - 0
app/infra/__init__.py


+ 0 - 0
app/infra/crawler/__init__.py


+ 0 - 0
applications/crawler/toutiao/__init__.py → app/infra/crawler/toutiao/__init__.py


+ 1 - 1
applications/crawler/toutiao/blogger.py → app/infra/crawler/toutiao/blogger.py

@@ -6,7 +6,7 @@ from __future__ import annotations
 
 import aiohttp
 
-from applications.utils import async_proxy
+from app.infra.shared.tools import async_proxy
 from .use_js import call_js_function
 
 

+ 1 - 1
applications/crawler/toutiao/detail_recommend.py → app/infra/crawler/toutiao/detail_recommend.py

@@ -8,7 +8,7 @@ import json
 import requests
 from tenacity import retry
 
-from applications.utils import proxy, request_retry
+from app.infra.shared.tools import proxy, request_retry
 from .use_js import call_js_function
 
 retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)

+ 0 - 0
app/infra/crawler/toutiao/main_page_recomend.py


+ 1 - 3
applications/crawler/toutiao/search.py → app/infra/crawler/toutiao/search.py

@@ -1,6 +1,4 @@
-import json
-
-from applications.utils import AsyncHttpClient
+from app.infra.shared import AsyncHttpClient
 
 
 async def search_in_toutiao(keyword):

+ 0 - 0
applications/crawler/toutiao/toutiao.js → app/infra/crawler/toutiao/toutiao.js


+ 1 - 1
applications/crawler/toutiao/use_js.py → app/infra/crawler/toutiao/use_js.py

@@ -5,7 +5,7 @@
 import json
 import subprocess
 
-toutiao_js_path = "applications/crawler/toutiao/toutiao.js"
+toutiao_js_path = "app/infra/crawler/toutiao/toutiao.js"
 
 
 def call_js_function(arguments_list):

+ 0 - 0
applications/crawler/wechat/__init__.py → app/infra/crawler/wechat/__init__.py


+ 1 - 1
applications/crawler/wechat/gzh_article_stat.py → app/infra/crawler/wechat/gzh_article_stat.py

@@ -1,4 +1,4 @@
-from applications.utils import AsyncHttpClient
+from app.infra.shared import AsyncHttpClient
 
 
 # 抓取公众号粉丝

+ 1 - 1
applications/crawler/wechat/gzh_fans.py → app/infra/crawler/wechat/gzh_fans.py

@@ -1,6 +1,6 @@
 import random
 
-from applications.utils import AsyncHttpClient
+from app.infra.shared import AsyncHttpClient
 
 
 # 抓取公众号粉丝

+ 3 - 3
applications/crawler/wechat/gzh_spider.py → app/infra/crawler/wechat/gzh_spider.py

@@ -6,9 +6,9 @@ import requests
 from fake_useragent import FakeUserAgent
 from tenacity import retry
 
-from applications.api import log
-from applications.utils import request_retry
-from applications.utils import AsyncHttpClient
+from app.infra.external import log
+from app.infra.shared.tools import request_retry
+from app.infra.shared import AsyncHttpClient
 
 retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=16)
 

+ 19 - 0
app/infra/external/__init__.py

@@ -0,0 +1,19 @@
+from .aliyun import log
+from .deepseek_official import fetch_deepseek_completion
+from .apollo import AsyncApolloApi
+from .feishu import FeishuBotApi
+from .feishu import FeishuSheetApi
+from .elastic_search import AsyncElasticSearchClient
+
+feishu_robot = FeishuBotApi()
+feishu_sheet = FeishuSheetApi()
+
+__all__ = [
+    "feishu_robot",
+    "feishu_sheet",
+    "AsyncApolloApi",
+    "fetch_deepseek_completion",
+    "log",
+    "AsyncElasticSearchClient",
+]
+

+ 0 - 0
applications/api/aliyun_log_api.py → app/infra/external/aliyun.py


+ 33 - 2
applications/utils/async_apollo_client.py → app/infra/external/apollo.py

@@ -5,13 +5,17 @@ import socket
 import asyncio
 import aiohttp
 
+from typing import Dict, Union
+
+from app.core.config.settings import ApolloConfig
+
 
 class AsyncApolloClient:
     def __init__(
         self,
         app_id,
+        config_server_url="http://localhost:8080",
         cluster="default",
-        config_server_url="http://localhost:8080",
         timeout=35,
         ip=None,
     ):
@@ -26,7 +30,8 @@ class AsyncApolloClient:
         self._notification_map = {"application": -1}
         self._stop_event = asyncio.Event()
 
-    def _init_ip(self):
+    @staticmethod
+    def _init_ip():
         s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
         try:
             s.connect(("8.8.8.8", 53))
@@ -129,3 +134,29 @@ class AsyncApolloClient:
             await self._long_poll()
         logging.info("Listener stopped.")
         self.stopped = True
+
+
+class AsyncApolloApi(AsyncApolloClient):
+    def __init__(
+            self,
+            apollo_config: ApolloConfig,
+            app_id: str | None,
+            env: str | None):
+        if not app_id:
+            app_id = apollo_config.app_id
+
+        if not env:
+            env = apollo_config.env
+
+        server_url = apollo_config.apollo_map.get(app_id, {}).get(env)
+        super().__init__(app_id, server_url)
+
+    async def get_config_value(
+        self, key: str, output_type: str = "json"
+    ) -> Union[Dict, str]:
+        match output_type:
+            case "json":
+                response = await self.get_value(key)
+                return json.loads(response)
+            case _:
+                return await self.get_value(key)

+ 10 - 4
applications/api/deep_seek_official_api.py → app/infra/external/deepseek_official.py

@@ -8,8 +8,14 @@ import json
 from typing import Dict, List, Optional
 from openai import OpenAI
 
-from applications.config import deep_seek_official_model
-from applications.config import deep_seek_official_api_key
+from app.core.config import GlobalConfigSettings
+
+config = GlobalConfigSettings()
+
+model_map = config.deepseek.get_model_map()
+api_key = config.deepseek.api_key
+chat_model = config.deepseek.chat_model
+reasoner_model = config.deepseek.reasoner_model
 
 
 def fetch_deepseek_completion(
@@ -21,7 +27,7 @@ def fetch_deepseek_completion(
 ) -> Optional[Dict | List]:
     messages = [{"role": "user", "content": prompt}]
     kwargs = {
-        "model": deep_seek_official_model.get(model, "deepseek-chat"),
+        "model": model_map.get(model, chat_model),
         "messages": messages,
     }
 
@@ -31,7 +37,7 @@ def fetch_deepseek_completion(
         kwargs["tool_choice"] = "auto"
 
     client = OpenAI(
-        api_key=deep_seek_official_api_key, base_url="https://api.deepseek.com"
+        api_key=api_key, base_url="https://api.deepseek.com"
     )
 
     if output_type == "json":

+ 2 - 4
applications/api/elastic_search_api.py → app/infra/external/elastic_search.py

@@ -3,14 +3,12 @@ import ssl
 from elasticsearch import AsyncElasticsearch
 from elasticsearch.helpers import async_bulk
 
-from applications.config import es_index
-
 
 class AsyncElasticSearchClient:
-    def __init__(self, index_=es_index):
+    def __init__(self, index_):
         self.password = "nkvvASQuQ0XUGRq5OLvm"
         self.hosts = ["https://192.168.205.85:9200", "https://192.168.205.85:9300"]
-        self.ctx = ssl.create_default_context(cafile="applications/config/es_certs.crt")
+        self.ctx = ssl.create_default_context(cafile="app/core/config/cert/es_certs.crt")
         self.es = AsyncElasticsearch(
             self.hosts, basic_auth=("elastic", self.password), ssl_context=self.ctx
         )

+ 1 - 3
applications/api/async_feishu_api.py → app/infra/external/feishu.py

@@ -1,8 +1,6 @@
 import json
 
-import requests
-
-from applications.utils import AsyncHttpClient
+from app.infra.shared import AsyncHttpClient
 
 
 class Feishu:

+ 23 - 0
app/infra/internal/__init__.py

@@ -0,0 +1,23 @@
+# piaoquan
+from .piaoquan import change_video_audit_status
+from .piaoquan import publish_video_to_piaoquan
+from .piaoquan import fetch_piaoquan_video_list_detail
+
+# aigc system api
+from .aigc_system import delete_illegal_gzh_articles
+from .aigc_system import auto_create_crawler_task
+from .aigc_system import auto_bind_crawler_task_to_generate_task
+from .aigc_system import insert_crawler_relation_to_aigc_system
+from .aigc_system import get_titles_from_produce_plan
+
+
+__all__ = [
+    "change_video_audit_status",
+    "publish_video_to_piaoquan",
+    "fetch_piaoquan_video_list_detail",
+    "delete_illegal_gzh_articles",
+    "auto_create_crawler_task",
+    "auto_bind_crawler_task_to_generate_task",
+    "insert_crawler_relation_to_aigc_system",
+    "get_titles_from_produce_plan",
+]

+ 21 - 1
applications/api/async_aigc_system_api.py → app/infra/internal/aigc_system.py

@@ -1,6 +1,7 @@
 import json
 from typing import Optional, Dict, List, TypedDict
-from applications.utils import AsyncHttpClient
+from datetime import datetime, timedelta
+from app.infra.shared import AsyncHttpClient
 
 HEADERS = {
     "Accept": "application/json",
@@ -184,3 +185,22 @@ async def insert_crawler_relation_to_aigc_system(
         res = await client.post(url=url, headers=HEADERS, data=payload)
 
     return res
+
+
+async def get_titles_from_produce_plan(pool, plan_id, threshold=None):
+    if not threshold:
+        thirty_days_ago = datetime.now() - timedelta(days=30)
+        threshold = thirty_days_ago.strftime("%Y%m%d") + 15 * "0"
+
+    query = f"""
+        select distinct t2.title
+        from produce_plan_input_source t3
+            join crawler_plan_result_rel t1 on t3.input_source_value = t1.plan_id
+            join crawler_content t2 on t1.channel_source_id = t2.channel_content_id
+        where t3.plan_id = %s
+        and t3.input_source_value > %s;
+    """
+    response = await pool.async_fetch(
+        query=query, db_name="aigc", params=(plan_id, threshold)
+    )
+    return tuple([i["title"] for i in response])

+ 1 - 1
applications/api/async_piaoquan_api.py → app/infra/internal/piaoquan.py

@@ -1,6 +1,6 @@
 from typing import Optional, Dict, List
 
-from applications.utils import AsyncHttpClient
+from app.infra.shared import AsyncHttpClient
 
 
 async def fetch_piaoquan_video_list_detail(video_list: List[int]) -> Optional[Dict]:

+ 2 - 0
app/infra/shared/__init__.py

@@ -0,0 +1,2 @@
+from .async_tasks import run_tasks_with_asyncio_task_group
+from .http_client import AsyncHttpClient

+ 0 - 0
applications/utils/async_tasks.py → app/infra/shared/async_tasks.py


+ 0 - 0
applications/utils/async_http_client.py → app/infra/shared/http_client.py


Some files were not shown because too many files changed in this diff