View source code

Merge branch 'feature/luojunhui/20260128-code-improve' of Server/LongArticleTaskServer into master

luojunhui 1 month ago
Parent
Commit
57ef5aa7d4
100 changed files with 1387 additions and 259 deletions
  1. 102 77
      README.md
  2. 0 0
      app/ab_test/__init__.py
  3. 0 0
      app/ab_test/ab_accounts.py
  4. 4 4
      app/ab_test/get_cover.py
  5. 0 0
      app/api/middleware/auth.py
  6. 0 0
      app/api/middleware/error_handler.py
  7. 0 0
      app/api/middleware/rate_limiter.py
  8. 15 0
      app/api/service/__init__.py
  9. 2 2
      app/api/service/gzh_cookie_manager.py
  10. 166 0
      app/api/service/task_manager_service.py
  11. 17 9
      app/api/service/task_scheduler.py
  12. 11 0
      app/api/v1/endpoints/__init__.py
  13. 27 0
      app/api/v1/endpoints/abtest.py
  14. 15 0
      app/api/v1/endpoints/health.py
  15. 42 0
      app/api/v1/endpoints/tasks.py
  16. 27 0
      app/api/v1/endpoints/tokens.py
  17. 3 0
      app/api/v1/routes/__init__.py
  18. 44 0
      app/api/v1/routes/routes.py
  19. 14 0
      app/api/v1/utils/__init__.py
  20. 25 0
      app/api/v1/utils/_utils.py
  21. 16 0
      app/api/v1/utils/deps.py
  22. 41 0
      app/api/v1/utils/schemas.py
  23. 1 0
      app/core/bootstrap/__init__.py
  24. 35 0
      app/core/bootstrap/resource_manager.py
  25. 1 0
      app/core/config/__init__.py
  26. 0 0
      app/core/config/cert/es_certs.crt
  27. 46 0
      app/core/config/global_settings.py
  28. 28 0
      app/core/config/settings/__init__.py
  29. 25 0
      app/core/config/settings/aliyun.py
  30. 25 0
      app/core/config/settings/apollo.py
  31. 54 0
      app/core/config/settings/category.py
  32. 37 0
      app/core/config/settings/cold_start.py
  33. 25 0
      app/core/config/settings/deepseek.py
  34. 20 0
      app/core/config/settings/elasticsearch.py
  35. 105 0
      app/core/config/settings/mysql.py
  36. 45 0
      app/core/config/settings/task_chinese_name.py
  37. 1 0
      app/core/database/__init__.py
  38. 53 25
      app/core/database/mysql_pools.py
  39. 1 0
      app/core/dependency/__init__.py
  40. 21 0
      app/core/dependency/dependencies.py
  41. 1 0
      app/core/observability/__init__.py
  42. 1 0
      app/core/observability/logging/__init__.py
  43. 95 0
      app/core/observability/logging/log_service.py
  44. 0 0
      app/core/pipeline/__init__.py
  45. 9 11
      app/core/pipeline/crawler_pipeline.py
  46. 1 1
      app/core/pipeline/data_recycle_pipeline.py
  47. 0 0
      app/core/pipeline/schemas.py
  48. 0 0
      app/domains/algorithm_tasks/__init__.py
  49. 0 0
      app/domains/algorithm_tasks/account_category_analysis.py
  50. 6 1
      app/domains/algorithm_tasks/models.py
  51. 0 0
      app/domains/analysis_task/__init__.py
  52. 0 0
      app/domains/analysis_task/account_position_info.py
  53. 1 1
      app/domains/analysis_task/crawler_detail.py
  54. 0 0
      app/domains/cold_start_tasks/__init__.py
  55. 0 0
      app/domains/cold_start_tasks/article_pool/__init__.py
  56. 0 0
      app/domains/cold_start_tasks/article_pool/article_pool_cold_start_const.py
  57. 1 1
      app/domains/cold_start_tasks/article_pool/article_pool_cold_start_strategy.py
  58. 2 2
      app/domains/cold_start_tasks/article_pool/article_pool_filter_strategy.py
  59. 21 12
      app/domains/cold_start_tasks/article_pool_cold_start.py
  60. 0 0
      app/domains/cold_start_tasks/video_pool/__init__.py
  61. 2 2
      app/domains/cold_start_tasks/video_pool/video_pool_audit_strategy.py
  62. 0 0
      app/domains/cold_start_tasks/video_pool/video_pool_const.py
  63. 0 0
      app/domains/cold_start_tasks/video_pool_cold_start.py
  64. 0 0
      app/domains/crawler_tasks/__init__.py
  65. 0 0
      app/domains/crawler_tasks/crawler_account_manager.py
  66. 14 14
      app/domains/crawler_tasks/crawler_gzh.py
  67. 3 3
      app/domains/crawler_tasks/crawler_gzh_fans.py
  68. 23 13
      app/domains/crawler_tasks/crawler_toutiao.py
  69. 0 0
      app/domains/data_recycle_tasks/__init__.py
  70. 6 9
      app/domains/data_recycle_tasks/article_detail_stat.py
  71. 6 6
      app/domains/data_recycle_tasks/recycle_daily_publish_articles.py
  72. 3 3
      app/domains/data_recycle_tasks/recycle_mini_program_detail.py
  73. 4 4
      app/domains/data_recycle_tasks/recycle_outside_account_articles.py
  74. 0 0
      app/domains/llm_tasks/__init__.py
  75. 3 3
      app/domains/llm_tasks/candidate_account_process.py
  76. 4 4
      app/domains/llm_tasks/process_title.py
  77. 0 0
      app/domains/llm_tasks/prompts.py
  78. 0 0
      app/domains/monitor_tasks/__init__.py
  79. 5 5
      app/domains/monitor_tasks/auto_reply_cards_monitor.py
  80. 4 5
      app/domains/monitor_tasks/cooperate_accounts_monitor.py
  81. 3 3
      app/domains/monitor_tasks/get_off_videos.py
  82. 5 5
      app/domains/monitor_tasks/gzh_article_monitor.py
  83. 2 2
      app/domains/monitor_tasks/kimi_balance.py
  84. 1 3
      app/domains/monitor_tasks/limited_account_analysis.py
  85. 2 2
      app/domains/monitor_tasks/task_processing_monitor.py
  86. 0 0
      app/infra/crawler/__init__.py
  87. 0 0
      app/infra/crawler/toutiao/__init__.py
  88. 1 1
      app/infra/crawler/toutiao/blogger.py
  89. 1 1
      app/infra/crawler/toutiao/detail_recommend.py
  90. 0 0
      app/infra/crawler/toutiao/main_page_recomend.py
  91. 1 3
      app/infra/crawler/toutiao/search.py
  92. 0 0
      app/infra/crawler/toutiao/toutiao.js
  93. 1 1
      app/infra/crawler/toutiao/use_js.py
  94. 1 1
      app/infra/crawler/wechat/__init__.py
  95. 3 8
      app/infra/crawler/wechat/gzh_article_stat.py
  96. 2 6
      app/infra/crawler/wechat/gzh_fans.py
  97. 6 4
      app/infra/crawler/wechat/gzh_spider.py
  98. 18 0
      app/infra/external/__init__.py
  99. 0 0
      app/infra/external/aliyun.py
  100. 31 2
      app/infra/external/apollo.py

+ 102 - 77
README.md

@@ -17,61 +17,61 @@ docker compose up -d
 ├── Dockerfile
 ├── LICENSE
 ├── README.md
-├── app_config.toml
-├── applications
-│   ├── __init__.py
+├── app
 │   ├── ab_test
 │   │   ├── __init__.py
 │   │   ├── ab_accounts.py
 │   │   └── get_cover.py
 │   ├── api
-│   │   ├── __init__.py
-│   │   ├── aliyun_log_api.py
-│   │   ├── async_aigc_system_api.py
-│   │   ├── async_apollo_api.py
-│   │   ├── async_feishu_api.py
-│   │   ├── async_piaoquan_api.py
-│   │   ├── deep_seek_official_api.py
-│   │   └── elastic_search_api.py
-│   ├── config
-│   │   ├── __init__.py
-│   │   ├── aliyun_log_config.py
-│   │   ├── category_config.py
-│   │   ├── cold_start_config.py
-│   │   ├── deepseek_config.py
-│   │   ├── elastic_search_mappings.py
-│   │   ├── es_certs.crt
-│   │   ├── mysql_config.py
-│   │   └── task_chinese_name.py
-│   ├── crawler
-│   │   ├── tophub
-│   │   ├── toutiao
-│   │   │   ├── __init__.py
-│   │   │   ├── blogger.py
-│   │   │   ├── detail_recommend.py
-│   │   │   ├── main_page_recomend.py
-│   │   │   ├── search.py
-│   │   │   ├── toutiao.js
-│   │   │   └── use_js.py
-│   │   └── wechat
+│   │   ├── middleware
+│   │   │   ├── auth.py
+│   │   │   ├── error_handler.py
+│   │   │   └── rate_limiter.py
+│   │   ├── service
+│   │   │   ├── __init__.py
+│   │   │   ├── gzh_cookie_manager.py
+│   │   │   ├── task_manager_service.py
+│   │   │   └── task_scheduler.py
+│   │   └── v1
 │   │       ├── __init__.py
-│   │       ├── gzh_article_stat.py
-│   │       ├── gzh_fans.py
-│   │       └── gzh_spider.py
-│   ├── database
-│   │   ├── __init__.py
-│   │   └── mysql_pools.py
-│   ├── pipeline
-│   │   ├── __init__.py
-│   │   ├── crawler_pipeline.py
-│   │   └── data_recycle_pipeline.py
-│   ├── service
-│   │   ├── __init__.py
-│   │   ├── gzh_cookie_manager.py
-│   │   ├── log_service.py
-│   │   └── task_manager_service.py
-│   ├── tasks
-│   │   ├── __init__.py
+│   │       └── routes.py
+│   ├── core
+│   │   ├── bootstrap
+│   │   │   ├── __init__.py
+│   │   │   └── resource_manager.py
+│   │   ├── config
+│   │   │   ├── __init__.py
+│   │   │   ├── cert
+│   │   │   │   └── es_certs.crt
+│   │   │   ├── global_settings.py
+│   │   │   └── settings
+│   │   │       ├── __init__.py
+│   │   │       ├── aliyun.py
+│   │   │       ├── apollo.py
+│   │   │       ├── category.py
+│   │   │       ├── cold_start.py
+│   │   │       ├── deepseek.py
+│   │   │       ├── elasticsearch.py
+│   │   │       ├── mysql.py
+│   │   │       └── task_chinese_name.py
+│   │   ├── database
+│   │   │   ├── __init__.py
+│   │   │   └── mysql_pools.py
+│   │   ├── dependency
+│   │   │   ├── __init__.py
+│   │   │   └── dependencies.py
+│   │   ├── observability
+│   │   │   ├── __init__.py
+│   │   │   ├── logging
+│   │   │   │   ├── __init__.py
+│   │   │   │   └── log_service.py
+│   │   │   └── tracing
+│   │   └── pipeline
+│   │       ├── __init__.py
+│   │       ├── crawler_pipeline.py
+│   │       ├── data_recycle_pipeline.py
+│   │       └── schemas.py
+│   ├── domains
 │   │   ├── algorithm_tasks
 │   │   │   ├── __init__.py
 │   │   │   ├── account_category_analysis.py
@@ -110,37 +110,62 @@ docker compose up -d
 │   │   │   ├── candidate_account_process.py
 │   │   │   ├── process_title.py
 │   │   │   └── prompts.py
-│   │   ├── monitor_tasks
-│   │   │   ├── __init__.py
-│   │   │   ├── auto_reply_cards_monitor.py
-│   │   │   ├── cooperate_accounts_monitor.py
-│   │   │   ├── get_off_videos.py
-│   │   │   ├── gzh_article_monitor.py
-│   │   │   ├── kimi_balance.py
-│   │   │   ├── limited_account_analysis.py
-│   │   │   └── task_processing_monitor.py
-│   │   ├── task_config.py
-│   │   ├── task_handler.py
-│   │   ├── task_mapper.py
-│   │   ├── task_scheduler.py
-│   │   └── task_utils.py
-│   └── utils
-│       ├── __init__.py
-│       ├── aigc_system_database.py
-│       ├── async_apollo_client.py
-│       ├── async_http_client.py
-│       ├── async_mysql_utils.py
-│       ├── async_tasks.py
-│       ├── common.py
-│       ├── get_cover.py
-│       ├── item.py
-│       └── response.py
+│   │   └── monitor_tasks
+│   │       ├── __init__.py
+│   │       ├── auto_reply_cards_monitor.py
+│   │       ├── cooperate_accounts_monitor.py
+│   │       ├── get_off_videos.py
+│   │       ├── gzh_article_monitor.py
+│   │       ├── kimi_balance.py
+│   │       ├── limited_account_analysis.py
+│   │       └── task_processing_monitor.py
+│   ├── infra
+│   │   ├── crawler
+│   │   │   ├── __init__.py
+│   │   │   ├── tophub
+│   │   │   ├── toutiao
+│   │   │   │   ├── __init__.py
+│   │   │   │   ├── blogger.py
+│   │   │   │   ├── detail_recommend.py
+│   │   │   │   ├── main_page_recomend.py
+│   │   │   │   ├── search.py
+│   │   │   │   ├── toutiao.js
+│   │   │   │   └── use_js.py
+│   │   │   └── wechat
+│   │   │       ├── __init__.py
+│   │   │       ├── gzh_article_stat.py
+│   │   │       ├── gzh_fans.py
+│   │   │       └── gzh_spider.py
+│   │   ├── external
+│   │   │   ├── __init__.py
+│   │   │   ├── aliyun.py
+│   │   │   ├── apollo.py
+│   │   │   ├── deepseek_official.py
+│   │   │   ├── elastic_search.py
+│   │   │   └── feishu.py
+│   │   ├── internal
+│   │   │   ├── __init__.py
+│   │   │   ├── aigc_system.py
+│   │   │   ├── long_articles.py
+│   │   │   └── piaoquan.py
+│   │   ├── shared
+│   │   │   ├── __init__.py
+│   │   │   ├── async_tasks.py
+│   │   │   ├── http_client.py
+│   │   │   └── tools.py
+│   │   └── utils
+│   │       ├── __init__.py
+│   │       ├── get_cover.py
+│   │       └── response.py
+│   └── jobs
+│       ├── task_config.py
+│       ├── task_handler.py
+│       ├── task_mapper.py
+│       └── task_utils.py
+├── app_config.toml
 ├── docker-compose.yaml
 ├── jenkins_bash.sh
 ├── requirements.txt
-├── routes
-│   ├── __init__.py
-│   └── blueprint.py
 └── task_app.py
 ```
 

+ 0 - 0
applications/ab_test/__init__.py → app/ab_test/__init__.py


+ 0 - 0
applications/ab_test/ab_accounts.py → app/ab_test/ab_accounts.py


+ 4 - 4
applications/ab_test/get_cover.py → app/ab_test/get_cover.py

@@ -1,7 +1,7 @@
-from applications.utils.response import Response
-from applications.utils import fetch_channel_info
-from applications.utils import fetch_aigc_cover
-from applications.utils import fetch_long_video_cover
+from app.infra.shared.response import Response
+from app.infra.utils import fetch_channel_info
+from app.infra.utils import fetch_aigc_cover
+from app.infra.utils import fetch_long_video_cover
 
 
 class GetCoverService(Response):

+ 0 - 0
applications/__init__.py → app/api/middleware/auth.py


+ 0 - 0
applications/crawler/toutiao/main_page_recomend.py → app/api/middleware/error_handler.py


+ 0 - 0
applications/tasks/cold_start_tasks/video_pool/__init__.py → app/api/middleware/rate_limiter.py


+ 15 - 0
app/api/service/__init__.py

@@ -0,0 +1,15 @@
+# 日志服务
+
+# 前端交互
+from .task_manager_service import TaskManager
+from .gzh_cookie_manager import GzhCookieManager
+
+# 任务调度器
+from .task_scheduler import TaskScheduler
+
+
+__all__ = [
+    "TaskManager",
+    "GzhCookieManager",
+    "TaskScheduler",
+]

+ 2 - 2
applications/service/gzh_cookie_manager.py → app/api/service/gzh_cookie_manager.py

@@ -1,4 +1,4 @@
-from applications.tasks.crawler_tasks.crawler_gzh_fans import CrawlerGzhFansBase
+from app.domains.crawler_tasks.crawler_gzh_fans import CrawlerGzhFansBase
 
 
 class GzhCookieManager(CrawlerGzhFansBase):
@@ -6,7 +6,7 @@ class GzhCookieManager(CrawlerGzhFansBase):
         super().__init__(pool, log_client)
 
     async def deal(self, data):
-        gh_id = data.get('gzh_id')
+        gh_id = data.get("gzh_id")
         if not gh_id:
             return {"error": "gh_id is required"}
 

+ 166 - 0
app/api/service/task_manager_service.py

@@ -0,0 +1,166 @@
+import json
+from typing import Optional
+
+from app.core.config import GlobalConfigSettings
+from app.core.config.settings import TaskChineseNameConfig
+
+
+class TaskConst:
+    INIT_STATUS = 0
+    PROCESSING_STATUS = 1
+    FINISHED_STATUS = 2
+    FAILED_STATUS = 99
+    STATUS_TEXT = {0: "初始化", 1: "处理中", 2: "完成", 99: "失败"}
+
+    DEFAULT_PAGE = 1
+    DEFAULT_SIZE = 50
+
+
+class TaskManagerUtils(TaskConst):
+    def __init__(self, config: TaskChineseNameConfig):
+        self.config = config
+
+    def get_task_chinese_name(self, data):
+        """
+        通过输入任务详情信息获取任务名称
+        """
+        task_name = data["task_name"]
+        task_name_chinese = self.config.name_map.get(task_name, task_name)
+
+        # account_method
+        if task_name == "crawler_gzh_articles":
+            account_method = data.get("account_method", "")
+            account_method = account_method.replace(
+                "account_association", "账号联想"
+            ).replace("search", "")
+            crawl_mode = data.get("crawl_mode", "")
+            crawl_mode = crawl_mode.replace("search", "搜索").replace(
+                "account", "抓账号"
+            )
+            strategy = data.get("strategy", "")
+            return f"{task_name_chinese}\t{crawl_mode}\t{account_method}\t{strategy}"
+        elif task_name == "article_pool_cold_start":
+            platform = data.get("platform", "")
+            platform = platform.replace("toutiao", "今日头条").replace("weixin", "微信")
+            strategy = data.get("strategy", "")
+            strategy = strategy.replace("strategy", "策略")
+            category_list = data.get("category_list", [])
+            category_list = "、".join(category_list)
+            crawler_methods = data.get("crawler_methods", [])
+            crawler_methods = "、".join(crawler_methods)
+            return f"{task_name_chinese}\t{platform}\t{crawler_methods}\t{category_list}\t{strategy}"
+        else:
+            return task_name_chinese
+
+    @staticmethod
+    def _build_where(id_eq=None, date_string=None, trace_id=None, task_status=None):
+        conds, params = [], []
+
+        if id_eq is not None:
+            conds.append("id = %s")
+            params.append(id_eq)
+
+        if date_string:  # 字符串非空
+            conds.append("date_string = %s")
+            params.append(date_string)
+
+        if trace_id:
+            conds.append("trace_id LIKE %s")
+            # 如果调用方已经传了 %,就原样用;否则自动做包含匹配
+            params.append(trace_id if "%" in trace_id else f"%{trace_id}%")
+
+        if task_status is not None:
+            conds.append("task_status = %s")
+            params.append(task_status)
+
+        where_clause = " AND ".join(conds) if conds else "1=1"
+        return where_clause, params
+
+    @staticmethod
+    def _safe_json(v):
+        try:
+            if isinstance(v, (str, bytes, bytearray)):
+                return json.loads(v)
+            return v or {}
+        except Exception:
+            return {}
+
+
+class TaskManager(TaskManagerUtils):
+    def __init__(self, pool, data, config: GlobalConfigSettings):
+        super().__init__(config.task_chinese_name)
+        self.pool = pool
+        self.data = data
+
+    async def list_tasks(self):
+        page = self.data.get("page", self.DEFAULT_PAGE)
+        page_size = self.data.get("size", self.DEFAULT_SIZE)
+        sort_by = self.data.get("sort_by", "id")
+        sort_dir = self.data.get("sort_dir", "desc").lower()
+
+        # 过滤条件
+        id_eq: Optional[int] = self.data.get("id") and int(self.data.get("id"))
+        date_string: Optional[str] = self.data.get("date_string")
+        trace_id: Optional[str] = self.data.get("trace_id")
+        task_status: Optional[int] = self.data.get("task_status") and int(
+            self.data.get("task_status")
+        )
+
+        # 1) WHERE 子句
+        where_clause, params = self._build_where(
+            id_eq, date_string, trace_id, task_status
+        )
+        sort_whitelist = {
+            "id",
+            "date_string",
+            "task_status",
+            "start_timestamp",
+            "finish_timestamp",
+        }
+        sort_by = sort_by if sort_by in sort_whitelist else "id"
+        sort_dir = "ASC" if str(sort_dir).lower() == "asc" else "DESC"
+
+        # 3) 分页(边界保护)
+        page = max(1, int(page))
+        page_size = max(1, min(int(page_size), 200))  # 适当限流
+        offset = (page - 1) * page_size
+
+        # 4) 统计总数(注意:WHERE 片段直接插入,值用参数化)
+        sql_count = f"""
+                SELECT COUNT(1) AS cnt
+                FROM long_articles_task_manager
+                WHERE {where_clause}
+        """
+        count_rows = await self.pool.async_fetch(query=sql_count, params=tuple(params))
+        total = count_rows[0]["cnt"] if count_rows else 0
+
+        # 5) 查询数据
+        sql_list = f"""
+                SELECT id, date_string, task_status, start_timestamp, finish_timestamp, trace_id, data
+                FROM long_articles_task_manager
+                WHERE {where_clause}
+                ORDER BY {sort_by} {sort_dir}
+                LIMIT %s OFFSET %s
+            """
+        list_params = (*params, page_size, offset)
+        rows = await self.pool.async_fetch(query=sql_list, params=list_params)
+        items = [
+            {
+                **r,
+                "status_text": self.STATUS_TEXT.get(
+                    r["task_status"], str(r["task_status"])
+                ),
+                "task_name": self.get_task_chinese_name(self._safe_json(r["data"])),
+            }
+            for r in rows
+        ]
+        return {"total": total, "page": page, "page_size": page_size, "items": items}
+
+    async def get_task(self, task_id: int):
+        pass
+
+    async def retry_task(self, task_id: int):
+        pass
+
+    async def cancel_task(self, task_id: int):
+        pass
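
For reference, the filtering above keeps injection out by construction: only the WHERE fragment (fixed column conditions) and a whitelisted ORDER BY are interpolated into the SQL text, while every value travels through `%s` placeholders. A minimal standalone sketch of the same pattern (the function body mirrors `_build_where`; the sample call and output are illustrative):

```python
def build_where(id_eq=None, date_string=None, trace_id=None, task_status=None):
    conds, params = [], []
    if id_eq is not None:
        conds.append("id = %s")
        params.append(id_eq)
    if date_string:
        conds.append("date_string = %s")
        params.append(date_string)
    if trace_id:
        conds.append("trace_id LIKE %s")
        # pass through if the caller already supplied %, else contains-match
        params.append(trace_id if "%" in trace_id else f"%{trace_id}%")
    if task_status is not None:
        conds.append("task_status = %s")
        params.append(task_status)
    return (" AND ".join(conds) if conds else "1=1"), params

print(build_where(trace_id="abc", task_status=2))
# -> ('trace_id LIKE %s AND task_status = %s', ['%abc%', 2])
```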

+ 17 - 9
applications/tasks/task_scheduler.py → app/api/service/task_scheduler.py

@@ -4,22 +4,23 @@ import time
 from datetime import datetime, timedelta
 from typing import Optional, Dict, Any, List
 
-from applications.api import feishu_robot
-from applications.utils import task_schedule_response
-from applications.tasks.task_handler import TaskHandler
-from applications.tasks.task_config import (
+from app.infra.external import feishu_robot
+from app.infra.shared import task_schedule_response
+from app.jobs.task_handler import TaskHandler
+from app.jobs.task_config import (
     TaskStatus,
     TaskConstants,
     get_task_config,
 )
-from applications.tasks.task_utils import (
+from app.jobs.task_utils import (
     TaskError,
     TaskValidationError,
-    TaskTimeoutError,
     TaskConcurrencyError,
-    TaskLockError,
     TaskUtils,
 )
+from app.core.config import GlobalConfigSettings
+from app.core.database import DatabaseManager
+from app.core.observability import LogService
 
 
 class TaskScheduler(TaskHandler):
@@ -31,8 +32,15 @@ class TaskScheduler(TaskHandler):
         result = await scheduler.deal()
     """
 
-    def __init__(self, data: dict, log_service, db_client, trace_id: str):
-        super().__init__(data, log_service, db_client, trace_id)
+    def __init__(
+        self,
+        data: dict,
+        log_service: LogService,
+        db_client: DatabaseManager,
+        trace_id: str,
+        config: GlobalConfigSettings,
+    ):
+        super().__init__(data, log_service, db_client, trace_id, config)
         self.table = TaskUtils.validate_table_name(TaskConstants.TASK_TABLE)
 
     # ==================== 数据库操作 ====================

+ 11 - 0
app/api/v1/endpoints/__init__.py

@@ -0,0 +1,11 @@
+from .abtest import create_abtest_bp
+from .health import create_health_bp
+from .tasks import create_tasks_bp
+from .tokens import create_tokens_bp
+
+__all__ = [
+    "create_abtest_bp",
+    "create_health_bp",
+    "create_tasks_bp",
+    "create_tokens_bp",
+]

+ 27 - 0
app/api/v1/endpoints/abtest.py

@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from pydantic import ValidationError
+from quart import Blueprint, jsonify
+
+from app.ab_test import GetCoverService
+from app.api.v1.utils import ApiDependencies
+from app.api.v1.utils import GetCoverRequest
+from app.api.v1.utils import parse_json, validation_error_response
+
+
+def create_abtest_bp(deps: ApiDependencies) -> Blueprint:
+    bp = Blueprint("abtest", __name__)
+
+    @bp.route("/get_cover", methods=["POST"])
+    async def get_cover():
+        try:
+            _, body = await parse_json(GetCoverRequest)
+        except ValidationError as e:
+            payload, status = validation_error_response(e)
+            return jsonify(payload), status
+
+        service = GetCoverService(deps.db, body)
+        result = await service.deal()
+        return jsonify(result)
+
+    return bp

+ 15 - 0
app/api/v1/endpoints/health.py

@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from quart import Blueprint, jsonify
+
+
+def create_health_bp() -> Blueprint:
+    bp = Blueprint("health", __name__)
+
+    @bp.route("/health", methods=["GET"])
+    async def health():
+        return jsonify(
+            {"code": 0, "message": "success", "data": {"message": "hello world"}}
+        )
+
+    return bp

+ 42 - 0
app/api/v1/endpoints/tasks.py

@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+from pydantic import ValidationError
+from quart import Blueprint, jsonify
+
+from app.api.service import TaskManager, TaskScheduler
+from app.api.v1.utils import ApiDependencies
+from app.api.v1.utils import RunTaskRequest, TaskListRequest
+from app.api.v1.utils import parse_json, validation_error_response
+from app.infra.shared.tools import generate_task_trace_id
+
+
+def create_tasks_bp(deps: ApiDependencies) -> Blueprint:
+    bp = Blueprint("tasks", __name__)
+
+    @bp.route("/run_task", methods=["POST"])
+    async def run_task():
+        trace_id = generate_task_trace_id()
+
+        try:
+            _, body = await parse_json(RunTaskRequest)
+        except ValidationError as e:
+            payload, status = validation_error_response(e)
+            return jsonify(payload), status
+
+        scheduler = TaskScheduler(body, deps.log, deps.db, trace_id, deps.config)
+        result = await scheduler.deal()
+        return jsonify(result)
+
+    @bp.route("/tasks", methods=["POST"])
+    async def list_tasks():
+        try:
+            _, body = await parse_json(TaskListRequest)
+        except ValidationError as e:
+            payload, status = validation_error_response(e)
+            return jsonify(payload), status
+
+        manager = TaskManager(pool=deps.db, data=body, config=deps.config)
+        result = await manager.list_tasks()
+        return jsonify(result)
+
+    return bp

+ 27 - 0
app/api/v1/endpoints/tokens.py

@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from pydantic import ValidationError
+from quart import Blueprint, jsonify
+
+from app.api.service import GzhCookieManager
+from app.api.v1.utils import ApiDependencies
+from app.api.v1.utils import SaveTokenRequest
+from app.api.v1.utils import parse_json, validation_error_response
+
+
+def create_tokens_bp(deps: ApiDependencies) -> Blueprint:
+    bp = Blueprint("tokens", __name__)
+
+    @bp.route("/save_token", methods=["POST"])
+    async def save_token():
+        try:
+            _, body = await parse_json(SaveTokenRequest)
+        except ValidationError as e:
+            payload, status = validation_error_response(e)
+            return jsonify(payload), status
+
+        manager = GzhCookieManager(pool=deps.db, log_client=deps.log)
+        result = await manager.deal(body)
+        return jsonify(result)
+
+    return bp

+ 3 - 0
app/api/v1/routes/__init__.py

@@ -0,0 +1,3 @@
+from .routes import server_routes
+
+__all__ = ["server_routes"]

+ 44 - 0
app/api/v1/routes/routes.py

@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from quart import Blueprint
+
+from app.api.v1.utils import ApiDependencies
+from app.api.v1.endpoints import (
+    create_abtest_bp,
+    create_health_bp,
+    create_tasks_bp,
+    create_tokens_bp,
+)
+from app.core.config import GlobalConfigSettings
+from app.core.database import DatabaseManager
+from app.core.observability import LogService
+
+
+def register_v1_blueprints(deps: ApiDependencies) -> Blueprint:
+    """
+    v1 路由统一注册入口(按领域拆分)。
+
+    - /api/get_cover
+    - /api/run_task
+    - /api/tasks
+    - /api/save_token
+    - /api/health
+    """
+    api = Blueprint("api", __name__, url_prefix="/api")
+
+    api.register_blueprint(create_health_bp())
+    api.register_blueprint(create_tasks_bp(deps))
+    api.register_blueprint(create_tokens_bp(deps))
+    api.register_blueprint(create_abtest_bp(deps))
+
+    return api
+
+
+def server_routes(
+    pools: DatabaseManager, log_service: LogService, config: GlobalConfigSettings
+) -> Blueprint:
+    """
+    兼容旧入口:保留 server_routes 签名,内部转为新的 deps + 统一注册。
+    """
+    deps = ApiDependencies(db=pools, log=log_service, config=config)
+    return register_v1_blueprints(deps)
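
A hedged sketch of how `server_routes` plugs into an app factory, using the `ServerContainer` providers added later in this commit; the factory itself is illustrative and not part of the diff:

```python
from quart import Quart

from app.api.v1.routes import server_routes
from app.core.dependency import ServerContainer


def create_app() -> Quart:
    # pull dependencies from the DI container (providers defined in this commit)
    container = ServerContainer()
    app = Quart(__name__)
    app.register_blueprint(
        server_routes(
            pools=container.mysql_manager(),
            log_service=container.log_service(),
            config=container.config(),
        )
    )
    return app  # serves POST /api/run_task, /api/tasks, /api/save_token, GET /api/health
```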

+ 14 - 0
app/api/v1/utils/__init__.py

@@ -0,0 +1,14 @@
+from ._utils import parse_json, validation_error_response
+from .deps import ApiDependencies
+from .schemas import RunTaskRequest, TaskListRequest, SaveTokenRequest, GetCoverRequest
+
+
+__all__ = [
+    "parse_json",
+    "validation_error_response",
+    "RunTaskRequest",
+    "TaskListRequest",
+    "SaveTokenRequest",
+    "GetCoverRequest",
+    "ApiDependencies",
+]

+ 25 - 0
app/api/v1/utils/_utils.py

@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from typing import Any, Dict, Tuple, Type, TypeVar
+
+from pydantic import BaseModel, ValidationError
+from quart import request
+
+T = TypeVar("T", bound=BaseModel)
+
+
+async def parse_json(model: Type[T]) -> Tuple[T, Dict[str, Any]]:
+    """
+    解析 JSON 请求体并用 Pydantic 校验。
+
+    Returns:
+        (obj, raw_dict) 方便向下兼容:既能用模型字段,也可把原 dict 透传给旧代码。
+    """
+    raw = await request.get_json()
+    raw = raw or {}
+    obj = model.model_validate(raw)
+    return obj, raw
+
+
+def validation_error_response(e: ValidationError) -> Tuple[Dict[str, Any], int]:
+    return {"code": 400, "message": "invalid request body", "errors": e.errors()}, 400
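
The `(obj, raw_dict)` return shape is what lets new endpoints validate with Pydantic while still handing the untyped dict to legacy services. A hedged sketch of the call pattern (the `EchoRequest` model is made up for illustration):

```python
from pydantic import BaseModel, ValidationError
from quart import jsonify

from app.api.v1.utils import parse_json, validation_error_response


class EchoRequest(BaseModel):
    message: str


async def echo():
    try:
        obj, raw = await parse_json(EchoRequest)
    except ValidationError as e:
        payload, status = validation_error_response(e)
        return jsonify(payload), status
    # obj gives typed access; raw is the original dict for old code paths
    return jsonify({"echo": obj.message, "extra_keys": sorted(set(raw) - {"message"})})
```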

+ 16 - 0
app/api/v1/utils/deps.py

@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from app.core.config import GlobalConfigSettings
+from app.core.database import DatabaseManager
+from app.core.observability import LogService
+
+
+@dataclass(frozen=True)
+class ApiDependencies:
+    """API 层依赖容器:统一管理 db/log/config 等依赖。"""
+
+    db: DatabaseManager
+    log: LogService
+    config: GlobalConfigSettings

+ 41 - 0
app/api/v1/utils/schemas.py

@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class BaseRequest(BaseModel):
+    """所有请求模型基类:默认允许额外字段,避免破坏兼容性。"""
+
+    model_config = ConfigDict(extra="allow")
+
+
+class RunTaskRequest(BaseRequest):
+    task_name: str = Field(..., min_length=1)
+    date_string: Optional[str] = None
+
+
+class TaskListRequest(BaseRequest):
+    page: int = Field(default=1, ge=1)
+    size: int = Field(default=50, ge=1, le=200)
+    sort_by: str = Field(default="id", min_length=1)
+    sort_dir: str = Field(default="desc", min_length=1)
+
+    id: Optional[int] = None
+    date_string: Optional[str] = None
+    trace_id: Optional[str] = None
+    task_status: Optional[int] = None
+
+
+class GetCoverRequest(BaseRequest):
+    """GetCoverService 的请求体字段不固定,先保持兼容。"""
+
+    # 用一个可选字段占位,避免空模型在某些场景不好读
+    payload: Optional[Dict[str, Any]] = None
+
+
+class SaveTokenRequest(BaseRequest):
+    """GzhCookieManager 的请求体字段不固定,先保持兼容。"""
+
+    token: Optional[str] = None
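
Because `BaseRequest` sets `extra="allow"`, unknown keys pass validation and survive into `model_dump()` instead of raising, which is what keeps old callers working. A quick illustrative check:

```python
from app.api.v1.utils.schemas import TaskListRequest

req = TaskListRequest.model_validate({"page": 2, "legacy_flag": True})
print(req.page)                          # 2  (validated; ge=1 still enforced)
print(req.sort_dir)                      # 'desc' (default applied)
print(req.model_dump()["legacy_flag"])   # True — extra field preserved
```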

+ 1 - 0
app/core/bootstrap/__init__.py

@@ -0,0 +1 @@
+from .resource_manager import AppContext

+ 35 - 0
app/core/bootstrap/resource_manager.py

@@ -0,0 +1,35 @@
+import logging
+from app.core.dependency import ServerContainer
+
+logger = logging.getLogger(__name__)
+
+
+class AppContext:
+    def __init__(self, container: ServerContainer):
+        self.container = container
+
+    async def start_up(self):
+        logger.info("初始化数据库连接池")
+        mysql = self.container.mysql_manager()
+        await mysql.init_pools()
+        logger.info("Mysql pools init successfully")
+
+        logger.info("初始化日志服务")
+        log_service = self.container.log_service()
+        await log_service.start()
+        logger.info("aliyun log service init successfully")
+
+    async def shutdown(self):
+        logger.info("关闭数据库连接池")
+        mysql = self.container.mysql_manager()
+        await mysql.close_pools()
+        logger.info("应用资源已释放")
+        logger.info("关闭日志服务")
+        log_service = self.container.log_service()
+        await log_service.stop()
+        logger.info("aliyun log service stopped")
+
+
+__all__ = [
+    "AppContext",
+]
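
A hedged sketch of the intended lifecycle wiring via Quart's `before_serving`/`after_serving` hooks; the wiring below is illustrative, not taken from this commit:

```python
from quart import Quart

from app.core.bootstrap import AppContext
from app.core.dependency import ServerContainer

app = Quart(__name__)
ctx = AppContext(ServerContainer())


@app.before_serving
async def _startup() -> None:
    await ctx.start_up()    # init MySQL pools + aliyun log worker


@app.after_serving
async def _shutdown() -> None:
    await ctx.shutdown()    # close pools, stop log worker
```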

+ 1 - 0
app/core/config/__init__.py

@@ -0,0 +1 @@
+from .global_settings import GlobalConfigSettings

+ 0 - 0
applications/config/es_certs.crt → app/core/config/cert/es_certs.crt


+ 46 - 0
app/core/config/global_settings.py

@@ -0,0 +1,46 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from .settings import *
+
+
+class GlobalConfigSettings(BaseSettings):
+    """应用全局配置"""
+
+    # ============ 应用基础配置 ============
+    app_name: str = Field(default="LongArticleTaskServer", description="应用名称")
+    environment: str = Field(
+        default="development", description="运行环境: development/pre/production"
+    )
+    debug: bool = Field(default=False, description="调试模式")
+
+    # ============ 数据库配置 ============
+    aigc_db: AigcDatabaseConfig = Field(default_factory=AigcDatabaseConfig)
+    long_video_db: LongVideoDatabaseConfig = Field(
+        default_factory=LongVideoDatabaseConfig
+    )
+    long_articles_db: LongArticlesDatabaseConfig = Field(
+        default_factory=LongArticlesDatabaseConfig
+    )
+    piaoquan_crawler_db: PiaoquanCrawlerDatabaseConfig = Field(
+        default_factory=PiaoquanCrawlerDatabaseConfig
+    )
+    growth_db: GrowthDatabaseConfig = Field(default_factory=GrowthDatabaseConfig)
+
+    # ============ 外部服务配置 ============
+    deepseek: DeepSeekConfig = Field(default_factory=DeepSeekConfig)
+
+    aliyun_log: AliyunLogConfig = Field(default_factory=AliyunLogConfig)
+    elasticsearch: ElasticsearchConfig = Field(default_factory=ElasticsearchConfig)
+    apollo: ApolloConfig = Field(default_factory=ApolloConfig)
+
+    # ============ 业务配置 ============
+    cold_start: ColdStartConfig = Field(default_factory=ColdStartConfig)
+    category: CategoryConfig = Field(default_factory=CategoryConfig)
+    task_chinese_name: TaskChineseNameConfig = Field(
+        default_factory=TaskChineseNameConfig
+    )
+
+    model_config = SettingsConfigDict(
+        env_file=".env", env_file_encoding="utf-8", case_sensitive=False, extra="ignore"
+    )

+ 28 - 0
app/core/config/settings/__init__.py

@@ -0,0 +1,28 @@
+from .apollo import ApolloConfig
+from .aliyun import AliyunLogConfig
+from .category import CategoryConfig
+from .cold_start import ColdStartConfig
+from .deepseek import DeepSeekConfig
+from .elasticsearch import ElasticsearchConfig
+from .mysql import AigcDatabaseConfig
+from .mysql import GrowthDatabaseConfig
+from .mysql import LongArticlesDatabaseConfig
+from .mysql import LongVideoDatabaseConfig
+from .mysql import PiaoquanCrawlerDatabaseConfig
+from .task_chinese_name import TaskChineseNameConfig
+
+
+__all__ = [
+    "ApolloConfig",
+    "AliyunLogConfig",
+    "CategoryConfig",
+    "ColdStartConfig",
+    "DeepSeekConfig",
+    "ElasticsearchConfig",
+    "AigcDatabaseConfig",
+    "GrowthDatabaseConfig",
+    "LongArticlesDatabaseConfig",
+    "LongVideoDatabaseConfig",
+    "PiaoquanCrawlerDatabaseConfig",
+    "TaskChineseNameConfig",
+]

+ 25 - 0
app/core/config/settings/aliyun.py

@@ -0,0 +1,25 @@
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class AliyunLogConfig(BaseSettings):
+    """阿里云日志配置"""
+
+    endpoint: str = "cn-hangzhou.log.aliyuncs.com"
+    access_key_id: str = "LTAIP6x1l3DXfSxm"
+    access_key_secret: str = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
+    project: str = "changwen-alg"
+    logstore: str = "long_articles_job"
+
+    model_config = SettingsConfigDict(
+        env_prefix="ALIYUN_LOG_", env_file=".env", case_sensitive=False, extra="ignore"
+    )
+
+    def to_dict(self) -> dict:
+        """转换为字典格式,用于兼容旧代码"""
+        return {
+            "endpoint": self.endpoint,
+            "access_key_id": self.access_key_id,
+            "access_key_secret": self.access_key_secret,
+            "project": self.project,
+            "logstore": self.logstore,
+        }

+ 25 - 0
app/core/config/settings/apollo.py

@@ -0,0 +1,25 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import Dict
+
+
+class ApolloConfig(BaseSettings):
+    """Apollo 配置中心配置"""
+
+    app_id: str = Field(default="LongArticlesJob", description="Apollo 应用 ID")
+    env: str = Field(default="pre", description="Apollo 环境: dev/pre/prod")
+
+    # apollo 配置中心地址映射
+    apollo_map: Dict[str, Dict[str, str]] = Field(
+        default_factory=lambda: {
+            "LongArticlesJob": {
+                "pre": "http://preapolloconfig-internal.piaoquantv.com/",
+                "dev": "https://devapolloconfig-internal.piaoquantv.com/",
+                "prod": "https://apolloconfig-internal.piaoquantv.com/",
+            }
+        }
+    )
+
+    model_config = SettingsConfigDict(
+        env_prefix="APOLLO_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 54 - 0
app/core/config/settings/category.py

@@ -0,0 +1,54 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import Dict, List
+
+
+class CategoryConfig(BaseSettings):
+    """分类配置"""
+
+    # 分类特征列表
+    features: List[str] = Field(
+        default_factory=lambda: [
+            "CateSciencePop",
+            "CateMilitaryHistory",
+            "CateFamily",
+            "CateSocialRule",
+            "CateOddities",
+            "CateGossip",
+            "CateHealth",
+            "CateEmotional",
+            "CateNational",
+            "CateModernFigure",
+            "CateNostalgic",
+            "CatePolitics",
+            "CateHistoryFigure",
+            "CateSocialPhenomena",
+            "CateFinTech",
+            "view_count_rate",
+        ]
+    )
+
+    # 分类映射
+    category_map: Dict[str, str] = Field(
+        default_factory=lambda: {
+            "知识科普": "CateSciencePop",
+            "军事历史": "CateMilitaryHistory",
+            "家长里短": "CateFamily",
+            "社会法治": "CateSocialRule",
+            "奇闻趣事": "CateOddities",
+            "名人八卦": "CateGossip",
+            "健康养生": "CateHealth",
+            "情感故事": "CateEmotional",
+            "国家大事": "CateNational",
+            "现代人物": "CateModernFigure",
+            "怀旧时光": "CateNostalgic",
+            "政治新闻": "CatePolitics",
+            "历史人物": "CateHistoryFigure",
+            "社会现象": "CateSocialPhenomena",
+            "财经科技": "CateFinTech",
+        }
+    )
+
+    model_config = SettingsConfigDict(
+        env_prefix="CATEGORY_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 37 - 0
app/core/config/settings/cold_start.py

@@ -0,0 +1,37 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import Dict
+
+
+class ColdStartConfig(BaseSettings):
+    """冷启动配置"""
+
+    # 分类映射
+    cold_start_category_map: Dict[str, str] = Field(
+        default_factory=lambda: {
+            "知识科普": "20250813032110801233225",
+            "国家大事": "20250813032845706844854",
+            "历史人物": "20250813033415138644740",
+            "奇闻趣事": "20250813033056703155233",
+            "名人八卦": "20250813033257335290696",
+            "怀旧时光": "20250813033536622149424",
+            "情感故事": "20250813033605574986309",
+            "社会法治": "20250813033829272898432",
+            "现代人物": "20250813034015084388155",
+            "社会现象": "20250813034056506771309",
+            "健康养生": "20250813034120792523588",
+            "家长里短": "20250813034159621236902",
+            "军事历史": "20250813034227997109122",
+            "财经科技": "20250813034253336624837",
+            "政治新闻": "20250813034320561348119",
+        }
+    )
+
+    # 输入源映射
+    input_source_map: Dict[str, int] = Field(
+        default_factory=lambda: {"weixin": 5, "toutiao": 6}
+    )
+
+    model_config = SettingsConfigDict(
+        env_prefix="COLD_START_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 25 - 0
app/core/config/settings/deepseek.py

@@ -0,0 +1,25 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class DeepSeekConfig(BaseSettings):
+    """DeepSeek API 配置"""
+
+    api_key: str = Field(
+        default="sk-cfd2df92c8864ab999d66a615ee812c5", description="DeepSeek API Key"
+    )
+    reasoner_model: str = Field(
+        default="deepseek-reasoner", description="DeepSeek 推理模型"
+    )
+    chat_model: str = Field(default="deepseek-chat", description="DeepSeek 对话模型")
+
+    model_config = SettingsConfigDict(
+        env_prefix="DEEPSEEK_", env_file=".env", case_sensitive=False, extra="ignore"
+    )
+
+    def get_model_map(self) -> dict:
+        """获取模型映射字典,兼容旧代码"""
+        return {
+            "DeepSeek-R1": self.reasoner_model,
+            "DeepSeek-V3": self.chat_model,
+        }

+ 20 - 0
app/core/config/settings/elasticsearch.py

@@ -0,0 +1,20 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import List
+
+
+class ElasticsearchConfig(BaseSettings):
+    """Elasticsearch 配置"""
+
+    hosts: List[str] = Field(default_factory=lambda: ["http://localhost:9200"])
+    username: str = ""
+    password: str = ""
+    index: str = "articles"
+
+    # ES mappings 和 settings(从 elastic_search_mappings.py 迁移)
+    mappings: dict = Field(default_factory=dict)
+    settings: dict = Field(default_factory=dict)
+
+    model_config = SettingsConfigDict(
+        env_prefix="ES_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 105 - 0
app/core/config/settings/mysql.py

@@ -0,0 +1,105 @@
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class DatabaseConfig(BaseSettings):
+    """数据库配置基类"""
+
+    host: str
+    port: int = 3306
+    user: str
+    password: str
+    db: str
+    charset: str = "utf8mb4"
+    minsize: int = 5
+    maxsize: int = 20
+
+    model_config = SettingsConfigDict(
+        env_prefix="", case_sensitive=False, extra="ignore"
+    )
+
+    def to_dict(self) -> dict:
+        """转换为字典格式,用于兼容旧代码"""
+        return {
+            "host": self.host,
+            "port": self.port,
+            "user": self.user,
+            "password": self.password,
+            "db": self.db,
+            "charset": self.charset,
+            "minsize": self.minsize,
+            "maxsize": self.maxsize,
+        }
+
+
+class AigcDatabaseConfig(DatabaseConfig):
+    """AIGC 数据库配置"""
+
+    host: str = "rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com"
+    user: str = "crawler_admin"
+    password: str = "cyber#crawler_2023"
+    db: str = "aigc-admin-prod"
+
+    model_config = SettingsConfigDict(
+        env_prefix="AIGC_DB_", env_file=".env", case_sensitive=False, extra="ignore"
+    )
+
+
+class LongVideoDatabaseConfig(DatabaseConfig):
+    """长视频数据库配置"""
+
+    host: str = "rr-bp1x9785e8h5452bi157.mysql.rds.aliyuncs.com"
+    user: str = "wx2016_longvideo"
+    password: str = "wx2016_longvideoP@assword1234"
+    db: str = "longvideo"
+
+    model_config = SettingsConfigDict(
+        env_prefix="LONG_VIDEO_DB_",
+        env_file=".env",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+
+class LongArticlesDatabaseConfig(DatabaseConfig):
+    """长文数据库配置"""
+
+    host: str = "rm-bp14529nwwcw75yr1ko.mysql.rds.aliyuncs.com"
+    user: str = "changwen_admin"
+    password: str = "changwen@123456"
+    db: str = "long_articles"
+
+    model_config = SettingsConfigDict(
+        env_prefix="LONG_ARTICLES_DB_",
+        env_file=".env",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+
+class PiaoquanCrawlerDatabaseConfig(DatabaseConfig):
+    """票圈爬虫数据库配置"""
+
+    host: str = "rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com"
+    user: str = "crawler"
+    password: str = "crawler123456@"
+    db: str = "piaoquan-crawler"
+
+    model_config = SettingsConfigDict(
+        env_prefix="PIAOQUAN_CRAWLER_DB_",
+        env_file=".env",
+        case_sensitive=False,
+        extra="ignore",
+    )
+
+
+class GrowthDatabaseConfig(DatabaseConfig):
+    """增长数据库配置"""
+
+    host: str = "rm-bp17q95335a99272b.mysql.rds.aliyuncs.com"
+    user: str = "crawler"
+    password: str = "crawler123456@"
+    db: str = "growth"
+
+    model_config = SettingsConfigDict(
+        env_prefix="GROWTH_DB_", env_file=".env", case_sensitive=False, extra="ignore"
+    )

+ 45 - 0
app/core/config/settings/task_chinese_name.py

@@ -0,0 +1,45 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing import Dict
+
+
+class TaskChineseNameConfig(BaseSettings):
+    """冷启动配置"""
+
+    # 分类映射
+    name_map: Dict[str, str] = Field(
+        default_factory=lambda: {
+            "title_rewrite": "标题重写",
+            "crawler_gzh_articles": "抓取公众号文章",
+            "crawler_account_manager": "抓取账号管理",
+            "article_pool_category_generation": "文章池品类生成",
+            "candidate_account_quality_analysis": "候选账号质量分析",
+            "article_pool_cold_start": "文章路冷启动",
+            "crawler_toutiao": "头条抓取",
+            "task_processing_monitor": "协程监测",
+            "update_root_source_id": "更新今日root_source_id",
+            "daily_publish_articles_recycle": "回收今日发文",
+            "inner_article_monitor": "账号发文违规监测",
+            "outside_article_monitor": "外部服务号发文监测",
+            "get_off_videos": "自动下架视频",
+            "check_publish_video_audit_status": "校验发布视频状态",
+            "check_kimi_balance": "检验kimi余额",
+            "account_category_analysis": "账号品类分析",
+            "mini_program_detail_process": "更新小程序信息",
+            "crawler_detail_analysis": "抓取详情分析",
+            "limited_account_analysis": "限流账号分析处理",
+            "auto_follow_account": "自动关注账号",
+            "update_account_open_rate_avg": "更新账号平均打开率",
+            "update_limited_account_info": "更新限流账号信息",
+            "update_account_read_avg": "更新账号平均阅读率",
+            "get_follow_result": "获取自动关注回复",
+            "extract_reply_result": "解析自动回复结果",
+        }
+    )
+
+    model_config = SettingsConfigDict(
+        env_prefix="TASK_CHINESE_NAME_",
+        env_file=".env",
+        case_sensitive=False,
+        extra="ignore",
+    )

+ 1 - 0
app/core/database/__init__.py

@@ -0,0 +1 @@
+from .mysql_pools import DatabaseManager

+ 53 - 25
applications/database/mysql_pools.py → app/core/database/mysql_pools.py

@@ -2,43 +2,52 @@ import logging
 
 from aiomysql import create_pool
 from aiomysql.cursors import DictCursor
-from applications.config import *
+
+from app.core.config import GlobalConfigSettings
+from app.core.observability import LogService
+
 
 logging.basicConfig(level=logging.INFO)
 
 
-class DatabaseManager:
-    def __init__(self):
-        self.databases = None
+class DatabaseManager(LogService):
+    def __init__(self, config: GlobalConfigSettings):
+        super().__init__(config.aliyun_log)
+        self.database_mapper = {
+            "aigc": config.aigc_db,
+            "growth": config.growth_db,
+            "long_video": config.long_video_db,
+            "long_articles": config.long_articles_db,
+            "piaoquan_crawler": config.piaoquan_crawler_db,
+        }
         self.pools = {}
 
     async def init_pools(self):
         # 从配置获取数据库配置,也可以直接在这里配置
-        self.databases = {
-            "aigc": aigc_db_config,
-            "long_video": long_video_db_config,
-            "long_articles": long_articles_db_config,
-            "piaoquan_crawler": piaoquan_crawler_db_config,
-            "growth": growth_db_config,
-        }
-
-        for db_name, config in self.databases.items():
+        for db_name, config in self.database_mapper.items():
             try:
                 pool = await create_pool(
-                    host=config["host"],
-                    port=config["port"],
-                    user=config["user"],
-                    password=config["password"],
-                    db=config["db"],
-                    minsize=config["minsize"],
-                    maxsize=config["maxsize"],
+                    host=config.host,
+                    port=config.port,
+                    user=config.user,
+                    password=config.password,
+                    db=config.db,
+                    minsize=config.minsize,
+                    maxsize=config.maxsize,
                     cursorclass=DictCursor,
                     autocommit=True,
                 )
                 self.pools[db_name] = pool
-                logging.info(f"Created connection pool for {db_name}")
+                logging.info(f"{db_name} MYSQL连接池 created successfully")
+
             except Exception as e:
-                logging.error(f"Failed to create pool for {db_name}: {str(e)}")
+                await self.log(
+                    contents={
+                        "db_name": db_name,
+                        "error": str(e),
+                        "message": f"Failed to create pool for {db_name}",
+                    }
+                )
                 self.pools[db_name] = None
 
     async def close_pools(self):
@@ -46,7 +55,7 @@ class DatabaseManager:
             if pool:
                 pool.close()
                 await pool.wait_closed()
-                logging.info(f"Closed connection pool for {name}")
+                logging.info(f"{name} MYSQL连接池 closed successfully")
 
     async def async_fetch(
         self, query, db_name="long_articles", params=None, cursor_type=DictCursor
@@ -63,7 +72,16 @@ class DatabaseManager:
 
             return fetch_response
         except Exception as e:
-            logging.error(f"Failed to fetch {query}: {str(e)}")
+            await self.log(
+                contents={
+                    "task": "async_fetch",
+                    "db_name": db_name,
+                    "error": str(e),
+                    "message": f"Failed to fetch data from {db_name}",
+                    "query": query,
+                    "params": params,
+                }
+            )
             return None
 
     async def async_save(
@@ -85,10 +103,20 @@ class DatabaseManager:
                     return affected_rows
                 except Exception as e:
                     await connection.rollback()
+                    await self.log(
+                        contents={
+                            "task": "async_save",
+                            "db_name": db_name,
+                            "error": str(e),
+                            "message": f"Failed to save data to {db_name}",
+                            "query": query,
+                            "params": params,
+                        }
+                    )
                     raise e
 
     def get_pool(self, db_name):
         return self.pools.get(db_name)
 
     def list_databases(self):
-        return list(self.databases.keys())
+        return list(self.database_mapper.keys())
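
A hedged usage sketch of the reworked manager (requires reachable databases; the query text is illustrative):

```python
import asyncio

from app.core.config import GlobalConfigSettings
from app.core.database import DatabaseManager


async def main() -> None:
    db = DatabaseManager(GlobalConfigSettings())
    await db.init_pools()
    # db_name defaults to "long_articles"; values stay parameterized
    rows = await db.async_fetch(
        query="SELECT id, task_status FROM long_articles_task_manager LIMIT %s",
        params=(5,),
    )
    print(rows)
    await db.close_pools()


asyncio.run(main())
```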

+ 1 - 0
app/core/dependency/__init__.py

@@ -0,0 +1 @@
+from .dependencies import ServerContainer

+ 21 - 0
app/core/dependency/dependencies.py

@@ -0,0 +1,21 @@
+from dependency_injector import containers, providers
+
+from app.core.config import GlobalConfigSettings
+from app.core.database import DatabaseManager
+from app.core.observability import LogService
+
+
+class ServerContainer(containers.DeclarativeContainer):
+    # config
+    config = providers.Singleton(GlobalConfigSettings)
+
+    # 阿里云日志
+    log_service = providers.Singleton(LogService, log_config=config.provided.aliyun_log)
+
+    # MySQL
+    mysql_manager = providers.Singleton(DatabaseManager, config=config)
+
+
+__all__ = [
+    "ServerContainer",
+]
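
Since every provider is `providers.Singleton`, repeated calls hand back the same instance, so the pool manager and log worker are shared process-wide. Illustrative check:

```python
from app.core.dependency import ServerContainer

container = ServerContainer()
assert container.config() is container.config()                  # one settings object
assert container.mysql_manager() is container.mysql_manager()    # one pool manager
log_service = container.log_service()  # built with config().aliyun_log via .provided
```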

+ 1 - 0
app/core/observability/__init__.py

@@ -0,0 +1 @@
+from .logging import LogService

+ 1 - 0
app/core/observability/logging/__init__.py

@@ -0,0 +1 @@
+from .log_service import LogService

+ 95 - 0
app/core/observability/logging/log_service.py

@@ -0,0 +1,95 @@
+import asyncio
+import traceback
+import time, json
+import datetime
+import contextlib
+from typing import Optional
+
+from aliyun.log import LogClient, PutLogsRequest, LogItem
+from app.core.config.settings import AliyunLogConfig
+
+
+class LogService:
+    def __init__(self, log_config: AliyunLogConfig):
+        self.config = log_config
+
+        self.client: Optional[LogClient] = None
+        self.queue: Optional[asyncio.Queue] = None
+
+        self._worker_task: Optional[asyncio.Task] = None
+        self._running = False
+
+    async def start(self):
+        if self._running:
+            return
+
+        self.client = LogClient(
+            self.config.endpoint,
+            self.config.access_key_id,
+            self.config.access_key_secret,
+        )
+        self.queue = asyncio.Queue(maxsize=10000)
+
+        self._running = True
+        self._worker_task = asyncio.create_task(self._worker())
+
+    async def stop(self):
+        if not self._running:
+            return
+
+        self._running = False
+
+        if self._worker_task:
+            self._worker_task.cancel()
+            with contextlib.suppress(asyncio.CancelledError):
+                await self._worker_task
+
+        self._worker_task = None
+        self.queue = None
+        self.client = None
+
+    async def log(self, contents: dict):
+        if not self._running or self.queue is None:
+            return
+
+        try:
+            self.queue.put_nowait(contents)
+        except asyncio.QueueFull:
+            # 可以打 stderr / 统计丢日志数量
+            pass
+
+    async def _worker(self):
+        try:
+            while self._running:
+                contents = await self.queue.get()
+                try:
+                    await asyncio.to_thread(self._put_log, contents)
+                except Exception as e:
+                    print(f"[Log Error] {e}")
+                    print(traceback.format_exc())
+        except asyncio.CancelledError:
+            pass
+
+    def _put_log(self, contents: dict):
+        timestamp = int(time.time())
+        contents["datetime"] = datetime.datetime.now().isoformat()
+
+        safe_items = [
+            (
+                str(k),
+                json.dumps(v, ensure_ascii=False)
+                if isinstance(v, (dict, list))
+                else str(v),
+            )
+            for k, v in contents.items()
+        ]
+
+        log_item = LogItem(timestamp=timestamp, contents=safe_items)
+        req = PutLogsRequest(
+            self.config.project,
+            self.config.logstore,
+            topic="",
+            source="",
+            logitems=[log_item],
+        )
+        self.client.put_logs(req)
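
A hedged standalone run of the queue-based logger (real credentials come from `.env`; the sleep merely yields to the worker, and anything still queued at `stop()` is dropped by the cancel-based shutdown above):

```python
import asyncio

from app.core.config.settings import AliyunLogConfig
from app.core.observability import LogService


async def main() -> None:
    svc = LogService(AliyunLogConfig())
    await svc.start()                                    # spawn the draining worker
    await svc.log({"task": "demo", "message": "hello"})  # non-blocking enqueue
    await asyncio.sleep(0.1)                             # let the worker flush
    await svc.stop()                                     # cancel the worker


asyncio.run(main())
```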

+ 0 - 0
applications/pipeline/__init__.py → app/core/pipeline/__init__.py


+ 9 - 11
applications/pipeline/crawler_pipeline.py → app/core/pipeline/crawler_pipeline.py

@@ -1,28 +1,26 @@
-import time
-
-from typing import Any, Dict, Tuple, Callable
-
+from typing import Dict, Tuple
 from pydantic import BaseModel
 
-from applications.api import AsyncApolloApi
-from applications.utils import CrawlerMetaArticle
-from applications.utils import CrawlerMetaAccount
+from app.infra.external import AsyncApolloApi
+from .schemas import CrawlerMetaArticle, CrawlerMetaAccount
 
 
-class CrawlerPipeline(AsyncApolloApi):
+class CrawlerPipeline:
     MODEL_TABLE_MAP: Dict[str, Tuple[type[BaseModel], str]] = {
         "article": (CrawlerMetaArticle, "crawler_meta_article"),
         "account": (CrawlerMetaAccount, "crawler_candidate_account_pool"),
         # 如后续有新类型,直接在这里加即可
     }
 
-    def __init__(self, pool, log_client):
-        super().__init__()
+    def __init__(self, pool, log_client, config):
         self.pool = pool
         self.log_client = log_client
+        self.apollo_client = AsyncApolloApi(apollo_config=config, app_id=None, env=None)
 
     async def whether_title_sensitive(self, title: str) -> bool:
-        sensitive_word_list = await self.get_config_value("sensitive_word_list")
+        sensitive_word_list = await self.apollo_client.get_config_value(
+            "sensitive_word_list"
+        )
         for word in sensitive_word_list:
             if word in title:
                 return True

+ 1 - 1
applications/pipeline/data_recycle_pipeline.py → app/core/pipeline/data_recycle_pipeline.py

@@ -1,7 +1,7 @@
 import json
 from typing import List, Dict
 
-from applications.utils import show_desc_to_sta, str_to_md5
+from app.infra.shared.tools import show_desc_to_sta, str_to_md5
 
 
 insert_outside_article_query = """

+ 0 - 0
applications/utils/item.py → app/core/pipeline/schemas.py


+ 0 - 0
applications/tasks/algorithm_tasks/__init__.py → app/domains/algorithm_tasks/__init__.py


+ 0 - 0
applications/tasks/algorithm_tasks/account_category_analysis.py → app/domains/algorithm_tasks/account_category_analysis.py


+ 6 - 1
applications/tasks/algorithm_tasks/models.py → app/domains/algorithm_tasks/models.py

@@ -6,7 +6,12 @@ import statsmodels.api as sm
 
 from pandas import DataFrame
 
-from applications.config import CATEGORY_FEATURES, CATEGORY_MAP
+from app.core.config import GlobalConfigSettings
+
+config = GlobalConfigSettings()
+
+CATEGORY_FEATURES = config.category.features
+CATEGORY_MAP = config.category.category_map
 
 
 class CategoryRegression:

+ 0 - 0
applications/tasks/analysis_task/__init__.py → app/domains/analysis_task/__init__.py


+ 0 - 0
applications/tasks/analysis_task/account_position_info.py → app/domains/analysis_task/account_position_info.py


+ 1 - 1
applications/tasks/analysis_task/crawler_detail.py → app/domains/analysis_task/crawler_detail.py

@@ -1,4 +1,4 @@
-from applications.api import feishu_robot
+from app.infra.external import feishu_robot
 
 
 class CrawlerDetailAnalysisConst:

+ 0 - 0
applications/tasks/cold_start_tasks/__init__.py → app/domains/cold_start_tasks/__init__.py


+ 0 - 0
applications/tasks/cold_start_tasks/article_pool/__init__.py → app/domains/cold_start_tasks/article_pool/__init__.py


+ 0 - 0
applications/tasks/cold_start_tasks/article_pool/article_pool_cold_start_const.py → app/domains/cold_start_tasks/article_pool/article_pool_cold_start_const.py


+ 1 - 1
applications/tasks/cold_start_tasks/article_pool/article_pool_cold_start_strategy.py → app/domains/cold_start_tasks/article_pool/article_pool_cold_start_strategy.py

@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from typing import List, Dict
-from applications.tasks.cold_start_tasks.article_pool.article_pool_cold_start_const import (
+from app.domains.cold_start_tasks.article_pool.article_pool_cold_start_const import (
     ArticlePoolColdStartConst,
 )
 

+ 2 - 2
applications/tasks/cold_start_tasks/article_pool/article_pool_filter_strategy.py → app/domains/cold_start_tasks/article_pool/article_pool_filter_strategy.py

@@ -3,8 +3,8 @@ from __future__ import annotations
 from typing import Optional
 from pandas import DataFrame
 
-from applications.api import feishu_robot
-from applications.tasks.cold_start_tasks.article_pool.article_pool_cold_start_const import (
+from app.infra.external import feishu_robot
+from app.domains.cold_start_tasks.article_pool.article_pool_cold_start_const import (
     ArticlePoolColdStartConst,
 )
 

+ 21 - 12
applications/tasks/cold_start_tasks/article_pool_cold_start.py → app/domains/cold_start_tasks/article_pool_cold_start.py

@@ -9,20 +9,26 @@ from typing import List
 from pandas import DataFrame
 from tqdm.asyncio import tqdm
 
-from applications.api import task_apollo, feishu_robot
-from applications.api import auto_create_crawler_task
-from applications.api import auto_bind_crawler_task_to_generate_task
-from applications.config import cold_start_category_map, input_source_map
-from applications.utils import get_titles_from_produce_plan
-from applications.tasks.cold_start_tasks.article_pool import (
+from app.infra.external import feishu_robot
+from app.infra.external import AsyncApolloApi
+
+from app.infra.internal import auto_create_crawler_task
+from app.infra.internal import auto_bind_crawler_task_to_generate_task
+from app.infra.internal import get_titles_from_produce_plan
+
+from app.domains.cold_start_tasks.article_pool import (
     ArticlePoolColdStartStrategy,
     ArticlePoolFilterStrategy,
 )
 
 
 class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrategy):
-    def __init__(self, pool, log_client, trace_id):
+    def __init__(self, pool, log_client, trace_id, global_config):
         super().__init__(pool, log_client, trace_id)
+        self.config = global_config.cold_start
+        self.apollo_client = AsyncApolloApi(
+            apollo_config=global_config.apollo, app_id=None, env=None
+        )
 
     async def get_article_from_meta_table(
         self, platform: str, crawl_method: str, strategy: str, category: str | None
@@ -145,7 +151,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
                 "inputSourceValue": crawler_plan_id,
                 "inputSourceLabel": crawler_plan_name,
                 "inputSourceModal": 3,
-                "inputSourceChannel": input_source_map[platform],
+                "inputSourceChannel": self.config.input_source_map[platform],
             }
         ]
         generate_plan_response = await auto_bind_crawler_task_to_generate_task(
@@ -194,7 +200,9 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
         match strategy:
             case "strategy_v1":
                 # split article into each category
-                category_list = await task_apollo.get_config_value(key="category_list")
+                category_list = await self.apollo_client.get_config_value(
+                    key="category_list"
+                )
                 for ai_category in category_list:
                     filter_category_df = filter_article_df[
                         filter_article_df["category_by_ai"] == ai_category
@@ -241,11 +249,11 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
 
     async def cold_start_by_category(self, category_list, platform, strategy):
         if not category_list:
-            category_list = list(cold_start_category_map.keys())
+            category_list = list(self.config.cold_start_category_map.keys())
 
         for category in tqdm(category_list):
             try:
-                plan_id = cold_start_category_map[category]
+                plan_id = self.config.cold_start_category_map[category]
                 affected_rows = await self.filter_published_titles(plan_id)
                 await self.log_client.log(
                     contents={
@@ -275,6 +283,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
                         "function": "deal",
                         "traceback": traceback.format_exc(),
                     },
+                    mention=False,
                 )
 
         if self.cold_start_records:
@@ -363,7 +372,7 @@ class ArticlePoolColdStart(ArticlePoolColdStartStrategy, ArticlePoolFilterStrate
                     }
                 )
 
-                crawl_methods_map = await task_apollo.get_config_value(
+                crawl_methods_map = await self.apollo_client.get_config_value(
                     key="category_cold_start_map"
                 )
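
The cold-start task now receives the whole settings object, keeps its cold_start slice, and builds its own Apollo client from the apollo slice. A wiring sketch (pool and log_client construction is elided; "weixin" and the trace id are placeholder values):

    # wiring sketch -- platform and trace_id values are placeholders
    from app.core.config import GlobalConfigSettings
    from app.domains.cold_start_tasks.article_pool_cold_start import ArticlePoolColdStart

    async def run_cold_start(pool, log_client):
        task = ArticlePoolColdStart(
            pool, log_client, trace_id="demo-trace", global_config=GlobalConfigSettings()
        )
        await task.cold_start_by_category(
            category_list=None, platform="weixin", strategy="strategy_v1"
        )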
 

+ 0 - 0
app/domains/cold_start_tasks/video_pool/__init__.py


+ 2 - 2
applications/tasks/cold_start_tasks/video_pool/video_pool_audit_strategy.py → app/domains/cold_start_tasks/video_pool/video_pool_audit_strategy.py

@@ -1,5 +1,5 @@
-from applications.api import fetch_piaoquan_video_list_detail
-from applications.api import insert_crawler_relation_to_aigc_system
+from app.infra.internal import fetch_piaoquan_video_list_detail
+from app.infra.internal import insert_crawler_relation_to_aigc_system
 
 from .video_pool_const import VideoPoolConst
 

+ 0 - 0
applications/tasks/cold_start_tasks/video_pool/video_pool_const.py → app/domains/cold_start_tasks/video_pool/video_pool_const.py


+ 0 - 0
applications/tasks/cold_start_tasks/video_pool_cold_start.py → app/domains/cold_start_tasks/video_pool_cold_start.py


+ 0 - 0
applications/tasks/crawler_tasks/__init__.py → app/domains/crawler_tasks/__init__.py


+ 0 - 0
applications/tasks/crawler_tasks/crawler_account_manager.py → app/domains/crawler_tasks/crawler_account_manager.py


+ 14 - 14
applications/tasks/crawler_tasks/crawler_gzh.py → app/domains/crawler_tasks/crawler_gzh.py

@@ -1,19 +1,19 @@
 from __future__ import annotations
 
 import asyncio
-import time, json
+import time
 import traceback
 from datetime import datetime, date, timedelta
 from typing import List, Dict
 from tqdm.asyncio import tqdm
 
-from applications.api import feishu_robot
-from applications.crawler.wechat import weixin_search
-from applications.crawler.wechat import get_article_detail
-from applications.crawler.wechat import get_article_list_from_account
-from applications.pipeline import CrawlerPipeline
-from applications.utils import timestamp_to_str, show_desc_to_sta
-from applications.utils import get_hot_titles, generate_gzh_id
+from app.infra.internal import get_hot_titles
+from app.infra.external import feishu_robot
+from app.infra.crawler.wechat import weixin_search
+from app.infra.crawler.wechat import get_article_detail
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.core.pipeline import CrawlerPipeline
+from app.infra.shared.tools import timestamp_to_str, show_desc_to_sta, generate_gzh_id
 
 
 class CrawlerGzhConst:
@@ -38,8 +38,8 @@ class CrawlerGzhConst:
 
 
 class CrawlerGzhBaseStrategy(CrawlerPipeline, CrawlerGzhConst):
-    def __init__(self, pool, log_client, trace_id):
-        super().__init__(pool, log_client)
+    def __init__(self, pool, log_client, trace_id, config):
+        super().__init__(pool, log_client, config.apollo)
         self.trace_id = trace_id
 
     async def get_crawler_accounts(self, method: str, strategy: str) -> List[Dict]:
@@ -209,8 +209,8 @@ class CrawlerGzhBaseStrategy(CrawlerPipeline, CrawlerGzhConst):
 
 
 class CrawlerGzhAccountArticles(CrawlerGzhBaseStrategy):
-    def __init__(self, pool, log_client, trace_id):
-        super().__init__(pool, log_client, trace_id)
+    def __init__(self, pool, log_client, trace_id, config):
+        super().__init__(pool, log_client, trace_id, config)
 
     async def insert_article_into_meta(self, gh_id, account_method, msg_list):
         """
@@ -300,8 +300,8 @@ class CrawlerGzhAccountArticles(CrawlerGzhBaseStrategy):
 
 
 class CrawlerGzhSearchArticles(CrawlerGzhBaseStrategy):
-    def __init__(self, pool, log_client, trace_id):
-        super().__init__(pool, log_client, trace_id)
+    def __init__(self, pool, log_client, trace_id, config):
+        super().__init__(pool, log_client, trace_id, config)
 
     async def crawl_search_articles_detail(
         self, article_list: List[Dict], source_title: str

+ 3 - 3
applications/tasks/crawler_tasks/crawler_gzh_fans.py → app/domains/crawler_tasks/crawler_gzh_fans.py

@@ -3,13 +3,13 @@ import json
 import time
 from datetime import datetime
 
-from applications.crawler.wechat import (
+from app.infra.crawler.wechat import (
     get_gzh_fans,
     get_access_token,
     get_union_id_batch,
 )
-from applications.api import feishu_robot
-from applications.utils import run_tasks_with_asyncio_task_group
+from app.infra.external import feishu_robot
+from app.infra.shared import run_tasks_with_asyncio_task_group
 
 
 class CrawlerGzhFansConst:

+ 23 - 13
applications/tasks/crawler_tasks/crawler_toutiao.py → app/domains/crawler_tasks/crawler_toutiao.py

@@ -1,21 +1,25 @@
 from __future__ import annotations
 
 import asyncio
-import json
-import time
+import time, json
 import aiohttp
 import traceback
 from datetime import datetime
-from typing import List, Dict
+from typing import List
 
 from tqdm import tqdm
 
-from applications.api import feishu_robot
-from applications.crawler.toutiao import get_toutiao_account_info_list
-from applications.crawler.toutiao import search_in_toutiao
-from applications.crawler.toutiao import get_toutiao_detail
-from applications.pipeline import CrawlerPipeline
-from applications.utils import async_proxy, get_top_article_title_list
+from app.infra.crawler.toutiao import get_toutiao_account_info_list
+from app.infra.crawler.toutiao import search_in_toutiao
+from app.infra.crawler.toutiao import get_toutiao_detail
+from app.infra.external import feishu_robot
+from app.infra.internal import get_top_article_title_list
+from app.infra.shared.tools import async_proxy
+
+from app.core.pipeline import CrawlerPipeline
+from app.core.config import GlobalConfigSettings
+from app.core.database import DatabaseManager
+from app.core.observability import LogService
 
 
 class CrawlerToutiaoConst:
@@ -47,8 +51,14 @@ class CrawlerToutiaoConst:
 
 
 class CrawlerToutiao(CrawlerPipeline, CrawlerToutiaoConst):
-    def __init__(self, pool, log_client, trace_id):
-        super().__init__(pool, log_client)
+    def __init__(
+        self,
+        pool: DatabaseManager,
+        log_client: LogService,
+        trace_id: str,
+        config: GlobalConfigSettings,
+    ):
+        super().__init__(pool, log_client, config.apollo)
         self.trace_id = trace_id
 
     async def get_request_params(self, category):
@@ -124,7 +134,7 @@ class CrawlerToutiao(CrawlerPipeline, CrawlerToutiaoConst):
         has_more = True
         current_cursor = max_behot_time
         max_cursor = max_cursor or self.DEFAULT_CURSOR
-        cookie = await self.get_config_value(
+        cookie = await self.apollo_client.get_config_value(
             key="toutiao_blogger_cookie", output_type="string"
         )
         while has_more:
@@ -410,7 +420,7 @@ class CrawlerToutiao(CrawlerPipeline, CrawlerToutiaoConst):
     # discover candidate accounts via search
     async def search_candidate_accounts(self):
         top_title_list = await get_top_article_title_list(pool=self.pool)
-        cookie = await self.get_config_value(
+        cookie = await self.apollo_client.get_config_value(
             key="toutiao_blogger_cookie", output_type="string"
         )
         for article in top_title_list:
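
CrawlerToutiao is the one call site in this commit that also gains type annotations on its dependencies. A construction sketch (whether DatabaseManager and LogService take zero-argument constructors is an assumption; the real bootstrap lives under app/core/bootstrap):

    # construction sketch -- constructor arguments are assumptions
    from app.core.config import GlobalConfigSettings
    from app.core.database import DatabaseManager
    from app.core.observability import LogService
    from app.domains.crawler_tasks.crawler_toutiao import CrawlerToutiao

    async def run_toutiao(pool: DatabaseManager, log_client: LogService):
        crawler = CrawlerToutiao(
            pool=pool,
            log_client=log_client,
            trace_id="t-001",
            config=GlobalConfigSettings(),
        )
        await crawler.search_candidate_accounts()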

+ 0 - 0
applications/tasks/data_recycle_tasks/__init__.py → app/domains/data_recycle_tasks/__init__.py


+ 6 - 9
applications/tasks/data_recycle_tasks/article_detail_stat.py → app/domains/data_recycle_tasks/article_detail_stat.py

@@ -1,14 +1,11 @@
-import json
 import time
 import traceback
 from datetime import datetime, timedelta
 
-from applications.api import feishu_robot
+from app.infra.crawler.wechat import get_gzh_stat_daily
+from app.infra.crawler.wechat import get_access_token
 
-from applications.crawler.wechat import get_gzh_stat_daily
-from applications.crawler.wechat import get_access_token
-
-from applications.utils import run_tasks_with_asyncio_task_group
+from app.infra.shared import run_tasks_with_asyncio_task_group
 
 
 class ArticleDetailStatConst:
@@ -315,7 +312,7 @@ class ArticleDetailStat(ArticleDetailStatMapper):
                     contents={
                         "task": "article_detail_stat",
                         "account_name": account["account_name"],
-                        "status": "success"
+                        "status": "success",
                     }
                 )
             except Exception as e:
@@ -325,6 +322,6 @@ class ArticleDetailStat(ArticleDetailStatMapper):
                         "account_name": account["account_name"],
                         "error": str(e),
                         "traceback": traceback.format_exc(),
-                        "status": "fail"
+                        "status": "fail",
                     }
-                )
+                )

+ 6 - 6
applications/tasks/data_recycle_tasks/recycle_daily_publish_articles.py → app/domains/data_recycle_tasks/recycle_daily_publish_articles.py

@@ -7,11 +7,11 @@ import traceback
 
 from tqdm.asyncio import tqdm
 
-from applications.api import feishu_robot
-from applications.crawler.wechat import get_article_list_from_account
-from applications.crawler.wechat import get_article_detail
-from applications.pipeline import insert_article_into_recycle_pool
-from applications.utils import str_to_md5
+from app.infra.external import feishu_robot
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.crawler.wechat import get_article_detail
+from app.core.pipeline import insert_article_into_recycle_pool
+from app.infra.shared.tools import str_to_md5
 
 
 class Const:
@@ -47,7 +47,7 @@ class Const:
         "gh_72bace6b3059",
         "gh_dd4c857bbb36",
         "gh_ff487cb5dab3",
-        "gh_ac43eb24376d"
+        "gh_ac43eb24376d",
     ]
 
     # NOT USED SERVER ACCOUNT

+ 3 - 3
applications/tasks/data_recycle_tasks/recycle_mini_program_detail.py → app/domains/data_recycle_tasks/recycle_mini_program_detail.py

@@ -4,9 +4,9 @@ from typing import Any
 from datetime import datetime, timedelta
 
 
-from applications.crawler.wechat import get_article_detail
-from applications.utils import extract_root_source_id
-from applications.utils import run_tasks_with_asyncio_task_group
+from app.infra.crawler.wechat import get_article_detail
+from app.infra.shared.tools import extract_root_source_id
+from app.infra.shared import run_tasks_with_asyncio_task_group
 
 
 class MiniProgramConst:

+ 4 - 4
applications/tasks/data_recycle_tasks/recycle_outside_account_articles.py → app/domains/data_recycle_tasks/recycle_outside_account_articles.py

@@ -6,10 +6,10 @@ from tqdm.asyncio import tqdm
 
 from .recycle_daily_publish_articles import UpdateRootSourceIdAndUpdateTimeTask
 from .recycle_daily_publish_articles import Const
-from applications.crawler.wechat import get_article_list_from_account
-from applications.crawler.wechat import get_article_detail
-from applications.pipeline import insert_outside_article_into_recycle_pool
-from applications.api import feishu_robot
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.crawler.wechat import get_article_detail
+from app.core.pipeline import insert_outside_article_into_recycle_pool
+from app.infra.external import feishu_robot
 
 account_name_set = {
     "念念私语",

+ 0 - 0
applications/tasks/llm_tasks/__init__.py → app/domains/llm_tasks/__init__.py


+ 3 - 3
applications/tasks/llm_tasks/candidate_account_process.py → app/domains/llm_tasks/candidate_account_process.py

@@ -3,9 +3,9 @@ import traceback
 from typing import List, Dict, Optional
 from tqdm.asyncio import tqdm
 
-from applications.api import fetch_deepseek_completion
-from applications.api import feishu_robot
-from applications.utils import ci_lower
+from app.infra.external import fetch_deepseek_completion
+from app.infra.external import feishu_robot
+from app.infra.shared.tools import ci_lower
 
 
 class CandidateAccountProcessConst:

+ 4 - 4
applications/tasks/llm_tasks/process_title.py → app/domains/llm_tasks/process_title.py

@@ -4,10 +4,10 @@ import traceback
 
 from typing import Optional, List, Dict, Tuple
 
-from applications.api import fetch_deepseek_completion
-from applications.utils import yield_batch
-from applications.tasks.llm_tasks.prompts import extract_article_features
-from applications.tasks.llm_tasks.prompts import extract_article_category
+from app.infra.external import fetch_deepseek_completion
+from app.infra.shared.tools import yield_batch
+from app.domains.llm_tasks.prompts import extract_article_features
+from app.domains.llm_tasks.prompts import extract_article_category
 from tqdm.asyncio import tqdm
 
 

+ 0 - 0
applications/tasks/llm_tasks/prompts.py → app/domains/llm_tasks/prompts.py


+ 0 - 0
applications/tasks/monitor_tasks/__init__.py → app/domains/monitor_tasks/__init__.py


+ 5 - 5
applications/tasks/monitor_tasks/auto_reply_cards_monitor.py → app/domains/monitor_tasks/auto_reply_cards_monitor.py

@@ -13,11 +13,11 @@ from urllib.parse import unquote, parse_qs, urlparse
 import requests
 from requests.exceptions import RequestException
 
-from applications.utils import upload_to_oss
-from applications.utils import fetch_from_odps
-from applications.utils import AsyncHttpClient
-from applications.crawler.wechat import get_article_list_from_account
-from applications.crawler.wechat import get_article_detail
+from app.infra.shared.tools import upload_to_oss
+from app.infra.shared.tools import fetch_from_odps
+from app.infra.shared import AsyncHttpClient
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.crawler.wechat import get_article_detail
 
 
 class AutoReplyCardsMonitorConst:

+ 4 - 5
applications/tasks/monitor_tasks/cooperate_accounts_monitor.py → app/domains/monitor_tasks/cooperate_accounts_monitor.py

@@ -7,9 +7,9 @@ from tqdm import tqdm
 from datetime import datetime, timedelta
 from urllib.parse import unquote, parse_qs, urlparse
 
-from applications.utils import fetch_from_odps, show_desc_to_sta
-from applications.crawler.wechat import get_article_list_from_account
-from applications.crawler.wechat import get_article_detail
+from app.infra.shared.tools import fetch_from_odps, show_desc_to_sta
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.crawler.wechat import get_article_detail
 
 
 class CooperateAccountsMonitorTaskConst:
@@ -286,7 +286,7 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
                     single_article["send_time"],
                     self.extract_wx_sn(single_article["ContentUrl"]),
                     show_stat.get("show_view_count", 0),
-                    show_stat.get("show_like_count", 0)
+                    show_stat.get("show_like_count", 0),
                 )
                 params.append(single_param)
 
@@ -387,4 +387,3 @@ class CooperateAccountsMonitorTask(CooperateAccountsMonitorMapper):
                 else:
                     print("没有需要处理详情的账号")
                     return
-

+ 3 - 3
applications/tasks/monitor_tasks/get_off_videos.py → app/domains/monitor_tasks/get_off_videos.py

@@ -4,9 +4,9 @@ from typing import List, Optional
 
 from tqdm import tqdm
 
-from applications.api import change_video_audit_status
-from applications.api import fetch_piaoquan_video_list_detail
-from applications.api import feishu_robot
+from app.infra.internal import change_video_audit_status
+from app.infra.internal import fetch_piaoquan_video_list_detail
+from app.infra.external import feishu_robot
 
 
 class GetOffVideosConst:

+ 5 - 5
applications/tasks/monitor_tasks/gzh_article_monitor.py → app/domains/monitor_tasks/gzh_article_monitor.py

@@ -4,11 +4,11 @@ from typing import Optional, List
 
 from tqdm import tqdm
 
-from applications.api import feishu_robot
-from applications.api import delete_illegal_gzh_articles
-from applications.crawler.wechat import get_article_detail
-from applications.crawler.wechat import get_article_list_from_account
-from applications.utils import str_to_md5
+from app.infra.external import feishu_robot
+from app.infra.internal import delete_illegal_gzh_articles
+from app.infra.crawler.wechat import get_article_detail
+from app.infra.crawler.wechat import get_article_list_from_account
+from app.infra.shared.tools import str_to_md5
 
 
 class MonitorConst:

+ 2 - 2
applications/tasks/monitor_tasks/kimi_balance.py → app/domains/monitor_tasks/kimi_balance.py

@@ -1,7 +1,7 @@
 import traceback
 from typing import Dict
-from applications.api import feishu_robot
-from applications.utils import AsyncHttpClient
+from app.infra.external import feishu_robot
+from app.infra.shared import AsyncHttpClient
 
 # const
 BALANCE_LIMIT_THRESHOLD = 100.0

+ 1 - 3
applications/tasks/monitor_tasks/limited_account_analysis.py → app/domains/monitor_tasks/limited_account_analysis.py

@@ -1,7 +1,5 @@
-import json
-
 from datetime import datetime, timedelta
-from applications.api import feishu_sheet, feishu_robot
+from app.infra.external import feishu_sheet
 
 
 class LimitedAccountAnalysisConst:

+ 2 - 2
applications/tasks/monitor_tasks/task_processing_monitor.py → app/domains/monitor_tasks/task_processing_monitor.py

@@ -1,7 +1,7 @@
 import time
 
-from applications.api import feishu_robot
-from applications.tasks.task_mapper import TaskMapper
+from app.infra.external import feishu_robot
+from app.jobs.task_mapper import TaskMapper
 
 
 class TaskProcessingMonitor(TaskMapper):

+ 0 - 0
app/infra/crawler/__init__.py


+ 0 - 0
applications/crawler/toutiao/__init__.py → app/infra/crawler/toutiao/__init__.py


+ 1 - 1
applications/crawler/toutiao/blogger.py → app/infra/crawler/toutiao/blogger.py

@@ -6,7 +6,7 @@ from __future__ import annotations
 
 import aiohttp
 
-from applications.utils import async_proxy
+from app.infra.shared.tools import async_proxy
 from .use_js import call_js_function
 
 

+ 1 - 1
applications/crawler/toutiao/detail_recommend.py → app/infra/crawler/toutiao/detail_recommend.py

@@ -8,7 +8,7 @@ import json
 import requests
 from tenacity import retry
 
-from applications.utils import proxy, request_retry
+from app.infra.shared.tools import proxy, request_retry
 from .use_js import call_js_function
 
 retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30)

+ 0 - 0
app/infra/crawler/toutiao/main_page_recomend.py


+ 1 - 3
applications/crawler/toutiao/search.py → app/infra/crawler/toutiao/search.py

@@ -1,6 +1,4 @@
-import json
-
-from applications.utils import AsyncHttpClient
+from app.infra.shared import AsyncHttpClient
 
 
 async def search_in_toutiao(keyword):

+ 0 - 0
applications/crawler/toutiao/toutiao.js → app/infra/crawler/toutiao/toutiao.js


+ 1 - 1
applications/crawler/toutiao/use_js.py → app/infra/crawler/toutiao/use_js.py

@@ -5,7 +5,7 @@
 import json
 import subprocess
 
-toutiao_js_path = "applications/crawler/toutiao/toutiao.js"
+toutiao_js_path = "app/infra/crawler/toutiao/toutiao.js"
 
 
 def call_js_function(arguments_list):

+ 1 - 1
applications/crawler/wechat/__init__.py → app/infra/crawler/wechat/__init__.py

@@ -1,3 +1,3 @@
 from .gzh_spider import *
 from .gzh_fans import *
-from .gzh_article_stat import *
+from .gzh_article_stat import *

+ 3 - 8
applications/crawler/wechat/gzh_article_stat.py → app/infra/crawler/wechat/gzh_article_stat.py

@@ -1,21 +1,16 @@
-from applications.utils import AsyncHttpClient
+from app.infra.shared import AsyncHttpClient
 
 
 # fetch daily official-account article statistics
 async def get_gzh_stat_daily(access_token: str, date_string: str):
     url = f"https://api.weixin.qq.com/datacube/getarticletotaldetail?access_token={access_token}"
-    data = {
-        "begin_date": date_string,
-        "end_date": date_string
-    }
+    data = {"begin_date": date_string, "end_date": date_string}
     headers = {
         "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
-        "Content-Type": "application/json"
+        "Content-Type": "application/json",
     }
     # send the request
     async with AsyncHttpClient(timeout=10) as http_client:
         response = await http_client.post(url, headers=headers, json=data)
 
     return response
-
-

+ 2 - 6
applications/crawler/wechat/gzh_fans.py → app/infra/crawler/wechat/gzh_fans.py

@@ -1,6 +1,6 @@
 import random
 
-from applications.utils import AsyncHttpClient
+from app.infra.shared import AsyncHttpClient
 
 
 # crawl official-account fans
@@ -48,11 +48,7 @@ async def get_gzh_fans(token, cookie, cursor_id, cursor_timestamp):
 # fetch the access_token
 async def get_access_token(app_id, app_secret):
     url = f"https://api.weixin.qq.com/cgi-bin/stable_token"
-    data = {
-        "grant_type": "client_credential",
-        "appid": app_id,
-        "secret": app_secret
-    }
+    data = {"grant_type": "client_credential", "appid": app_id, "secret": app_secret}
     async with AsyncHttpClient(timeout=100) as http_client:
         response = await http_client.post(url, json=data)
 

+ 6 - 4
applications/crawler/wechat/gzh_spider.py → app/infra/crawler/wechat/gzh_spider.py

@@ -6,9 +6,9 @@ import requests
 from fake_useragent import FakeUserAgent
 from tenacity import retry
 
-from applications.api import log
-from applications.utils import request_retry
-from applications.utils import AsyncHttpClient
+from app.infra.external import log
+from app.infra.shared.tools import request_retry
+from app.infra.shared import AsyncHttpClient
 
 retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=16)
 
@@ -49,7 +49,9 @@ async def get_article_detail(
 
 
 @retry(**retry_desc)
-async def get_article_list_from_account(account_id: str, index=None, is_cache=True) -> dict | None:
+async def get_article_list_from_account(
+    account_id: str, index=None, is_cache=True
+) -> dict | None:
     target_url = f"{base_url}/blogger"
     payload = json.dumps(
         {

+ 18 - 0
app/infra/external/__init__.py

@@ -0,0 +1,18 @@
+from .aliyun import log
+from .deepseek_official import fetch_deepseek_completion
+from .apollo import AsyncApolloApi
+from .feishu import FeishuBotApi
+from .feishu import FeishuSheetApi
+from .elastic_search import AsyncElasticSearchClient
+
+feishu_robot = FeishuBotApi()
+feishu_sheet = FeishuSheetApi()
+
+__all__ = [
+    "feishu_robot",
+    "feishu_sheet",
+    "AsyncApolloApi",
+    "fetch_deepseek_completion",
+    "log",
+    "AsyncElasticSearchClient",
+]

+ 0 - 0
applications/api/aliyun_log_api.py → app/infra/external/aliyun.py


+ 31 - 2
applications/utils/async_apollo_client.py → app/infra/external/apollo.py

@@ -5,13 +5,17 @@ import socket
 import asyncio
 import aiohttp
 
+from typing import Dict, Union
+
+from app.core.config.settings import ApolloConfig
+
 
 class AsyncApolloClient:
     def __init__(
         self,
         app_id,
-        cluster="default",
         config_server_url="http://localhost:8080",
+        cluster="default",
         timeout=35,
         ip=None,
     ):
@@ -26,7 +30,8 @@ class AsyncApolloClient:
         self._notification_map = {"application": -1}
         self._stop_event = asyncio.Event()
 
-    def _init_ip(self):
+    @staticmethod
+    def _init_ip():
         s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
         try:
             s.connect(("8.8.8.8", 53))
@@ -129,3 +134,27 @@ class AsyncApolloClient:
             await self._long_poll()
         logging.info("Listener stopped.")
         self.stopped = True
+
+
+class AsyncApolloApi(AsyncApolloClient):
+    def __init__(
+        self, apollo_config: ApolloConfig, app_id: str | None, env: str | None
+    ):
+        if not app_id:
+            app_id = apollo_config.app_id
+
+        if not env:
+            env = apollo_config.env
+
+        server_url = apollo_config.apollo_map.get(app_id, {}).get(env)
+        super().__init__(app_id, server_url)
+
+    async def get_config_value(
+        self, key: str, output_type: str = "json"
+    ) -> Union[Dict, str]:
+        match output_type:
+            case "json":
+                response = await self.get_value(key)
+                return json.loads(response)
+            case _:
+                return await self.get_value(key)
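
AsyncApolloApi resolves the server URL from apollo_map[app_id][env] when no explicit app_id or env is given, and get_config_value JSON-decodes by default. A usage sketch (both keys below are ones this diff actually reads; the GlobalConfigSettings import path is assumed from the hunks above):

    # usage sketch -- assumes GlobalConfigSettings exposes an .apollo slice
    from app.core.config import GlobalConfigSettings
    from app.infra.external import AsyncApolloApi

    async def read_config():
        settings = GlobalConfigSettings()
        api = AsyncApolloApi(apollo_config=settings.apollo, app_id=None, env=None)
        words = await api.get_config_value("sensitive_word_list")  # parsed JSON
        cookie = await api.get_config_value("toutiao_blogger_cookie", output_type="string")
        return words, cookie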

Some files were not shown because too many files changed in this diff