1 неделя назад · f692fdb1f7
--- a/CONFIGURATION.md
+++ b/CONFIGURATION.md
@@ -1,213 +0,0 @@
 
				-# AutoScraperX 配置说明
			
 
				-
			
 
				-本文档详细说明了AutoScraperX项目的配置项。
			
 
				-
			
 
				----
			
 
				-
			
 
				-# 环境配置说明
			
 
				-
			
 
				-环境配置通过 `.env` 文件进行配置，以下为所有可配置项:
			
 
				-
			
 
				-| 配置项 | 描述 | 是否必填 | 默认值 |
			
 
				-|--------|------|----------|--------|
			
 
				-| ENV | 运行环境 (可选值: prod, dev) | 否 | prod |
			
 
				-| DB_HOST | 数据库主机地址 | 是 |  |
			
 
				-| DB_PORT | 数据库端口 | 否 | 3306 |
			
 
				-| DB_USER | 数据库用户名 | 是 |  |
			
 
				-| DB_PASSWORD | 数据库密码 | 是 |  |
			
 
				-| DB_NAME | 数据库名称 | 是 |  |
			
 
				-| DB_CHARSET | 数据库字符集 | 是 |  |
			
 
				-| ROCKETMQ_ENDPOINT | RocketMQ接入点 | 是 |  |
			
 
				-| ROCKETMQ_ACCESS_KEY_ID | RocketMQ访问密钥ID | 是 |  |
			
 
				-| ROCKETMQ_ACCESS_KEY_SECRET | RocketMQ访问密钥 | 是 |  |
			
 
				-| FEISHU_APPID | 飞书应用ID | 是 |  |
			
 
				-| FEISHU_APPSECRET | 飞书应用密钥 | 是 |  |
			
 
				-| ALIYUN_ACCESS_KEY_ID | 阿里云访问密钥ID | 是 |  |
			
 
				-| ALIYUN_ACCESS_KEY_SECRET | 阿里云访问密钥 | 是 |  |
			
 
				-| REDIS_HOST | Redis主机地址 | 是 |  |
			
 
				-| REDIS_PORT | Redis端口 | 否 | 6379 |
			
 
				-| REDIS_PASSWORD | Redis密码 | 是 |  |
			
 
				-
			
 
				----
			
 
				-
			
 
				-# 爬虫配置说明
			
 
				-
			
 
				-爬虫配置通过 `config/spiders_config.yaml` 文件进行配置。
			
 
				-
			
 
				-## 配置示例
			
 
				-
			
 
				-```yaml
			
 
				-default:
			
 
				-  base_url: http://8.217.192.46:8889
			
 
				-  request_timeout: 30
			
 
				-  max_retries: 3
			
 
				-  headers:
			
 
				-    {"Content-Type": "application/json"}
			
 
				-
			
 
				-benshanzhufurecommend:
			
 
				-  platform: benshanzhufu
			
 
				-  mode: recommend
			
 
				-  path: /crawler/ben_shan_zhu_fu/recommend
			
 
				-  method: post
			
 
				-  request_body:
			
 
				-    cursor: "{{next_cursor}}"
			
 
				-  loop_times: 50
			
 
				-  loop_interval:
			
 
				-    min: 30
			
 
				-    max: 60
			
 
				-  feishu_sheetid: "aTSJH4"
			
 
				-  response_parse:
			
 
				-    data: "$.data"
			
 
				-    next_cursor: "$.data.next_cursor"
			
 
				-    data_path: "$.data.data"
			
 
				-    fields:
			
 
				-      video_id: "$.nid"
			
 
				-      video_title: "$.title"
			
 
				-      play_cnt: 0
			
 
				-      publish_time_stamp: "$.update_time"
			
 
				-      out_user_id: "$.nid"
			
 
				-      cover_url: "$.video_cover"
			
 
				-      like_cnt: 0
			
 
				-      video_url: "$.video_url"
			
 
				-      out_video_id: "$.nid"
			
 
				-
			
 
				-
			
 
				-yuannifuqimanmanrecommend:
			
 
				-  platform: yuannifuqimanman
			
 
				-  mode: recommend
			
 
				-  path: /crawler/yuan_ni_fu_qi_man_man/recommend
			
 
				-  method: post
			
 
				-  request_body:
			
 
				-    cursor: "{{next_cursor}}"
			
 
				-  loop_times: 100
			
 
				-  loop_interval:
			
 
				-    min: 30
			
 
				-    max: 60
			
 
				-  feishu_sheetid: "golXy9"
			
 
				-  response_parse:
			
 
				-    data: "$.data"
			
 
				-    next_cursor: "$.data.next_cursor"
			
 
				-    data_path: "$.data.data"
			
 
				-    fields:
			
 
				-      video_id: "$.nid"
			
 
				-      video_title: "$.title"
			
 
				-      out_user_id: "$.nid"
			
 
				-      cover_url: "$.video_cover"
			
 
				-      video_url: "$.video_url"
			
 
				-      out_video_id: "$.nid"
			
 
				-
			
 
				-xiaoniangaoauthor:
			
 
				-  platform: xiaoniangao
			
 
				-  mode: author
			
 
				-  path: /crawler/xiao_nian_gao_plus/blogger
			
 
				-  method: post
			
 
				-  request_body:
			
 
				-      cursor: "{{next_cursor}}"
			
 
				-      account_id: "{{uid}}" # 数据库的uid
			
 
				-  loop_times: 100
			
 
				-  loop_interval:
			
 
				-    min: 5
			
 
				-    max: 10
			
 
				-  feishu_sheetid: "K0gA9Y"
			
 
				-  response_parse:
			
 
				-    uid: "$.uid" # 数据库的uid
			
 
				-    next_cursor: "$.cursor"
			
 
				-    data: "$.data"
			
 
				-    has_more: "$.data.has_more"
			
 
				-    data_path: "$.data.data"
			
 
				-    fields:
			
 
				-      video_title: "$.title"
			
 
				-      duration: "$.du"
			
 
				-      play_cnt: "$.play_pv"
			
 
				-      like_cnt: "$.favor.total"
			
 
				-      comment_cnt: "$.comment_count"
			
 
				-      share_cnt: "$.share"
			
 
				-      width: "$.w"
			
 
				-      height: "$.h"
			
 
				-      avatar_url: "$.user.hurl"
			
 
				-      cover_url: "$.url"
			
 
				-      video_url: "$.v_url"
			
 
				-      out_user_id: "$.user.mid"
			
 
				-      out_video_id: "$.vid"
			
 
				-      publish_time_stamp: "$.t"
			
 
				-
			
 
				-xiaoniangaorecommend:
			
 
				-  platform: xiaoniangao
			
 
				-  mode: recommend
			
 
				-  path: crawler/xiao_nian_gao_plus/recommend
			
 
				-  method: post
			
 
				-  request_body:
			
 
				-
			
 
				-  loop_times: 100
			
 
				-  loop_interval:
			
 
				-    min: 5
			
 
				-    max: 20
			
 
				-  feishu_sheetid: "D1nVxQ"
			
 
				-  response_parse:
			
 
				-    uid: "$.uid" # 数据库的uid
			
 
				-    next_cursor: "$.cursor"
			
 
				-    data: "$.data"
			
 
				-    has_more: "$.data.has_more"
			
 
				-    data_path: "$.data.data"
			
 
				-    fields:
			
 
				-      video_title: "$.title"
			
 
				-      duration: "$.du"
			
 
				-      play_cnt: "$.play_pv"
			
 
				-      like_cnt: "$.favor.total"
			
 
				-      comment_cnt: "$.comment_count"
			
 
				-      share_cnt: "$.share"
			
 
				-      width: "$.w"
			
 
				-      height: "$.h"
			
 
				-      avatar_url: "$.user.hurl"
			
 
				-      cover_url: "$.url"
			
 
				-      video_url: "$.v_url"
			
 
				-      out_user_id: "$.user.mid"
			
 
				-      out_video_id: "$.vid"
			
 
				-      publish_time_stamp: "$.t"
			
 
				-
			
 
				-
			
 
				-
			
 
				-```
			
 
				-
			
 
				-## 字段说明
			
 
				-
			
 
				-### 全局配置字段
			
 
				-
			
 
				-| 字段 | 描述 |
			
 
				-|------|------|
			
 
				-| base_url | 基础URL，用于拼接完整请求URL |
			
 
				-| request_timeout | 请求超时时间（秒） |
			
 
				-| max_retries | 最大重试次数 |
			
 
				-| headers | 请求头信息 |
			
 
				-
			
 
				-### 平台配置字段
			
 
				-
			
 
				-| 字段 | 描述 |
			
 
				-|------|------|
			
 
				-| platform | 平台名称 |
			
 
				-| mode | 爬取模式（如 recommend, author） |
			
 
				-| path | API路径 |
			
 
				-| url | 完整请求URL |
			
 
				-| method | HTTP请求方法 |
			
 
				-| request_body | 请求体参数 |
			
 
				-| loop_times | 循环次数 |
			
 
				-| loop_interval | 循环间隔（min/max） |
			
 
				-| response_parse | 响应解析配置 |
			
 
				-| feishu_sheetid | 飞书表格ID |
			
 
				-
			
 
				-### 响应解析字段
			
 
				-
			
 
				-| 字段 | 描述 |
			
 
				-|------|------|
			
 
				-| data_path | 数据列表路径 |
			
 
				-| next_cursor | 下一页游标路径 |
			
 
				-| has_more | 是否还有更多数据路径 |
			
 
				-| fields | 字段映射配置 |
			
 
				-
			
 
				----
			
 
				-
			
 
				-## 当前配置状态
			
 
				-
			
 
				-- 平台配置数量: 4
			
 
				-- 运行环境: prod
			
 
				-- 配置文件路径: /Users/zhangliang/Documents/piaoquan/AutoScraperX/config/spiders_config.yaml
			
--- a/README.md
+++ b/README.md
@@ -9,10 +9,10 @@
 
				 ```bash
			
 
				 ├── config/                # 配置文件
			
 
				 │   ├── __init__.py        # 配置初始化
			
 
				-│   ├── base.py            # 环境配置定义
			
 
				+│   ├── recommend.py            # 环境配置定义
			
 
				 │   └── spiders_config.yaml# 爬虫平台配置
			
 
				 ├── core/                  # 核心框架模块
			
 
				-│   ├── base/              # 基础组件（异步客户端等）
			
 
				+│   ├── recommend/              # 基础组件（异步客户端等）
			
 
				 │   ├── models/            # 数据模型
			
 
				 │   ├── utils/             # 工具类
			
 
				 │   │   ├── config_manager.py      # 统一配置管理器
			
@@ -54,7 +54,7 @@
 
				 - **Pipeline**：负责数据校验、去重和推送至 ETL MQ
			
 
				 - **MQ 系统**：阿里云 MQ，支持按平台配置多个 Topic，消费完成再手动 ACK
			
 
				 - **配置系统**：
			
 
				-  - 环境配置：通过 `.env` 文件和 `config/base.py` 管理
			
 
				+  - 环境配置：通过 `.env` 文件和 `config/recommend.py` 管理
			
 
				   - 爬虫配置：通过 `config/spiders_config.yaml` 管理
			
 
				 
			
 
				 ---
			
--- a/config/__init__.py
+++ b/config/__init__.py
@@ -1,3 +1,24 @@
 
				-from .prod import settings  # 使用生产环境配置
			
 
				+# config/__init__.py
			
 
				+"""统一配置入口"""
			
 
				+from .base import settings
			
 
				+from core.utils.spider_config import SpiderConfig
			
 
				+from core.models.spiders_config_models import PlatformConfig
			
 
				 
			
 
				-__all__ = ['settings']
			
 
				+
			
 
				+# 提供统一的配置访问接口
			
 
				+class ConfigManager:
			
 
				+    @staticmethod
			
 
				+    def get_spider_config(spider_name: str) -> PlatformConfig:
			
 
				+        return SpiderConfig.get_platform_config(spider_name)
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def get_all_spiders() -> list:
			
 
				+        return SpiderConfig.list_all_platforms()
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def get_app_settings():
			
 
				+        return settings
			
 
				+
			
 
				+
			
 
				+# 全局配置管理器
			
 
				+config_manager = ConfigManager()
			
--- a/config/base.py
+++ b/config/base.py
@@ -1,103 +1,106 @@
 
				+# base.py 修改后
			
 
				 import os
			
 
				-
			
 
				 from dotenv import load_dotenv
			
 
				 from core.utils.path_utils import project_root, log_dir
			
 
				-from pydantic import BaseSettings, Field, AnyUrl, validator
			
 
				-
			
 
				-# 在 Settings 类之前加载 .env 文件
			
 
				+from pydantic import Field, AnyUrl, field_validator, ConfigDict
			
 
				+from pydantic_settings import BaseSettings  # 注意：BaseSettings 现在在 pydantic_settings 中
			
 
				 
			
 
				-load_dotenv(os.path.join(project_root,".env"))
			
 
				+load_dotenv(os.path.join(project_root, ".env"))
			
 
				 
			
 
				 
			
 
				 class Settings(BaseSettings):
			
 
				+    # Pydantic 2.x 使用 ConfigDict 替代内部 Config 类
			
 
				+    model_config = ConfigDict(
			
 
				+        env_file=".env",
			
 
				+        env_file_encoding='utf-8',
			
 
				+        case_sensitive=False,
			
 
				+        env_prefix="",  # 明确指定环境变量前缀
			
 
				+    )
			
 
				+
			
 
				     # 环境标识
			
 
				-    ENV: str = "prod"  # prod/dev
			
 
				+    ENV: str = "prod"
			
 
				     ENABLE_ALIYUN_LOG: bool = True
			
 
				 
			
 
				     # 日志配置
			
 
				     LOG_DIR: str = log_dir
			
 
				     LOG_LEVEL: str = "INFO"
			
 
				 
			
 
				-    # 阿里云数据库配置 (RDS)
			
 
				-    DB_HOST: str = Field(..., env="DB_HOST")
			
 
				-    DB_PORT: int = Field(3306, env="DB_PORT")
			
 
				-    DB_USER: str = Field(..., env="DB_USER")
			
 
				-    DB_PASSWORD: str = Field(..., env="DB_PASSWORD")
			
 
				-    DB_NAME: str = Field(..., env="DB_NAME")
			
 
				-    DB_CHARSET: str = Field(..., env="DB_CHARSET")
			
 
				+    # 阿里云数据库配置
			
 
				+    DB_HOST: str = Field(..., validation_alias="DB_HOST")
			
 
				+    DB_PORT: int = Field(3306, validation_alias="DB_PORT")
			
 
				+    DB_USER: str = Field(..., validation_alias="DB_USER")
			
 
				+    DB_PASSWORD: str = Field(..., validation_alias="DB_PASSWORD")
			
 
				+    DB_NAME: str = Field(..., validation_alias="DB_NAME")
			
 
				+    DB_CHARSET: str = Field(..., validation_alias="DB_CHARSET")
			
 
				     DB_POOL_SIZE: int = 20
			
 
				     DB_POOL_RECYCLE: int = 3600
			
 
				 
			
 
				     # 阿里云RocketMQ配置
			
 
				-    ROCKETMQ_ENDPOINT: AnyUrl = Field(..., env="ROCKETMQ_ENDPOINT")
			
 
				-    ROCKETMQ_ACCESS_KEY_ID: str = Field(..., env="ROCKETMQ_ACCESS_KEY_ID")
			
 
				-    ROCKETMQ_ACCESS_KEY_SECRET: str = Field(..., env="ROCKETMQ_ACCESS_KEY_SECRET")
			
 
				-    ROCKETMQ_INSTANCE_ID: str = Field(..., env="ROCKETMQ_INSTANCE_ID")
			
 
				-    ROCKETMQ_WAIT_SECONDS: int = 30 # 最长30s
			
 
				+    ROCKETMQ_ENDPOINT: str = Field(..., validation_alias="ROCKETMQ_ENDPOINT")
			
 
				+    ROCKETMQ_ACCESS_KEY_ID: str = Field(..., validation_alias="ROCKETMQ_ACCESS_KEY_ID")
			
 
				+    ROCKETMQ_ACCESS_KEY_SECRET: str = Field(..., validation_alias="ROCKETMQ_ACCESS_KEY_SECRET")
			
 
				+    ROCKETMQ_INSTANCE_ID: str = Field(..., validation_alias="ROCKETMQ_INSTANCE_ID")
			
 
				+    ROCKETMQ_WAIT_SECONDS: int = 30
			
 
				     ROCKETMQ_BATCH: int = 1
			
 
				 
			
 
				     # 飞书配置
			
 
				-    FEISHU_APPID: str = Field(..., env="FEISHU_APPID")
			
 
				-    FEISHU_APPSECRET: str = Field(..., env="FEISHU_APPSECRET")
			
 
				+    FEISHU_APPID: str = Field(..., validation_alias="FEISHU_APPID")
			
 
				+    FEISHU_APPSECRET: str = Field(..., validation_alias="FEISHU_APPSECRET")
			
 
				 
			
 
				     # 连接池配置
			
 
				     CONNECTION_TIMEOUT: int = 10
			
 
				     REQUEST_TIMEOUT: int = 30
			
 
				 
			
 
				     # 阿里云日志
			
 
				-    ALIYUN_ACCESS_KEY_ID: str = Field(..., env="ALIYUN_ACCESS_KEY_ID")
			
 
				-    ALIYUN_ACCESS_KEY_SECRET: str = Field(..., env="ALIYUN_ACCESS_KEY_SECRET")
			
 
				+    ALIYUN_ACCESS_KEY_ID: str = Field(..., validation_alias="ALIYUN_ACCESS_KEY_ID")
			
 
				+    ALIYUN_ACCESS_KEY_SECRET: str = Field(..., validation_alias="ALIYUN_ACCESS_KEY_SECRET")
			
 
				 
			
 
				     # redis
			
 
				-    REDIS_HOST: str = Field(..., env="REDIS_HOST")
			
 
				-    REDIS_PORT: int = Field(..., env="REDIS_PORT")
			
 
				-    REDIS_PASSWORD: str = Field(..., env="REDIS_PASSWORD")
			
 
				-    REDIS_DB: int = Field(0, env="REDIS_DB")
			
 
				-    REDIS_MAX_CONNECTIONS: int = Field(20, env="REDIS_MAX_CONNECTIONS")
			
 
				+    REDIS_HOST: str = Field(..., validation_alias="REDIS_HOST")
			
 
				+    REDIS_PORT: int = Field(..., validation_alias="REDIS_PORT")
			
 
				+    REDIS_PASSWORD: str = Field(..., validation_alias="REDIS_PASSWORD")
			
 
				+    REDIS_DB: int = Field(0, validation_alias="REDIS_DB")
			
 
				+    REDIS_MAX_CONNECTIONS: int = Field(20, validation_alias="REDIS_MAX_CONNECTIONS")
			
 
				+
			
 
				     @property
			
 
				     def redis_url(self) -> str:
			
 
				-        """生成Redis连接URL"""
			
 
				         return f"redis://:{self.REDIS_PASSWORD}@{self.REDIS_HOST}:{self.REDIS_PORT}/{self.REDIS_DB}"
			
 
				 
			
 
				-    @validator('DB_PORT', 'REDIS_PORT')
			
 
				-    def validate_port(cls, v):
			
 
				+    # Pydantic 2.x 验证器语法
			
 
				+    @field_validator('DB_PORT', 'REDIS_PORT')
			
 
				+    @classmethod
			
 
				+    def validate_port(cls, v: int) -> int:
			
 
				         if not 1 <= v <= 65535:
			
 
				             raise ValueError('Port must be between 1 and 65535')
			
 
				         return v
			
 
				 
			
 
				-    @validator('DB_POOL_SIZE', 'DB_POOL_RECYCLE', 'REDIS_MAX_CONNECTIONS')
			
 
				-    def validate_positive_int(cls, v, field):
			
 
				+    @field_validator('DB_POOL_SIZE', 'DB_POOL_RECYCLE', 'REDIS_MAX_CONNECTIONS')
			
 
				+    @classmethod
			
 
				+    def validate_positive_int(cls, v: int) -> int:
			
 
				         if v <= 0:
			
 
				-            raise ValueError(f'{field.name} must be positive')
			
 
				+            raise ValueError('Value must be positive')
			
 
				         return v
			
 
				 
			
 
				-    @validator('ROCKETMQ_WAIT_SECONDS')
			
 
				-    def validate_rocketmq_wait_seconds(cls, v):
			
 
				+    @field_validator('ROCKETMQ_WAIT_SECONDS')
			
 
				+    @classmethod
			
 
				+    def validate_rocketmq_wait_seconds(cls, v: int) -> int:
			
 
				         if not 1 <= v <= 30:
			
 
				             raise ValueError('ROCKETMQ_WAIT_SECONDS must be between 1 and 30')
			
 
				         return v
			
 
				 
			
 
				-    @validator('ROCKETMQ_BATCH')
			
 
				-    def validate_rocketmq_batch(cls, v):
			
 
				+    @field_validator('ROCKETMQ_BATCH')
			
 
				+    @classmethod
			
 
				+    def validate_rocketmq_batch(cls, v: int) -> int:
			
 
				         if not 1 <= v <= 16:
			
 
				             raise ValueError('ROCKETMQ_BATCH must be between 1 and 16')
			
 
				         return v
			
 
				 
			
 
				-    @validator('CONNECTION_TIMEOUT', 'REQUEST_TIMEOUT')
			
 
				-    def validate_timeouts(cls, v, field):
			
 
				+    @field_validator('CONNECTION_TIMEOUT', 'REQUEST_TIMEOUT')
			
 
				+    @classmethod
			
 
				+    def validate_timeouts(cls, v: int) -> int:
			
 
				         if v <= 0:
			
 
				-            raise ValueError(f'{field.name} must be positive')
			
 
				+            raise ValueError('Timeout must be positive')
			
 
				         return v
			
 
				 
			
 
				-    class Config:
			
 
				-        env_file = ".env"
			
 
				-        env_file_encoding = 'utf-8'
			
 
				-        case_sensitive = False
			
 
				-
			
 
				-    # @property
			
 
				-    # def database_url(self) -> str:
			
 
				-    #     """生成安全的数据库连接字符串"""
			
 
				-    #     return f"mysql+asyncmy://{self.DB_USER}:{self.DB_PASSWORD}@{self.DB_HOST}:{self.DB_PORT}/{self.DB_NAME}?charset={self.DB_CHARSET}"
			
 
				-
			
 
				 
			
 
				-settings = Settings()
			
 
				+settings = Settings()
			
--- a/config/spiders_config.yaml
+++ b/config/spiders_config.yaml
@@ -12,7 +12,7 @@ benshanzhufurecommend:
 
				   method: post
			
 
				   request_body:
			
 
				     cursor: "{{next_cursor}}"
			
 
				-  loop_times: 50
			
 
				+  loop_times: 7
			
 
				   loop_interval:
			
 
				     min: 30
			
 
				     max: 60
			
--- a/core/base/async_request_client.py
+++ b/core/base/async_request_client.py
@@ -1,5 +1,6 @@
 
				 import asyncio
			
 
				-from typing import Optional, Dict
			
 
				+import json
			
 
				+from typing import Optional, Dict, Any
			
 
				 import aiohttp
			
 
				 
			
 
				 from core.utils.log.logger_manager import LoggerManager
			
@@ -7,78 +8,185 @@ from core.utils.log.logger_manager import LoggerManager
 
				 
			
 
				 class AsyncRequestClient:
			
 
				     """
			
 
				-    请求失败重试，本地日志记录
			
 
				-    请求返回code！=0重试，本地日志记录
			
 
				-    重试达到最大次数后上报阿里云日志
			
 
				+    异步HTTP请求客户端，支持重试机制和详细日志记录
			
 
				+
			
 
				+    特性:
			
 
				+    - 请求失败自动重试（网络错误和业务逻辑错误）
			
 
				+    - 指数退避重试策略
			
 
				+    - 本地日志和阿里云日志双重记录
			
 
				+    - 可配置的重试参数和超时设置
			
 
				     """
			
 
				-    def __init__(self, logger: Optional[LoggerManager.get_logger()] = None,
			
 
				-                 aliyun_log: Optional[LoggerManager.get_aliyun_logger()] = None,
			
 
				-                 max_retries=3, timeout=30):
			
 
				+
			
 
				+    class BusinessLogicError(Exception):
			
 
				+        """业务逻辑错误异常"""
			
 
				+        pass
			
 
				+
			
 
				+    def __init__(
			
 
				+            self,
			
 
				+            logger: Optional[Any] = None,
			
 
				+            aliyun_log: Optional[Any] = None,
			
 
				+            max_retries: int = 3,
			
 
				+            base_delay: float = 1.0,
			
 
				+            timeout: int = 30
			
 
				+    ):
			
 
				+        """
			
 
				+        初始化请求客户端
			
 
				+
			
 
				+        Args:
			
 
				+            logger: 本地日志记录器实例
			
 
				+            aliyun_log: 阿里云日志记录器实例
			
 
				+            max_retries: 最大重试次数
			
 
				+            base_delay: 基础等待时间(秒)，用于指数退避计算
			
 
				+            timeout: 请求超时时间(秒)
			
 
				+        """
			
 
				         self.logger = logger
			
 
				         self.aliyun_log = aliyun_log
			
 
				         self.max_retries = max_retries
			
 
				+        self.base_delay = base_delay
			
 
				         self.timeout = timeout
			
 
				 
			
 
				-    async def request(self, session: aiohttp.ClientSession, method: str, url: str, **kwargs) -> Optional[Dict]:
			
 
				+    async def request(
			
 
				+            self,
			
 
				+            session: aiohttp.ClientSession,
			
 
				+            method: str,
			
 
				+            url: str,
			
 
				+            **kwargs
			
 
				+    ) -> Optional[Dict]:
			
 
				+        """
			
 
				+        发送HTTP请求，支持重试机制
			
 
				+
			
 
				+        Args:
			
 
				+            session: aiohttp会话对象
			
 
				+            method: HTTP方法(GET, POST等)
			
 
				+            url: 请求URL
			
 
				+            **kwargs: 其他请求参数
			
 
				+
			
 
				+        Returns:
			
 
				+            Optional[Dict]: 响应数据(JSON格式)，失败时返回None
			
 
				+        """
			
 
				         retries = 0
			
 
				-        resp = None  # 初始化resp变量
			
 
				+        last_error = None
			
 
				 
			
 
				-        while retries < self.max_retries:
			
 
				+        # 记录请求开始
			
 
				+        self._log_request_start(method, url, kwargs)
			
 
				+
			
 
				+        while retries <= self.max_retries:
			
 
				             try:
			
 
				-                if self.logger:
			
 
				-                    self.logger.info(f"请求 {method} {url}, 请求参数{kwargs}")
			
 
				-                if self.aliyun_log:
			
 
				-                    self.aliyun_log.logging(
			
 
				-                        code="1012",
			
 
				-                        message="初始化请求",
			
 
				-                        data={"url": url,
			
 
				-                              "method": method,
			
 
				-                              "requestBody": kwargs}
			
 
				-                    )
			
 
				-                async with session.request(method, url, **kwargs) as response:
			
 
				+                # 计算等待时间(指数退避)
			
 
				+                if retries > 0:
			
 
				+                    delay = self.base_delay * (2 ** (retries - 1))
			
 
				+                    self._log_retry_delay(retries, delay)
			
 
				+                    await asyncio.sleep(delay)
			
 
				+
			
 
				+                # 发送请求
			
 
				+                async with session.request(
			
 
				+                        method, url,
			
 
				+                        timeout=aiohttp.ClientTimeout(total=self.timeout),
			
 
				+                        **kwargs
			
 
				+                ) as response:
			
 
				+                    # 检查HTTP状态
			
 
				                     response.raise_for_status()
			
 
				-                    resp = await response.json()
			
 
				-                    if resp.get('code') != 0:
			
 
				-                        retries += 1
			
 
				-                        if self.logger:
			
 
				-                            self.logger.info(f"{url} 响应 {resp}, 重试 {retries}/{self.max_retries}")
			
 
				-                        if retries >= self.max_retries:
			
 
				-                            error_msg = f"请求响应code非0且达到最大重试次数 {self.max_retries}"
			
 
				-                            if self.logger:
			
 
				-                                self.logger.error(error_msg)
			
 
				-                            if self.aliyun_log:
			
 
				-                                self.aliyun_log.logging(
			
 
				-                                    code="9006",
			
 
				-                                    message=error_msg,
			
 
				-                                    data={
			
 
				-                                        "url": url,
			
 
				-                                        "method": method,
			
 
				-                                        "requestBody": kwargs,
			
 
				-                                        "response": resp
			
 
				-                                    }
			
 
				-                                )
			
 
				-                        await asyncio.sleep(5)
			
 
				-                        continue
			
 
				-                    self.logger.info(f"{url} 响应: {resp}")
			
 
				-                    return resp
			
 
				+
			
 
				+                    # 解析响应
			
 
				+                    resp_data = await response.json()
			
 
				+
			
 
				+                    # 检查业务状态码
			
 
				+                    if resp_data.get('code') != 0:
			
 
				+                        raise self.BusinessLogicError(
			
 
				+                            f"业务逻辑错误: code={resp_data.get('code')}"
			
 
				+                        )
			
 
				+
			
 
				+                    # 记录成功响应
			
 
				+                    self._log_success_response(url, resp_data)
			
 
				+                    return resp_data
			
 
				+
			
 
				+            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
			
 
				+                last_error = e
			
 
				+                retries += 1
			
 
				+                self._log_network_error(e, retries, url)
			
 
				+
			
 
				+            except self.BusinessLogicError as e:
			
 
				+                last_error = e
			
 
				+                retries += 1
			
 
				+                self._log_business_error(e, retries, url)
			
 
				+
			
 
				             except Exception as e:
			
 
				+                last_error = e
			
 
				                 retries += 1
			
 
				-                if retries >= self.max_retries:
			
 
				-                    if self.logger:
			
 
				-                        self.logger.error(f"请求失败达到最大重试次数: {e}")
			
 
				-                    if self.aliyun_log:
			
 
				-                        self.aliyun_log.logging(
			
 
				-                            code="9006",
			
 
				-                            message="请求异常达到最大重试次数",
			
 
				-                            data={
			
 
				-                                "url": url,
			
 
				-                                "method": method,
			
 
				-                                "requestBody": kwargs,
			
 
				-                                "response": str(resp) if resp else str(e),
			
 
				-                                "error_type": type(e).__name__
			
 
				-                            }
			
 
				-                        )
			
 
				-                    return
			
 
				-                if self.logger:
			
 
				-                    self.logger.warning(f"请求失败 {e}, 重试 {retries}/{self.max_retries}")
			
 
				-                await asyncio.sleep(5)
			
 
				+                self._log_unexpected_error(e, retries, url)
			
 
				+
			
 
				+        # 所有重试都失败
			
 
				+        self._log_final_failure(method, url, kwargs, last_error)
			
 
				+        return None
			
 
				+
			
 
				+    def _log_request_start(self, method: str, url: str, kwargs: Dict):
			
 
				+        """记录请求开始"""
			
 
				+        if self.logger:
			
 
				+            # 简化日志，避免记录过大请求体
			
 
				+            # log_data = {k: v for k, v in kwargs.items() if k != 'json' and k != 'data'}
			
 
				+            # if 'json' in kwargs:
			
 
				+            #     log_data['json'] = '[...]'  # 简化JSON内容
			
 
				+            self.logger.info(f"请求 {method} {url}, 参数: {kwargs}")
			
 
				+
			
 
				+        if self.aliyun_log:
			
 
				+            self.aliyun_log.logging(
			
 
				+                code="1012",
			
 
				+                message="初始化请求",
			
 
				+                data={
			
 
				+                    "url": url,
			
 
				+                    "method": method,
			
 
				+                    "requestBody": {k: v for k, v in kwargs.items() if k != 'json'}
			
 
				+                }
			
 
				+            )
			
 
				+
			
 
				+    def _log_retry_delay(self, retries: int, delay: float):
			
 
				+        """记录重试等待"""
			
 
				+        if self.logger:
			
 
				+            self.logger.info(f"第 {retries} 次重试，等待 {delay:.2f} 秒")
			
 
				+
			
 
				+    def _log_network_error(self, error: Exception, retries: int, url: str):
			
 
				+        """记录网络错误"""
			
 
				+        if self.logger:
			
 
				+            self.logger.warning(
			
 
				+                f"请求 {url} 网络错误({retries}/{self.max_retries}): {error}"
			
 
				+            )
			
 
				+
			
 
				+    def _log_business_error(self, error: Exception, retries: int, url: str):
			
 
				+        """记录业务逻辑错误"""
			
 
				+        if self.logger:
			
 
				+            self.logger.warning(
			
 
				+                f"请求 {url} 业务逻辑错误({retries}/{self.max_retries}): {error}"
			
 
				+            )
			
 
				+
			
 
				+    def _log_unexpected_error(self, error: Exception, retries: int, url: str):
			
 
				+        """记录未预期错误"""
			
 
				+        if self.logger:
			
 
				+            self.logger.error(
			
 
				+                f"请求 {url} 未预期错误({retries}/{self.max_retries}): {error}"
			
 
				+            )
			
 
				+
			
 
				+    def _log_success_response(self, url: str, response: Dict):
			
 
				+        """记录成功响应"""
			
 
				+        if self.logger:
			
 
				+            # 只记录关键信息，避免日志过大
			
 
				+            self.logger.info(f"请求 {url} 成功: code={response.get('code')}")
			
 
				+
			
 
				+    def _log_final_failure(self, method: str, url: str, kwargs: Dict, error: Exception):
			
 
				+        """记录最终失败"""
			
 
				+        error_msg = f"请求 {method} {url} 最终失败: {error}"
			
 
				+
			
 
				+        if self.logger:
			
 
				+            self.logger.error(error_msg)
			
 
				+
			
 
				+        if self.aliyun_log:
			
 
				+            self.aliyun_log.logging(
			
 
				+                code="9006",
			
 
				+                message=error_msg,
			
 
				+                data={
			
 
				+                    "url": url,
			
 
				+                    "method": method,
			
 
				+                    "requestBody": {k: v for k, v in kwargs.items() if k != 'json'},
			
 
				+                    "error_type": type(error).__name__,
			
 
				+                    "error_message": str(error)
			
 
				+                }
			
 
				+            )
			
--- a/core/di/__init__.py
+++ b/core/di/__init__.py
--- a/core/di/container.py
+++ b/core/di/container.py
@@ -0,0 +1,40 @@
 
				+# core/di/container.py
			
 
				+from typing import Dict, Any, Type
			
 
				+from dependency_injector import containers, providers
			
 
				+from config.base import settings
			
 
				+from services.async_mysql_service import AsyncMysqlService
			
 
				+from services.async_mq_producer import AsyncMQProducer
			
 
				+from services.async_mq_consumer import AsyncRocketMQConsumer
			
 
				+
			
 
				+
			
 
				+class ServiceContainer(containers.DeclarativeContainer):
			
 
				+    """服务容器：统一管理所有服务的生命周期"""
			
 
				+
			
 
				+    # 配置
			
 
				+    config = providers.Configuration()
			
 
				+
			
 
				+    # 数据库服务工厂
			
 
				+    db_service = providers.Factory(
			
 
				+        AsyncMysqlService,
			
 
				+        platform=config.platform,
			
 
				+        mode=config.mode
			
 
				+    )
			
 
				+
			
 
				+    # MQ 生产者工厂
			
 
				+    mq_producer = providers.Singleton(
			
 
				+        AsyncMQProducer,
			
 
				+        topic_name="topic_crawler_etl_prod_v2",
			
 
				+        platform=config.platform,
			
 
				+        mode=config.mode
			
 
				+    )
			
 
				+
			
 
				+    # MQ 消费者工厂
			
 
				+    mq_consumer = providers.Factory(
			
 
				+        AsyncRocketMQConsumer,
			
 
				+        topic_name=config.topic_name,
			
 
				+        group_id=config.group_id
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+# 全局容器实例
			
 
				+container = ServiceContainer()
			
--- a/core/models/message_parser.py
+++ b/core/models/message_parser.py
@@ -0,0 +1,86 @@
 
				+from pydantic import BaseModel, Field
			
 
				+from typing import Optional, Union, List
			
 
				+import json
			
 
				+
			
 
				+
			
 
				+class RuleField(BaseModel):
			
 
				+    min: Union[int, float] = 0
			
 
				+    max: Union[int, float] = 0
			
 
				+
			
 
				+
			
 
				+class RuleModel(BaseModel):
			
 
				+    period: Optional[RuleField] = None
			
 
				+    duration: Optional[RuleField] = None
			
 
				+    play_cnt: Optional[RuleField] = None
			
 
				+    like_cnt: Optional[RuleField] = None
			
 
				+    comment_cnt: Optional[RuleField] = None
			
 
				+    share_cnt: Optional[RuleField] = None
			
 
				+    videos_cnt: Optional[RuleField] = None
			
 
				+    width: Optional[RuleField] = None
			
 
				+    height: Optional[RuleField] = None
			
 
				+
			
 
				+
			
 
				+class MessageModel(BaseModel):
			
 
				+    id: int
			
 
				+    rule: str
			
 
				+    parsed_rule: List[RuleModel] = Field(default_factory=list)
			
 
				+
			
 
				+    def model_post_init(self, __context) -> None:
			
 
				+        # 解析 rule 字符串
			
 
				+        try:
			
 
				+            if self.rule:
			
 
				+                rule_list = json.loads(self.rule)
			
 
				+                if not isinstance(rule_list, list):
			
 
				+                    rule_list = [rule_list]
			
 
				+                self.parsed_rule = []
			
 
				+                for item in rule_list:
			
 
				+                    try:
			
 
				+                        self.parsed_rule.append(RuleModel(**item))
			
 
				+                    except Exception as e:
			
 
				+                        print(f"解析规则项失败: {item}, 错误: {e}")
			
 
				+        except json.JSONDecodeError as e:
			
 
				+            print(f"JSON 解析错误: {e}")
			
 
				+        except Exception as e:
			
 
				+            print(f"其他错误: {e}")
			
 
				+
			
 
				+        super().model_post_init(__context)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    message_data = {
			
 
				+        "createTime": 1747042241127,
			
 
				+        "id": 147,
			
 
				+        "interval": 86400,
			
 
				+        "machine": "",
			
 
				+        "mode": "related",
			
 
				+        "operator": "张良",
			
 
				+        "rule": '[{"videos_cnt":{"min":200,"max":0}},{"duration":{"min":30,"max":1200}}]',
			
 
				+        "source": "zhongqingkandianrelated",
			
 
				+        "spiderName": "run_zqkd_related",
			
 
				+        "startTime": 1758211500000,
			
 
				+        "status": 0,
			
 
				+        "taskName": "中青看点相关推荐",
			
 
				+        "updateTime": 1749178503914
			
 
				+    }
			
 
				+
			
 
				+    try:
			
 
				+        message = MessageModel(**message_data)
			
 
				+        print(f"ID: {message.id}")
			
 
				+        print(f"原始规则字符串: {message.rule}")
			
 
				+        print("解析后的规则:")
			
 
				+
			
 
				+
			
 
				+        for idx, rule in enumerate(message.parsed_rule, 1):
			
 
				+            print(f"规则 {idx}:")
			
 
				+            rule_dict = rule.model_dump()
			
 
				+            for field_name, field_value in rule_dict.items():
			
 
				+                if field_value is not None:
			
 
				+                    print(f"  {field_name}: min={field_value['min']}, max={field_value['max']}")
			
 
				+
			
 
				+        if not message.parsed_rule:
			
 
				+            print("  没有有效的规则")
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"解析失败: {e}")
			
 
				+        import traceback
			
 
				+        traceback.print_exc()
			
--- a/core/models/spiders_config_models.py
+++ b/core/models/spiders_config_models.py
@@ -1,25 +1,29 @@
 
				-from pydantic import BaseModel, AnyUrl, validator
			
 
				+# spiders_config_models.py 修改后
			
 
				+from pydantic import BaseModel, AnyUrl, field_validator, ConfigDict
			
 
				 from typing import Dict, Any, Optional, Union
			
 
				 
			
 
				 
			
 
				 class BaseConfig(BaseModel):
			
 
				-    base_url: Optional[AnyUrl]
			
 
				+    model_config = ConfigDict(extra='forbid')  # 禁止额外字段
			
 
				+
			
 
				+    base_url: Optional[AnyUrl] = None
			
 
				     request_timeout: int = 30
			
 
				     max_retries: int = 3
			
 
				     headers: Dict[str, Any] = {}
			
 
				 
			
 
				-    @validator('request_timeout', 'max_retries')
			
 
				-    def validate_positive_int(cls, v, field):
			
 
				+    @field_validator('request_timeout', 'max_retries')
			
 
				+    @classmethod
			
 
				+    def validate_positive_int(cls, v: int) -> int:
			
 
				         if v <= 0:
			
 
				-            raise ValueError(f'{field.name} must be positive')
			
 
				+            raise ValueError('Value must be positive')
			
 
				         return v
			
 
				 
			
 
				 
			
 
				 class PlatformConfig(BaseConfig):
			
 
				     platform: str
			
 
				     mode: str
			
 
				-    path: Optional[str]
			
 
				-    url: AnyUrl
			
 
				+    path: Optional[str] = None
			
 
				+    url: str
			
 
				     method: str
			
 
				     request_body: Dict[str, Any] = {}
			
 
				     loop_times: int = 1
			
@@ -28,21 +32,24 @@ class PlatformConfig(BaseConfig):
 
				     retry_times: int = 0
			
 
				     feishu_sheetid: Optional[str] = None
			
 
				 
			
 
				-    @validator('method')
			
 
				-    def validate_method(cls, v):
			
 
				+    @field_validator('method')
			
 
				+    @classmethod
			
 
				+    def validate_method(cls, v: str) -> str:
			
 
				         allowed_methods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH']
			
 
				         if v.upper() not in allowed_methods:
			
 
				             raise ValueError(f'Method must be one of {", ".join(allowed_methods)}')
			
 
				         return v.upper()
			
 
				 
			
 
				-    @validator('loop_times')
			
 
				-    def validate_loop_times(cls, v):
			
 
				+    @field_validator('loop_times')
			
 
				+    @classmethod
			
 
				+    def validate_loop_times(cls, v: int) -> int:
			
 
				         if v <= 0:
			
 
				             raise ValueError('loop_times must be positive')
			
 
				         return v
			
 
				 
			
 
				-    @validator('loop_interval')
			
 
				-    def validate_loop_interval(cls, v):
			
 
				+    @field_validator('loop_interval')
			
 
				+    @classmethod
			
 
				+    def validate_loop_interval(cls, v: Dict[str, int]) -> Dict[str, int]:
			
 
				         if 'min' not in v or 'max' not in v:
			
 
				             raise ValueError('loop_interval must contain both min and max keys')
			
 
				         if v['min'] < 0 or v['max'] < 0:
			
@@ -51,33 +58,35 @@ class PlatformConfig(BaseConfig):
 
				             raise ValueError('min value cannot be greater than max value')
			
 
				         return v
			
 
				 
			
 
				-    @validator('response_parse')
			
 
				-    def validate_response_parse(cls, v):
			
 
				+    @field_validator('response_parse')
			
 
				+    @classmethod
			
 
				+    def validate_response_parse(cls, v: Dict[str, Any]) -> Dict[str, Any]:
			
 
				         if 'data_path' not in v:
			
 
				             raise ValueError('response_parse must contain data_path')
			
 
				         return v
			
 
				 
			
 
				-    @validator('retry_times')
			
 
				-    def validate_retry_times(cls, v):
			
 
				+    @field_validator('retry_times')
			
 
				+    @classmethod
			
 
				+    def validate_retry_times(cls, v: int) -> int:
			
 
				         if v < 0:
			
 
				             raise ValueError('retry_times must be non-negative')
			
 
				         return v
			
 
				 
			
 
				-    @validator('request_body')
			
 
				-    def validate_request_body(cls, v):
			
 
				-        # 确保request_body中的值是基本类型或字典/列表
			
 
				+    @field_validator('request_body')
			
 
				+    @classmethod
			
 
				+    def validate_request_body(cls, v: Dict[str, Any]) -> Dict[str, Any]:
			
 
				         if not isinstance(v, dict):
			
 
				             raise ValueError('request_body must be a dictionary')
			
 
				-        
			
 
				+
			
 
				         def is_valid_type(value):
			
 
				             if isinstance(value, (str, int, float, bool, type(None))):
			
 
				                 return True
			
 
				             elif isinstance(value, (list, tuple)):
			
 
				                 return all(is_valid_type(item) for item in value)
			
 
				             elif isinstance(value, dict):
			
 
				-                return all(isinstance(k, str) and is_valid_type(v) for k, v in value.items())
			
 
				+                return all(isinstance(k, str) and is_valid_type(v_val) for k, v_val in value.items())
			
 
				             return False
			
 
				-            
			
 
				+
			
 
				         for key, value in v.items():
			
 
				             if not is_valid_type(value):
			
 
				                 raise ValueError(f'Invalid type for request_body["{key}"]: {type(value)}')
			
--- a/core/models/video_item.py
+++ b/core/models/video_item.py
@@ -1,31 +1,27 @@
 
				+# video_item.py 修改后
			
 
				 import time
			
 
				 import uuid
			
 
				 from typing import Optional, Union
			
 
				-
			
 
				-from pydantic import BaseModel, Field, validator
			
 
				-
			
 
				+from pydantic import BaseModel, Field, field_validator, ConfigDict
			
 
				 from services.clean_title import clean_title
			
 
				 
			
 
				 
			
 
				 class VideoItem(BaseModel):
			
 
				-    """
			
 
				-    视频数据结构，支持字段校验和预处理逻辑
			
 
				-    - 字段初始化后可通过 `prepare()` 异步方法补全和清洗数据
			
 
				-    - 使用 `produce_item()` 返回最终有效数据 dict
			
 
				-    """
			
 
				-
			
 
				-    video_id: Optional[str] = Field(default_factory=lambda: str(uuid.uuid4()))
			
 
				-    user_id: str
			
 
				+    # Pydantic 2.x 配置
			
 
				+    model_config = ConfigDict(arbitrary_types_allowed=True)
			
 
				+
			
 
				+    video_id: Optional[Union[int, str]] = Field(default_factory=lambda: str(uuid.uuid4()))
			
 
				+    user_id: Union[int, str]
			
 
				     user_name: str
			
 
				-    out_video_id: str
			
 
				-    out_user_id: Optional[str]
			
 
				+    out_video_id: Union[int, str]
			
 
				+    out_user_id: Optional[Union[int, str]]
			
 
				     video_url: str
			
 
				     cover_url: str
			
 
				     platform: str
			
 
				     strategy: str
			
 
				-    session: Optional[str]
			
 
				+    session: Optional[str] = None
			
 
				 
			
 
				-    video_title: Optional[str]
			
 
				+    video_title: Optional[str] = None
			
 
				     publish_time_stamp: Optional[Union[int, str]] = None
			
 
				     update_time_stamp: Optional[Union[int, str]] = None
			
 
				 
			
@@ -40,30 +36,29 @@ class VideoItem(BaseModel):
 
				     publish_time_str: Optional[str] = None
			
 
				     publish_time: Optional[str] = None
			
 
				 
			
 
				-    # 添加验证器确保数值字段非负
			
 
				-    @validator('duration', 'play_cnt', 'like_cnt', 'comment_cnt', 'share_cnt', 'width', 'height')
			
 
				-    def validate_non_negative(cls, v, field):
			
 
				+
			
 
				+    # Pydantic 2.x 验证器语法
			
 
				+    @field_validator('duration', 'play_cnt', 'like_cnt', 'comment_cnt', 'share_cnt', 'width', 'height')
			
 
				+    @classmethod
			
 
				+    def validate_non_negative(cls, v: int) -> int:
			
 
				         if v < 0:
			
 
				-            raise ValueError(f'{field.name} must be non-negative')
			
 
				+            raise ValueError('Value must be non-negative')
			
 
				         return v
			
 
				 
			
 
				-    @validator('video_url', 'cover_url')
			
 
				-    def validate_url(cls, v, field):
			
 
				+    @field_validator('video_url', 'cover_url')
			
 
				+    @classmethod
			
 
				+    def validate_url(cls, v: str) -> str:
			
 
				         if v and not (v.startswith('http://') or v.startswith('https://')):
			
 
				-            raise ValueError(f'{field.name} must be a valid URL')
			
 
				+            raise ValueError('URL must start with http:// or https://')
			
 
				         return v
			
 
				 
			
 
				+
			
 
				     async def prepare(self):
			
 
				-        """
			
 
				-        异步预处理：清洗标题、补全发布时间和更新时间
			
 
				-        """
			
 
				-        # 标题清洗
			
 
				+        """异步预处理（保持不变）"""
			
 
				         if self.video_title:
			
 
				             self.video_title = await clean_title(self.video_title)
			
 
				 
			
 
				-        # 发布时间处理
			
 
				         if self.publish_time_stamp:
			
 
				-            # 确保publish_time_stamp是整数类型
			
 
				             if isinstance(self.publish_time_stamp, str):
			
 
				                 try:
			
 
				                     if len(self.publish_time_stamp) == 13:
			
@@ -82,11 +77,9 @@ class VideoItem(BaseModel):
 
				         )
			
 
				         self.publish_time = self.publish_time_str
			
 
				 
			
 
				-        # 更新时间戳默认当前时间
			
 
				         if not self.update_time_stamp:
			
 
				             self.update_time_stamp = int(time.time())
			
 
				         else:
			
 
				-            # 确保update_time_stamp是整数类型
			
 
				             if isinstance(self.update_time_stamp, str):
			
 
				                 try:
			
 
				                     if len(self.update_time_stamp) == 13:
			
@@ -102,9 +95,7 @@ class VideoItem(BaseModel):
 
				             self.session = str(f"{self.platform}_{int(time.time())}")
			
 
				 
			
 
				     async def produce_item(self) -> Optional[dict]:
			
 
				-        """
			
 
				-        异步生成最终数据字典，校验必要字段是否存在，返回 None 则不合格
			
 
				-        """
			
 
				+        """异步生成最终数据字典"""
			
 
				         await self.prepare()
			
 
				 
			
 
				         must_fields = [
			
@@ -115,4 +106,5 @@ class VideoItem(BaseModel):
 
				             if not getattr(self, f, None):
			
 
				                 return None
			
 
				 
			
 
				-        return self.dict()
			
 
				+        # Pydantic 2.x: 使用 model_dump() 替代 dict()
			
 
				+        return self.model_dump()
			
--- a/core/utils/codes.py
+++ b/core/utils/codes.py
@@ -27,6 +27,11 @@ CODES = {
 
				     "1011": "视频数量达到当日最大值",
			
 
				     "1012": "实时入库量",
			
 
				 
			
 
				+    # 爬虫过程（2xxx）
			
 
				+    ""
			
 
				+    "2000": "请求到的数据长度",
			
 
				+
			
 
				+
			
 
				     # 配置错误类 (4xxx)
			
 
				     "4000": "爬虫配置缺失",
			
 
				     "4001": "ETL配置缺失",
			
@@ -55,6 +60,7 @@ CODES = {
 
				     "9019": "任务参数缺失",
			
 
				     "9020": "过滤条件不匹配",
			
 
				     "9021": "接口返回数据为空",
			
 
				+    "9022": "爬虫执行失败",
			
 
				 
			
 
				     # 系统致命错误类 (99xx)
			
 
				     "9900": "数据库连接失败",
			
--- a/core/utils/config_documentation.py
+++ b/core/utils/config_documentation.py
@@ -1,219 +0,0 @@
 
				-"""
			
 
				-配置文档生成工具
			
 
				-自动生成配置文件说明文档
			
 
				-"""
			
 
				-import yaml
			
 
				-from core.utils.config_manager import get_config_manager
			
 
				-from core.utils.path_utils import spiders_config_path
			
 
				-
			
 
				-
			
 
				-class ConfigDocumentation:
			
 
				-    """
			
 
				-    配置文档生成工具
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self):
			
 
				-        self.config_manager = get_config_manager()
			
 
				-
			
 
				-    def generate_env_config_docs(self) -> str:
			
 
				-        """
			
 
				-        生成环境配置文档
			
 
				-        """
			
 
				-        docs = "# 环境配置说明\n\n"
			
 
				-        docs += "环境配置通过 `.env` 文件进行配置，以下为所有可配置项:\n\n"
			
 
				-        
			
 
				-        env_settings_info = {
			
 
				-            "ENV": {
			
 
				-                "description": "运行环境",
			
 
				-                "default": "prod",
			
 
				-                "options": ["prod", "dev"]
			
 
				-            },
			
 
				-            "DB_HOST": {
			
 
				-                "description": "数据库主机地址",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "DB_PORT": {
			
 
				-                "description": "数据库端口",
			
 
				-                "default": 3306
			
 
				-            },
			
 
				-            "DB_USER": {
			
 
				-                "description": "数据库用户名",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "DB_PASSWORD": {
			
 
				-                "description": "数据库密码",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "DB_NAME": {
			
 
				-                "description": "数据库名称",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "DB_CHARSET": {
			
 
				-                "description": "数据库字符集",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "ROCKETMQ_ENDPOINT": {
			
 
				-                "description": "RocketMQ接入点",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "ROCKETMQ_ACCESS_KEY_ID": {
			
 
				-                "description": "RocketMQ访问密钥ID",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "ROCKETMQ_ACCESS_KEY_SECRET": {
			
 
				-                "description": "RocketMQ访问密钥",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "FEISHU_APPID": {
			
 
				-                "description": "飞书应用ID",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "FEISHU_APPSECRET": {
			
 
				-                "description": "飞书应用密钥",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "ALIYUN_ACCESS_KEY_ID": {
			
 
				-                "description": "阿里云访问密钥ID",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "ALIYUN_ACCESS_KEY_SECRET": {
			
 
				-                "description": "阿里云访问密钥",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "REDIS_HOST": {
			
 
				-                "description": "Redis主机地址",
			
 
				-                "required": True
			
 
				-            },
			
 
				-            "REDIS_PORT": {
			
 
				-                "description": "Redis端口",
			
 
				-                "default": 6379
			
 
				-            },
			
 
				-            "REDIS_PASSWORD": {
			
 
				-                "description": "Redis密码",
			
 
				-                "required": True
			
 
				-            }
			
 
				-        }
			
 
				-        
			
 
				-        docs += "| 配置项 | 描述 | 是否必填 | 默认值 |\n"
			
 
				-        docs += "|--------|------|----------|--------|\n"
			
 
				-        
			
 
				-        for key, info in env_settings_info.items():
			
 
				-            description = info.get("description", "")
			
 
				-            required = "是" if info.get("required", False) else "否"
			
 
				-            default = str(info.get("default", "")) if info.get("default") is not None else ""
			
 
				-            options = ", ".join(info.get("options", []))
			
 
				-            if options:
			
 
				-                description += f" (可选值: {options})"
			
 
				-                
			
 
				-            docs += f"| {key} | {description} | {required} | {default} |\n"
			
 
				-            
			
 
				-        return docs
			
 
				-
			
 
				-    def generate_spider_config_docs(self) -> str:
			
 
				-        """
			
 
				-        生成爬虫配置文档
			
 
				-        """
			
 
				-        docs = "# 爬虫配置说明\n\n"
			
 
				-        docs += "爬虫配置通过 `config/spiders_config.yaml` 文件进行配置。\n\n"
			
 
				-        
			
 
				-        # 添加配置示例
			
 
				-        docs += "## 配置示例\n\n```yaml\n"
			
 
				-        with open(spiders_config_path, 'r', encoding='utf-8') as f:
			
 
				-            docs += f.read()
			
 
				-        docs += "```\n\n"
			
 
				-        
			
 
				-        # 添加字段说明
			
 
				-        docs += "## 字段说明\n\n"
			
 
				-        
			
 
				-        global_config_fields = {
			
 
				-            "base_url": "基础URL，用于拼接完整请求URL",
			
 
				-            "request_timeout": "请求超时时间（秒）",
			
 
				-            "max_retries": "最大重试次数",
			
 
				-            "headers": "请求头信息"
			
 
				-        }
			
 
				-        
			
 
				-        platform_config_fields = {
			
 
				-            "platform": "平台名称",
			
 
				-            "mode": "爬取模式（如 recommend, author）",
			
 
				-            "path": "API路径",
			
 
				-            "url": "完整请求URL",
			
 
				-            "method": "HTTP请求方法",
			
 
				-            "request_body": "请求体参数",
			
 
				-            "loop_times": "循环次数",
			
 
				-            "loop_interval": "循环间隔（min/max）",
			
 
				-            "response_parse": "响应解析配置",
			
 
				-            "feishu_sheetid": "飞书表格ID"
			
 
				-        }
			
 
				-        
			
 
				-        response_parse_fields = {
			
 
				-            "data_path": "数据列表路径",
			
 
				-            "next_cursor": "下一页游标路径",
			
 
				-            "has_more": "是否还有更多数据路径",
			
 
				-            "fields": "字段映射配置"
			
 
				-        }
			
 
				-        
			
 
				-        docs += "### 全局配置字段\n\n"
			
 
				-        docs += "| 字段 | 描述 |\n"
			
 
				-        docs += "|------|------|\n"
			
 
				-        for field, description in global_config_fields.items():
			
 
				-            docs += f"| {field} | {description} |\n"
			
 
				-            
			
 
				-        docs += "\n### 平台配置字段\n\n"
			
 
				-        docs += "| 字段 | 描述 |\n"
			
 
				-        docs += "|------|------|\n"
			
 
				-        for field, description in platform_config_fields.items():
			
 
				-            docs += f"| {field} | {description} |\n"
			
 
				-            
			
 
				-        docs += "\n### 响应解析字段\n\n"
			
 
				-        docs += "| 字段 | 描述 |\n"
			
 
				-        docs += "|------|------|\n"
			
 
				-        for field, description in response_parse_fields.items():
			
 
				-            docs += f"| {field} | {description} |\n"
			
 
				-            
			
 
				-        return docs
			
 
				-
			
 
				-    def generate_complete_docs(self) -> str:
			
 
				-        """
			
 
				-        生成完整配置文档
			
 
				-        """
			
 
				-        docs = "# AutoScraperX 配置说明\n\n"
			
 
				-        docs += "本文档详细说明了AutoScraperX项目的配置项。\n\n"
			
 
				-        docs += "---\n\n"
			
 
				-        docs += self.generate_env_config_docs()
			
 
				-        docs += "\n---\n\n"
			
 
				-        docs += self.generate_spider_config_docs()
			
 
				-        docs += "\n---\n\n"
			
 
				-        docs += "## 当前配置状态\n\n"
			
 
				-        
			
 
				-        try:
			
 
				-            stats = self.config_manager.get_config_stats()
			
 
				-            docs += f"- 平台配置数量: {stats['total_platforms']}\n"
			
 
				-            docs += f"- 运行环境: {stats['env']}\n"
			
 
				-            docs += f"- 配置文件路径: {stats['config_file']}\n"
			
 
				-        except Exception as e:
			
 
				-            docs += f"配置状态获取失败: {e}\n"
			
 
				-            
			
 
				-        return docs
			
 
				-
			
 
				-    def save_docs(self, filepath: str = "CONFIGURATION.md"):
			
 
				-        """
			
 
				-        保存文档到文件
			
 
				-        """
			
 
				-        docs = self.generate_complete_docs()
			
 
				-        with open(filepath, 'w', encoding='utf-8') as f:
			
 
				-            f.write(docs)
			
 
				-        return filepath
			
 
				-
			
 
				-
			
 
				-def generate_config_docs():
			
 
				-    """
			
 
				-    生成配置文档
			
 
				-    """
			
 
				-    doc_generator = ConfigDocumentation()
			
 
				-    filepath = doc_generator.save_docs()
			
 
				-    print(f"配置文档已保存到: {filepath}")
			
 
				-    return filepath
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    generate_config_docs()
			
--- a/core/utils/config_health_check.py
+++ b/core/utils/config_health_check.py
@@ -1,184 +0,0 @@
 
				-"""
			
 
				-配置健康检查工具
			
 
				-用于验证配置文件的完整性和正确性
			
 
				-"""
			
 
				-import sys
			
 
				-from typing import List, Dict, Any
			
 
				-from core.utils.config_manager import get_config_manager
			
 
				-from core.utils.spider_config import SpiderConfig
			
 
				-from config import settings
			
 
				-
			
 
				-
			
 
				-class ConfigHealthCheck:
			
 
				-    """
			
 
				-    配置健康检查工具
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self):
			
 
				-        self.config_manager = get_config_manager()
			
 
				-        self.errors = []
			
 
				-        self.warnings = []
			
 
				-
			
 
				-    def check_env_config(self) -> bool:
			
 
				-        """
			
 
				-        检查环境配置
			
 
				-        """
			
 
				-        try:
			
 
				-            # 检查必要配置是否存在
			
 
				-            required_settings = [
			
 
				-                'DB_HOST', 'DB_USER', 'DB_PASSWORD', 'DB_NAME',
			
 
				-                'ROCKETMQ_ENDPOINT', 'ROCKETMQ_ACCESS_KEY_ID', 'ROCKETMQ_ACCESS_KEY_SECRET',
			
 
				-                'FEISHU_APPID', 'FEISHU_APPSECRET',
			
 
				-                'ALIYUN_ACCESS_KEY_ID', 'ALIYUN_ACCESS_KEY_SECRET',
			
 
				-                'REDIS_HOST', 'REDIS_PASSWORD'
			
 
				-            ]
			
 
				-            
			
 
				-            for setting in required_settings:
			
 
				-                if not getattr(settings, setting, None):
			
 
				-                    self.errors.append(f"环境配置缺失: {setting}")
			
 
				-            
			
 
				-            # 检查URL格式
			
 
				-            url_settings = ['ROCKETMQ_ENDPOINT']
			
 
				-            for setting in url_settings:
			
 
				-                url = getattr(settings, setting, None)
			
 
				-                if url and not isinstance(url, str):
			
 
				-                    self.errors.append(f"URL配置格式错误: {setting}")
			
 
				-            
			
 
				-            return len(self.errors) == 0
			
 
				-            
			
 
				-        except Exception as e:
			
 
				-            self.errors.append(f"环境配置检查异常: {str(e)}")
			
 
				-            return False
			
 
				-
			
 
				-    def check_spider_configs(self) -> bool:
			
 
				-        """
			
 
				-        检查所有爬虫配置
			
 
				-        """
			
 
				-        try:
			
 
				-            platforms = self.config_manager.list_platforms()
			
 
				-            if not platforms:
			
 
				-                self.warnings.append("未找到任何平台配置")
			
 
				-                return True
			
 
				-                
			
 
				-            valid_count = 0
			
 
				-            for platform in platforms:
			
 
				-                try:
			
 
				-                    config = self.config_manager.get_platform_config(platform)
			
 
				-                    # 验证配置字段
			
 
				-                    if not config.platform:
			
 
				-                        self.errors.append(f"平台 {platform} 缺少 platform 字段")
			
 
				-                    if not config.mode:
			
 
				-                        self.errors.append(f"平台 {platform} 缺少 mode 字段")
			
 
				-                    if not config.url:
			
 
				-                        self.errors.append(f"平台 {platform} 缺少 url 字段")
			
 
				-                    valid_count += 1
			
 
				-                except Exception as e:
			
 
				-                    self.errors.append(f"平台 {platform} 配置验证失败: {str(e)}")
			
 
				-            
			
 
				-            return len(self.errors) == 0
			
 
				-            
			
 
				-        except Exception as e:
			
 
				-            self.errors.append(f"爬虫配置检查异常: {str(e)}")
			
 
				-            return False
			
 
				-
			
 
				-    def check_file_permissions(self) -> bool:
			
 
				-        """
			
 
				-        检查配置文件权限
			
 
				-        """
			
 
				-        import os
			
 
				-        from core.utils.path_utils import spiders_config_path
			
 
				-        
			
 
				-        try:
			
 
				-            # 检查爬虫配置文件是否存在
			
 
				-            if not os.path.exists(spiders_config_path):
			
 
				-                self.errors.append(f"爬虫配置文件不存在: {spiders_config_path}")
			
 
				-                return False
			
 
				-                
			
 
				-            # 检查文件是否可读
			
 
				-            if not os.access(spiders_config_path, os.R_OK):
			
 
				-                self.errors.append(f"爬虫配置文件不可读: {spiders_config_path}")
			
 
				-                
			
 
				-            return len(self.errors) == 0
			
 
				-            
			
 
				-        except Exception as e:
			
 
				-            self.errors.append(f"文件权限检查异常: {str(e)}")
			
 
				-            return False
			
 
				-
			
 
				-    def run_all_checks(self) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        运行所有检查
			
 
				-        """
			
 
				-        self.errors.clear()
			
 
				-        self.warnings.clear()
			
 
				-        
			
 
				-        env_ok = self.check_env_config()
			
 
				-        spider_ok = self.check_spider_configs()
			
 
				-        file_ok = self.check_file_permissions()
			
 
				-        
			
 
				-        overall_ok = env_ok and spider_ok and file_ok
			
 
				-        
			
 
				-        return {
			
 
				-            "success": overall_ok,
			
 
				-            "errors": self.errors.copy(),
			
 
				-            "warnings": self.warnings.copy(),
			
 
				-            "details": {
			
 
				-                "env_config": env_ok,
			
 
				-                "spider_configs": spider_ok,
			
 
				-                "file_permissions": file_ok
			
 
				-            }
			
 
				-        }
			
 
				-
			
 
				-    def print_report(self):
			
 
				-        """
			
 
				-        打印健康检查报告
			
 
				-        """
			
 
				-        result = self.run_all_checks()
			
 
				-        
			
 
				-        print("=" * 50)
			
 
				-        print("配置健康检查报告")
			
 
				-        print("=" * 50)
			
 
				-        
			
 
				-        if result["success"]:
			
 
				-            print("✓ 所有配置检查通过")
			
 
				-        else:
			
 
				-            print("✗ 配置存在问题")
			
 
				-            
			
 
				-        print(f"\n详细信息:")
			
 
				-        print(f"  环境配置: {'✓' if result['details']['env_config'] else '✗'}")
			
 
				-        print(f"  爬虫配置: {'✓' if result['details']['spider_configs'] else '✗'}")
			
 
				-        print(f"  文件权限: {'✓' if result['details']['file_permissions'] else '✗'}")
			
 
				-        
			
 
				-        if result["warnings"]:
			
 
				-            print(f"\n警告:")
			
 
				-            for warning in result["warnings"]:
			
 
				-                print(f"  - {warning}")
			
 
				-                
			
 
				-        if result["errors"]:
			
 
				-            print(f"\n错误:")
			
 
				-            for error in result["errors"]:
			
 
				-                print(f"  - {error}")
			
 
				-        
			
 
				-        print("\n统计信息:")
			
 
				-        try:
			
 
				-            stats = self.config_manager.get_config_stats()
			
 
				-            print(f"  平台数量: {stats['total_platforms']}")
			
 
				-            print(f"  运行环境: {stats['env']}")
			
 
				-        except Exception as e:
			
 
				-            print(f"  统计信息获取失败: {e}")
			
 
				-            
			
 
				-        print("=" * 50)
			
 
				-        
			
 
				-        return result
			
 
				-
			
 
				-
			
 
				-def run_health_check():
			
 
				-    """
			
 
				-    运行配置健康检查
			
 
				-    """
			
 
				-    checker = ConfigHealthCheck()
			
 
				-    return checker.print_report()
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    result = run_health_check()
			
 
				-    sys.exit(0 if result["success"] else 1)
			
--- a/core/utils/config_manager.py
+++ b/core/utils/config_manager.py
@@ -1,129 +0,0 @@
 
				-"""
			
 
				-配置管理服务
			
 
				-统一管理环境配置和爬虫配置
			
 
				-"""
			
 
				-import json
			
 
				-from typing import Dict, Any, Optional
			
 
				-from core.utils.spider_config import SpiderConfig
			
 
				-from core.models.spiders_config_models import PlatformConfig
			
 
				-
			
 
				-
			
 
				-class ConfigManager:
			
 
				-    """
			
 
				-    统一配置管理器
			
 
				-    提供对环境配置和爬虫配置的统一访问接口
			
 
				-    """
			
 
				-    
			
 
				-    def __init__(self):
			
 
				-        # 延迟导入settings以避免循环导入
			
 
				-        from config import settings
			
 
				-        self._env_settings = settings
			
 
				-        self._spider_config = SpiderConfig
			
 
				-
			
 
				-    @property
			
 
				-    def env_settings(self):
			
 
				-        """
			
 
				-        获取环境配置
			
 
				-        """
			
 
				-        return self._env_settings
			
 
				-
			
 
				-    def get_platform_config(self, platform_name: str) -> PlatformConfig:
			
 
				-        """
			
 
				-        获取平台爬虫配置
			
 
				-        """
			
 
				-        return self._spider_config.get_platform_config(platform_name)
			
 
				-
			
 
				-    def list_platforms(self) -> list:
			
 
				-        """
			
 
				-        获取所有平台列表
			
 
				-        """
			
 
				-        return self._spider_config.list_all_platforms()
			
 
				-
			
 
				-    def reload_spider_configs(self):
			
 
				-        """
			
 
				-        重新加载爬虫配置
			
 
				-        """
			
 
				-        self._spider_config.reload_config()
			
 
				-
			
 
				-    def get_config_stats(self) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        获取配置统计信息
			
 
				-        """
			
 
				-        stats = self._spider_config.get_config_stats()
			
 
				-        stats["env"] = self._env_settings.ENV
			
 
				-        return stats
			
 
				-
			
 
				-    def validate_platform_config(self, platform_name: str) -> bool:
			
 
				-        """
			
 
				-        验证平台配置是否有效
			
 
				-        """
			
 
				-        try:
			
 
				-            self.get_platform_config(platform_name)
			
 
				-            return True
			
 
				-        except Exception:
			
 
				-            return False
			
 
				-
			
 
				-    def export_configs(self) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        导出所有配置信息（用于调试和监控）
			
 
				-        """
			
 
				-        return {
			
 
				-            "env_settings": {
			
 
				-                "env": self._env_settings.ENV,
			
 
				-                "log_level": self._env_settings.LOG_LEVEL,
			
 
				-                "db_host": self._env_settings.DB_HOST,
			
 
				-                "rocketmq_endpoint": str(self._env_settings.ROCKETMQ_ENDPOINT),
			
 
				-                # 不包含敏感信息如密码、密钥等
			
 
				-            },
			
 
				-            "spider_configs": self.list_platforms(),
			
 
				-            "stats": self.get_config_stats()
			
 
				-        }
			
 
				-
			
 
				-    def get_platform_configs_summary(self) -> Dict[str, Dict[str, Any]]:
			
 
				-        """
			
 
				-        获取所有平台配置摘要信息
			
 
				-        """
			
 
				-        platforms = self.list_platforms()
			
 
				-        summary = {}
			
 
				-        
			
 
				-        for platform in platforms:
			
 
				-            try:
			
 
				-                config = self.get_platform_config(platform)
			
 
				-                summary[platform] = {
			
 
				-                    "platform": config.platform,
			
 
				-                    "mode": config.mode,
			
 
				-                    "method": config.method,
			
 
				-                    "url": str(config.url),
			
 
				-                    "loop_times": config.loop_times,
			
 
				-                }
			
 
				-            except Exception as e:
			
 
				-                summary[platform] = {
			
 
				-                    "error": str(e)
			
 
				-                }
			
 
				-                
			
 
				-        return summary
			
 
				-
			
 
				-    async def reload_configs_runtime(self):
			
 
				-        """
			
 
				-        运行时重新加载配置（支持不重启服务的情况下重新加载配置）
			
 
				-        这个方法可以在接收到特定信号或API调用时被调用
			
 
				-        """
			
 
				-        try:
			
 
				-            # 重新加载爬虫配置
			
 
				-            self.reload_spider_configs()
			
 
				-            return True
			
 
				-        except Exception as e:
			
 
				-            # 记录错误日志
			
 
				-            print(f"运行时重新加载配置失败: {e}")
			
 
				-            return False
			
 
				-
			
 
				-
			
 
				-# 全局配置管理器实例
			
 
				-config_manager = ConfigManager()
			
 
				-
			
 
				-
			
 
				-def get_config_manager() -> ConfigManager:
			
 
				-    """
			
 
				-    获取配置管理器实例
			
 
				-    """
			
 
				-    return config_manager
			
--- a/core/utils/feishu_data_async.py
+++ b/core/utils/feishu_data_async.py
@@ -138,10 +138,7 @@ class FeishuDataAsync:
 
				                     },
			
 
				 
			
 
				             }
			
 
				-        print(payload)
			
 
				-
			
 
				         async with self.session.post(url, headers=headers, json=payload,ssl=False) as response:
			
 
				-            print(response)
			
 
				             if response.status != 200:
			
 
				                 error_text = await response.text()
			
 
				             if response.status != 200:
			
--- a/core/utils/helpers.py
+++ b/core/utils/helpers.py
@@ -2,7 +2,7 @@ import asyncio
 
				 
			
 
				 from typing import List, Dict
			
 
				 
			
 
				-from datetime import datetime
			
 
				+from datetime import datetime, timedelta
			
 
				 from core.utils.feishu_data_async import FeishuDataAsync
			
 
				 from core.utils.gpt4o_mini_help import GPT4oMini
			
 
				 
			
@@ -17,13 +17,28 @@ async def get_title_filter_word() -> List[str]:
 
				     sheet_id = "BS9uyu"
			
 
				     async with FeishuDataAsync() as feishu:
			
 
				         feishu_data = await feishu.get_values(spreadsheet_token=spreadsheet_token, sheet_id=sheet_id)
			
 
				-        return feishu_data[1]
			
 
				+        for row in feishu_data[1:]:
			
 
				+            title_rule = row[0]
			
 
				+            if title_rule:
			
 
				+                return title_rule.split(",")
			
 
				+            else:
			
 
				+                return None
			
 
				+        return None
			
 
				 
			
 
				 async def generate_titles(sheet_id: str,video_obj: Dict,logger,aliyun_log):
			
 
				     title_filter_word = await get_title_filter_word()
			
 
				     title = video_obj.get("video_title")
			
 
				     if not title:
			
 
				         return video_obj
			
 
				+    # 确保title和title_filter_word中的所有元素都是字符串
			
 
				+    if title is None:
			
 
				+        title = ""
			
 
				+    else:
			
 
				+        title = str(title)
			
 
				+
			
 
				+    # 过滤掉title_filter_word中的非字符串元素，并确保它们不为None
			
 
				+    title_filter_word = [str(keyword) for keyword in title_filter_word if keyword is not None]
			
 
				+    
			
 
				     contains_keyword = any(keyword in title for keyword in title_filter_word)
			
 
				     logger.info(f"【{title}】标题包含过滤关键词：{contains_keyword}")
			
 
				     if contains_keyword:
			
@@ -33,8 +48,8 @@ async def generate_titles(sheet_id: str,video_obj: Dict,logger,aliyun_log):
 
				         current_time = datetime.now()
			
 
				         formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
			
 
				         values = [
			
 
				-                video_obj["v_url"],
			
 
				-                video_obj["url"],
			
 
				+                video_obj["video_url"],
			
 
				+                video_obj["cover_url"],
			
 
				                 title,
			
 
				                 new_title,
			
 
				                 formatted_time,
			
@@ -47,9 +62,25 @@ async def insert_safe_data(sheet_id: str, values: List):
 
				     async with FeishuDataAsync() as feishu:
			
 
				         await feishu.insert_values(spreadsheet_token=spreadsheet_token, sheet_id=sheet_id,ranges="A2:Z2",values=values)
			
 
				 
			
 
				+async def is_near_next_day(threshold_minutes: int = 3) -> bool:
			
 
				+        """
			
 
				+        时间检查
			
 
				+
			
 
				+        """
			
 
				+        now = datetime.now()
			
 
				+        tomorrow = now.date() + timedelta(days=1)
			
 
				+        midnight = datetime.combine(tomorrow, datetime.min.time())
			
 
				+        time_left = midnight - now
			
 
				+
			
 
				+        if time_left.total_seconds() < threshold_minutes * 60:
			
 
				+            return True
			
 
				+
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				 
			
 
				 
			
 
				 
			
 
				-if __name__ == '__main__':
			
 
				-     asyncio.run(insert_safe_data("K0gA9Y", ["1","2","3","4","5"]))
			
 
				-    
			
 
				+# if __name__ == '__main__':
			
 
				+#      # asyncio.run(insert_safe_data("K0gA9Y", ["1","2","3","4","5"]))
			
 
				+#       print(asyncio.run(get_title_filter_word()))
			
--- a/core/utils/log/log_codes.py
+++ b/core/utils/log/log_codes.py
@@ -14,8 +14,8 @@ codes.py
 
				 CODES = {
			
 
				     # 成功类 (1xxx)
			
 
				     "1000": "任务接收成功",
			
 
				-    "1001": "数据采集成功",
			
 
				-    "1002": "视频处理成功",
			
 
				+    "1001": "获取到一条数据",
			
 
				+    "1002": "推送ETL成功",
			
 
				     "1003": "爬虫执行指标汇总",
			
 
				     "1004": "未获取到数据",
			
 
				     "1005": "视频过滤成功",
			
@@ -74,6 +74,7 @@ CODES = {
 
				     "9023": "请求返回code非0",
			
 
				     "9024": "字段提取失败",
			
 
				     "9025": "接口返回为空",
			
 
				+    "9026": "处理数据过程异常",
			
 
				 
			
 
				     # 系统致命错误 (99xx)
			
 
				     "9900": "数据库连接失败",
			
--- a/core/utils/log_codes.py
+++ b/core/utils/log_codes.py
@@ -0,0 +1,87 @@
 
				+# core/utils/log_codes.py
			
 
				+"""
			
 
				+阿里云日志码统一管理
			
 
				+采用分层分类的方式管理所有日志码
			
 
				+"""
			
 
				+
			
 
				+from enum import Enum
			
 
				+from typing import Dict, Any
			
 
				+
			
 
				+
			
 
				+class LogCategory(Enum):
			
 
				+    """日志分类"""
			
 
				+    SYSTEM = "系统"  # 系统相关日志
			
 
				+    TASK = "任务"  # 任务处理日志
			
 
				+    DATA = "数据"  # 数据处理日志
			
 
				+    ERROR = "错误"  # 错误日志
			
 
				+    MONITOR = "监控"  # 监控日志
			
 
				+
			
 
				+
			
 
				+class LogCodeManager:
			
 
				+    """日志码管理器"""
			
 
				+
			
 
				+    # 系统日志码 (1000-1999)
			
 
				+    SYSTEM_START = ("1000", "系统启动", LogCategory.SYSTEM)
			
 
				+    SYSTEM_SHUTDOWN = ("1001", "系统关闭", LogCategory.SYSTEM)
			
 
				+    SYSTEM_HEALTHY = ("1002", "系统健康检查", LogCategory.SYSTEM)
			
 
				+
			
 
				+    # 任务日志码 (2000-2999)
			
 
				+    TASK_RECEIVED = ("2000", "任务接收成功", LogCategory.TASK)
			
 
				+    TASK_START = ("2001", "任务开始执行", LogCategory.TASK)
			
 
				+    TASK_SUCCESS = ("2010", "任务执行成功", LogCategory.TASK)
			
 
				+    TASK_FAILED = ("2011", "任务执行失败", LogCategory.TASK)
			
 
				+
			
 
				+    # 数据处理日志码 (3000-3999)
			
 
				+    DATA_PROCESS_START = ("3000", "开始处理数据", LogCategory.DATA)
			
 
				+    DATA_PROCESS_SUCCESS = ("3001", "数据处理成功", LogCategory.DATA)
			
 
				+    DATA_VALIDATION_FAILED = ("3002", "数据验证失败", LogCategory.DATA)
			
 
				+    DATA_TRANSFORM_SUCCESS = ("3003", "数据转换成功", LogCategory.DATA)
			
 
				+
			
 
				+    # 错误日志码 (9000-9999)
			
 
				+    ERROR_GENERAL = ("9000", "一般错误", LogCategory.ERROR)
			
 
				+    ERROR_NETWORK = ("9001", "网络错误", LogCategory.ERROR)
			
 
				+    ERROR_DATABASE = ("9002", "数据库错误", LogCategory.ERROR)
			
 
				+    ERROR_CONFIG = ("9003", "配置错误", LogCategory.ERROR)
			
 
				+
			
 
				+    # 监控日志码 (1500-1599)
			
 
				+    MONITOR_PROCESS_START = ("1500", "进程启动", LogCategory.MONITOR)
			
 
				+    MONITOR_PROCESS_RESTART = ("1501", "进程重启", LogCategory.MONITOR)
			
 
				+    MONITOR_PROCESS_STOP = ("1502", "进程停止", LogCategory.MONITOR)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def get_code_info(cls, code: str) -> Dict[str, Any]:
			
 
				+        """根据code获取日志信息"""
			
 
				+        for attr_name in dir(cls):
			
 
				+            if not attr_name.startswith('_') and attr_name.isupper():
			
 
				+                code_info = getattr(cls, attr_name)
			
 
				+                if isinstance(code_info, tuple) and code_info[0] == code:
			
 
				+                    return {
			
 
				+                        "code": code_info[0],
			
 
				+                        "message": code_info[1],
			
 
				+                        "category": code_info[2].value
			
 
				+                    }
			
 
				+        return None
			
 
				+
			
 
				+    @classmethod
			
 
				+    def get_codes_by_category(cls, category: LogCategory) -> list:
			
 
				+        """根据分类获取所有日志码"""
			
 
				+        codes = []
			
 
				+        for attr_name in dir(cls):
			
 
				+            if not attr_name.startswith('_') and attr_name.isupper():
			
 
				+                code_info = getattr(cls, attr_name)
			
 
				+                if isinstance(code_info, tuple) and code_info[2] == category:
			
 
				+                    codes.append({
			
 
				+                        "name": attr_name,
			
 
				+                        "code": code_info[0],
			
 
				+                        "message": code_info[1]
			
 
				+                    })
			
 
				+        return codes
			
 
				+
			
 
				+    @classmethod
			
 
				+    def validate_code(cls, code: str) -> bool:
			
 
				+        """验证日志码是否存在"""
			
 
				+        return cls.get_code_info(code) is not None
			
 
				+
			
 
				+
			
 
				+# 全局实例
			
 
				+log_codes = LogCodeManager()
			
--- a/core/utils/spider_config.py
+++ b/core/utils/spider_config.py
@@ -1,4 +1,4 @@
 
				-# spider_config.py
			
 
				+# spider_config.py 修改后
			
 
				 import os
			
 
				 from urllib.parse import urljoin
			
 
				 import yaml
			
@@ -12,23 +12,14 @@ class SpiderConfig:
 
				 
			
 
				     @classmethod
			
 
				     def _load_yaml(cls):
			
 
				-        """
			
 
				-        加载spiders_config.yaml
			
 
				-        :return:
			
 
				-        """
			
 
				         if not os.path.exists(cls._config_path):
			
 
				             raise FileNotFoundError(f"[配置错误] 找不到配置文件: {cls._config_path}")
			
 
				-            
			
 
				-        # 检查文件是否修改过
			
 
				+
			
 
				         with open(cls._config_path, "r", encoding="utf-8") as f:
			
 
				             cls._config = yaml.safe_load(f)
			
 
				 
			
 
				     @classmethod
			
 
				     def get_platform_config(cls, classname: str) -> PlatformConfig:
			
 
				-        """
			
 
				-        获取平台配置，并拼接完整 URL
			
 
				-        支持类方法调用 + 单次加载配置
			
 
				-        """
			
 
				         if cls._config is None:
			
 
				             cls._load_yaml()
			
 
				 
			
@@ -38,52 +29,36 @@ class SpiderConfig:
 
				         platform_config = cls._config.get(classname, {})
			
 
				         base_config = cls._config.get("default", {})
			
 
				 
			
 
				-        # 合并配置：平台配置覆盖默认配置
			
 
				+        # 合并配置
			
 
				         merged = {**base_config, **platform_config}
			
 
				 
			
 
				-        # 自动拼接完整 url（优先用完整 url）
			
 
				+        # 自动拼接完整 url
			
 
				         if "url" not in merged and "base_url" in merged and "path" in merged:
			
 
				             merged["url"] = urljoin(merged["base_url"], merged["path"])
			
 
				 
			
 
				-        # 使用 pydantic 进行验证
			
 
				         try:
			
 
				+            # Pydantic 2.x 直接实例化
			
 
				             return PlatformConfig(**merged)
			
 
				         except Exception as e:
			
 
				             raise ValueError(f"[配置错误] 平台 {classname} 的配置验证失败: {e}")
			
 
				 
			
 
				     @classmethod
			
 
				     def reload_config(cls):
			
 
				-        """
			
 
				-        强制重新加载配置文件
			
 
				-        """
			
 
				         cls._config = None
			
 
				         cls._load_yaml()
			
 
				 
			
 
				     @classmethod
			
 
				     def list_all_platforms(cls):
			
 
				-        """
			
 
				-        获取所有平台配置名称列表
			
 
				-        """
			
 
				         if cls._config is None:
			
 
				             cls._load_yaml()
			
 
				-        platforms = [key for key in cls._config.keys() if key != "default"]
			
 
				-        return platforms
			
 
				+        return [key for key in cls._config.keys() if key != "default"]
			
 
				 
			
 
				     @classmethod
			
 
				     def get_config_stats(cls):
			
 
				-        """
			
 
				-        获取配置统计信息
			
 
				-        """
			
 
				         if cls._config is None:
			
 
				             cls._load_yaml()
			
 
				         return {
			
 
				             "total_platforms": len(cls.list_all_platforms()),
			
 
				             "last_modified": os.path.getmtime(cls._config_path) if os.path.exists(cls._config_path) else 0,
			
 
				             "config_file": cls._config_path
			
 
				-        }
			
 
				-
			
 
				-
			
 
				-# 示例使用
			
 
				-if __name__ == '__main__':
			
 
				-    config = SpiderConfig.get_platform_config("yuannifuqimanmanrecommend")
			
 
				-    print(config)
			
 
				+        }
			
--- a/core/video_processor.py
+++ b/core/video_processor.py
@@ -0,0 +1,140 @@
 
				+# core/video_processor.py
			
 
				+import time
			
 
				+import traceback
			
 
				+from typing import Dict, Optional, Any
			
 
				+from core.models.video_item import VideoItem
			
 
				+from core.utils.extractors import extract_fields
			
 
				+from core.utils.helpers import generate_titles
			
 
				+from core.utils.log.logger_manager import LoggerManager
			
 
				+from services.pipeline import PiaoQuanPipeline
			
 
				+import uuid
			
 
				+
			
 
				+
			
 
				+class VideoProcessor:
			
 
				+    """
			
 
				+    视频处理器：
			
 
				+    1、字段统一
			
 
				+    2、数据过滤
			
 
				+    3、视频处理（标题生成）
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, platform: str, mode: str, field_map: Dict,
			
 
				+                 feishu_sheetid: str = None, logger=None, aliyun_log=None):
			
 
				+        self.platform = platform
			
 
				+        self.mode = mode
			
 
				+        self.field_map = field_map
			
 
				+        self.feishu_sheetid = feishu_sheetid
			
 
				+        self.logger = logger or LoggerManager.get_logger(platform=platform, mode=mode)
			
 
				+        self.aliyun_log = aliyun_log or LoggerManager.get_aliyun_logger(platform=platform, mode=mode)
			
 
				+
			
 
				+    async def process_single_video(self, raw_data: Dict, user_info: Dict,
			
 
				+                                   rule_dict: Dict, env: str = "prod") -> Optional[Dict]:
			
 
				+        """
			
 
				+        完整的单条视频处理流程
			
 
				+        """
			
 
				+        try:
			
 
				+            # 1. 提取字段并创建视频对象
			
 
				+            video_obj = await self._create_video_item(raw_data, user_info)
			
 
				+
			
 
				+            if not video_obj:
			
 
				+                return None
			
 
				+
			
 
				+            # 2. 数据过滤
			
 
				+            if not await self._filter_video(video_obj, rule_dict, env):
			
 
				+                return None
			
 
				+
			
 
				+            # 3. 集成视频处理（标题生成等）
			
 
				+            processed_video = await self._integrated_handling(video_obj)
			
 
				+            if not processed_video:
			
 
				+                return None
			
 
				+
			
 
				+            return processed_video
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            self.logger.error(f"视频处理失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    async def _create_video_item(self, raw_data: Dict, user_info: Dict) -> Optional[Dict]:
			
 
				+        """创建视频数据对象"""
			
 
				+        try:
			
 
				+            # 提取字段
			
 
				+            item_kwargs = extract_fields(raw_data, self.field_map,
			
 
				+                                         logger=self.logger,
			
 
				+                                         aliyun_log=self.aliyun_log)
			
 
				+
			
 
				+            if not item_kwargs:
			
 
				+                self.logger.error("字段提取失败")
			
 
				+                return None
			
 
				+
			
 
				+
			
 
				+            # 确保必需的用户信息存在
			
 
				+            user_id = user_info.get("uid")
			
 
				+            user_name = user_info.get("nick_name")
			
 
				+            
			
 
				+            if not user_id:
			
 
				+                self.logger.error("用户ID为空")
			
 
				+                return None
			
 
				+
			
 
				+            # 添加平台相关信息
			
 
				+            item_kwargs.update({
			
 
				+                "user_id": str(user_id),
			
 
				+                "user_name": user_name,
			
 
				+                "platform": self.platform,
			
 
				+                "strategy": self.mode,
			
 
				+                "session": f"{self.platform}_{int(time.time())}"  # 确保session字段存在
			
 
				+            })
			
 
				+
			
 
				+            # 创建视频项
			
 
				+            video_item = VideoItem(**item_kwargs)
			
 
				+            return await video_item.produce_item()
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            self.logger.error(f"创建视频项失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    async def _filter_video(self, video: Dict, rule_dict: Dict, env: str) -> bool:
			
 
				+        """过滤视频数据"""
			
 
				+        try:
			
 
				+            pipeline = PiaoQuanPipeline(
			
 
				+                platform=self.platform,
			
 
				+                mode=self.mode,
			
 
				+                rule_dict=rule_dict,
			
 
				+                env=env,
			
 
				+                item=video,
			
 
				+                trace_id=f"{self.platform}_{uuid.uuid1()}"
			
 
				+            )
			
 
				+            return await pipeline.process_item()
			
 
				+        except Exception as e:
			
 
				+            self.logger.error(f"过滤视频失败: {e}")
			
 
				+            return False
			
 
				+
			
 
				+    async def _integrated_handling(self, video: Dict) -> Optional[Dict]:
			
 
				+        """
			
 
				+        集成视频处理 - 标题生成等业务逻辑
			
 
				+        返回处理后的视频数据，如果处理失败返回None
			
 
				+        """
			
 
				+        try:
			
 
				+            if self.feishu_sheetid:
			
 
				+                # 调用标题生成逻辑
			
 
				+                processed_video = await generate_titles(
			
 
				+                    self.feishu_sheetid,
			
 
				+                    video,
			
 
				+                    self.logger,
			
 
				+                    self.aliyun_log
			
 
				+                )
			
 
				+
			
 
				+                if processed_video:
			
 
				+                    self.logger.info(f"视频标题处理完成: {video}")
			
 
				+                    return processed_video
			
 
				+                else:
			
 
				+                    self.logger.warning("标题生成失败，使用原始视频数据")
			
 
				+                    return video
			
 
				+            else:
			
 
				+                self.logger.debug("未配置飞书sheetid，跳过标题生成")
			
 
				+                return video
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            tb = traceback.format_exc()
			
 
				+            self.logger.error(f"集成视频处理失败: {e} \n {tb}")
			
 
				+            # 处理失败时返回原始数据，不中断流程
			
 
				+            return video
			
--- a/main.py
+++ b/main.py
@@ -36,6 +36,7 @@ def main():
 
				 
			
 
				     max_processes = cpu_count()
			
 
				     num_processes = min(len(topic_list), max_processes)
			
 
				+    # topic均分给进程
			
 
				     topic_groups = split_topics(topic_list, num_processes)
			
 
				     logger.info(f"[主进程] CPU 核心数: {max_processes}，启动进程数: {num_processes}")
			
 
				 
			
@@ -44,7 +45,7 @@ def main():
 
				     for group_id, topic_group in enumerate(topic_groups):
			
 
				         start_worker_process(group_id, topic_group, process_map)
			
 
				 
			
 
				-    # 重启状态跟踪 - 新增重启限制
			
 
				+    # 重启状态跟踪
			
 
				     restart_count = {group_id: 0 for group_id in range(len(topic_groups))}
			
 
				     last_restart_time = {group_id: 0 for group_id in range(len(topic_groups))}
			
 
				 
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,73 +1,73 @@
 
				 aiohappyeyeballs==2.6.1
			
 
				-aiohttp==3.12.13
			
 
				-aiosignal==1.3.2
			
 
				-aliyun-log-python-sdk==0.9.24
			
 
				-aliyun-python-sdk-core==2.13.36
			
 
				+aiohttp==3.12.15
			
 
				+aiosignal==1.4.0
			
 
				+aliyun-log-python-sdk==0.9.32
			
 
				+aliyun-python-sdk-core==2.16.0
			
 
				 annotated-types==0.7.0
			
 
				 async-timeout==5.0.1
			
 
				 asyncmy==0.2.10
			
 
				 attrs==25.3.0
			
 
				-build==1.2.2.post1
			
 
				-certifi==2025.6.15
			
 
				-cffi==1.17.1
			
 
				-charset-normalizer==3.4.2
			
 
				-click==8.2.1
			
 
				-coverage==7.9.1
			
 
				-cryptography==45.0.4
			
 
				-dateparser==1.2.1
			
 
				+build==1.3.0
			
 
				+certifi==2025.8.3
			
 
				+cffi==2.0.0
			
 
				+charset-normalizer==3.4.3
			
 
				+click==8.3.0
			
 
				+coverage==7.10.7
			
 
				+cryptography==46.0.1
			
 
				+dateparser==1.2.2
			
 
				 dotenv==0.9.9
			
 
				-elastic-transport==8.17.1
			
 
				-elasticsearch==9.0.2
			
 
				+elastic-transport==9.1.0
			
 
				+elasticsearch==9.1.1
			
 
				 frozenlist==1.7.0
			
 
				 googleapis-common-protos==1.70.0
			
 
				-grpcio==1.73.1
			
 
				-grpcio-tools==1.71.2
			
 
				+grpcio==1.75.0
			
 
				+grpcio-tools==1.75.0
			
 
				 idna==3.10
			
 
				 importlib_metadata==8.7.0
			
 
				 iniconfig==2.1.0
			
 
				-jmespath==0.10.0
			
 
				+jmespath==1.0.1
			
 
				 jsonpath-ng==1.7.0
			
 
				 loguru==0.7.3
			
 
				 lz4==4.4.4
			
 
				 mq-http-sdk==1.0.3
			
 
				-multidict==6.4.4
			
 
				-opentelemetry-api==1.34.1
			
 
				-opentelemetry-exporter-otlp==1.34.1
			
 
				-opentelemetry-exporter-otlp-proto-common==1.34.1
			
 
				-opentelemetry-exporter-otlp-proto-grpc==1.34.1
			
 
				-opentelemetry-exporter-otlp-proto-http==1.34.1
			
 
				-opentelemetry-proto==1.34.1
			
 
				-opentelemetry-sdk==1.34.1
			
 
				+multidict==6.6.4
			
 
				+opentelemetry-api==1.37.0
			
 
				+opentelemetry-exporter-otlp==1.37.0
			
 
				+opentelemetry-exporter-otlp-proto-common==1.37.0
			
 
				+opentelemetry-exporter-otlp-proto-grpc==1.37.0
			
 
				+opentelemetry-exporter-otlp-proto-http==1.37.0
			
 
				+opentelemetry-proto==1.37.0
			
 
				+opentelemetry-sdk==1.37.0
			
 
				 opentelemetry-semantic-conventions==0.55b1
			
 
				 packaging==25.0
			
 
				-pip-tools==7.4.1
			
 
				+pip-tools==7.5.0
			
 
				 pluggy==1.6.0
			
 
				 ply==3.11
			
 
				 propcache==0.3.2
			
 
				-protobuf==5.27.0
			
 
				-pycparser==2.22
			
 
				-pydantic==1.10.22
			
 
				+protobuf==6.32.1
			
 
				+pycparser==2.23
			
 
				+pydantic==2.11.9
			
 
				 pydantic-env-settings==0.1.1
			
 
				-pydantic_core==2.33.2
			
 
				+pydantic_core==2.39.0
			
 
				 Pygments==2.19.2
			
 
				-PyMySQL==1.1.1
			
 
				+PyMySQL==1.1.2
			
 
				 pyproject_hooks==1.2.0
			
 
				-pytest==8.4.1
			
 
				-pytest-asyncio==1.0.0
			
 
				+pytest==8.4.2
			
 
				+pytest-asyncio==1.2.0
			
 
				 python-dateutil==2.9.0.post0
			
 
				-python-dotenv==1.1.0
			
 
				+python-dotenv==1.1.1
			
 
				 pytz==2025.2
			
 
				 PyYAML==6.0.2
			
 
				-redis==6.2.0
			
 
				-regex==2024.11.6
			
 
				-requests==2.32.4
			
 
				+redis==6.4.0
			
 
				+regex==2025.9.18
			
 
				+requests==2.32.5
			
 
				 rocketmq-client-python==2.0.0
			
 
				 rocketmq-python-client==5.0.6
			
 
				 six==1.17.0
			
 
				 tenacity==9.1.2
			
 
				 typing-inspection==0.4.1
			
 
				-typing_extensions==4.14.0
			
 
				+typing_extensions==4.15.0
			
 
				 tzlocal==5.3.1
			
 
				-urllib3==2.4.0
			
 
				+urllib3==2.5.0
			
 
				 yarl==1.20.1
			
 
				-zipp==3.23.0
			
 
				+zipp==3.23.0
			
--- a/services/pipeline.py
+++ b/services/pipeline.py
@@ -66,11 +66,8 @@ class PiaoQuanPipeline:
 
				 
			
 
				         max_d = self.rule_dict.get("period", {}).get("max", 1000)
			
 
				         min_d = self.rule_dict.get("period", {}).get("min", 1000)
			
 
				-        days = max(max_d, min_d)
			
 
				+        days = int(max(max_d, min_d))
			
 
				 
			
 
				-        # feishu_days = await self.feishu_time_list()
			
 
				-        # if feishu_days:
			
 
				-        #     days = int(feishu_days)
			
 
				 
			
 
				         now_ts = int(time.time())
			
 
				 
			
@@ -182,7 +179,7 @@ class PiaoQuanPipeline:
 
				                     if not flag:
			
 
				                         self.logger.info(f"校验结束: 下载规则不符合{flag}")
			
 
				                         self.aliyun_log.logging(
			
 
				-                            code="2004",
			
 
				+                            code="2005",
			
 
				                             trace_id=self.trace_id,
			
 
				                             data=self.item,
			
 
				                             message="{}: {} <= {} <= {}, {}".format(
			
--- a/spiders/advanced_factory.py
+++ b/spiders/advanced_factory.py
@@ -0,0 +1,108 @@
 
				+# spiders/advanced_factory.py
			
 
				+from typing import Dict, List, Type, Optional
			
 
				+from config.spider_config import SpiderConfig
			
 
				+from core.di.container import container
			
 
				+from spiders.basespider import BaseSpider
			
 
				+from spiders.spider_registry import SPIDER_CLASS_MAP, get_spider_class
			
 
				+
			
 
				+
			
 
				+class AdvancedSpiderFactory:
			
 
				+    """高级爬虫工厂，支持自动配置映射和依赖注入"""
			
 
				+
			
 
				+    @classmethod
			
 
				+    def create_spider(cls, spider_key: str, rule_dict: Dict, user_list: List,
			
 
				+                      env: str = "prod", **kwargs) -> BaseSpider:
			
 
				+        """
			
 
				+        智能创建爬虫实例
			
 
				+        """
			
 
				+        # 1. 获取配置
			
 
				+        config = SpiderConfig.get_platform_config(spider_key)
			
 
				+
			
 
				+        # 2. 确定爬虫类
			
 
				+        spider_class = cls._resolve_spider_class(spider_key, config)
			
 
				+
			
 
				+        # 3. 准备依赖服务
			
 
				+        dependencies = cls._prepare_dependencies(config, **kwargs)
			
 
				+
			
 
				+        # 4. 创建实例
			
 
				+        return spider_class(
			
 
				+            rule_dict=rule_dict,
			
 
				+            user_list=user_list,
			
 
				+            env=env,
			
 
				+            **dependencies
			
 
				+        )
			
 
				+
			
 
				+    @classmethod
			
 
				+    def create_spider_by_topic(cls, topic: str, task_id: str, env: str = "prod") -> Optional[BaseSpider]:
			
 
				+        """
			
 
				+        根据MQ topic和任务ID创建爬虫（完整流程）
			
 
				+        """
			
 
				+        # 1. 映射topic到爬虫类型
			
 
				+        spider_key = cls._map_topic_to_spider_key(topic)
			
 
				+        if not spider_key:
			
 
				+            return None
			
 
				+
			
 
				+        # 2. 从数据库获取任务配置
			
 
				+        rule_dict, user_list = cls._get_task_config(task_id)
			
 
				+        if not rule_dict:
			
 
				+            return None
			
 
				+
			
 
				+        # 3. 创建爬虫实例
			
 
				+        return cls.create_spider(spider_key, rule_dict, user_list, env)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def _resolve_spider_class(cls, spider_key: str, config) -> Type[BaseSpider]:
			
 
				+        """解析爬虫类（支持自动发现）"""
			
 
				+        # 优先使用注册表
			
 
				+        if spider_key in SPIDER_CLASS_MAP:
			
 
				+            return SPIDER_CLASS_MAP[spider_key]
			
 
				+
			
 
				+        # 根据命名约定自动推断
			
 
				+        if spider_key.endswith('recommend'):
			
 
				+            from spiders.recommendspider import RecommendSpider
			
 
				+            return RecommendSpider
			
 
				+        elif spider_key.endswith('author'):
			
 
				+            from spiders.authorspider import AuthorSpider
			
 
				+            return AuthorSpider
			
 
				+
			
 
				+        # 默认使用基类
			
 
				+        return BaseSpider
			
 
				+
			
 
				+    @classmethod
			
 
				+    def _prepare_dependencies(cls, config, **kwargs):
			
 
				+        """准备依赖服务"""
			
 
				+        dependencies = {}
			
 
				+
			
 
				+        # 数据库服务
			
 
				+        if 'db_service' not in kwargs:
			
 
				+            dependencies['db_service'] = container.db_service(
			
 
				+                platform=config.platform,
			
 
				+                mode=config.mode
			
 
				+            )
			
 
				+
			
 
				+        # MQ 生产者
			
 
				+        if 'mq_producer' not in kwargs:
			
 
				+            dependencies['mq_producer'] = container.mq_producer()
			
 
				+
			
 
				+        return {**dependencies, **kwargs}
			
 
				+
			
 
				+    @classmethod
			
 
				+    def _map_topic_to_spider_key(cls, topic: str) -> Optional[str]:
			
 
				+        """topic到爬虫配置的映射"""
			
 
				+        mapping = {
			
 
				+            "bszf_recommend_prod": "benshanzhufurecommend",
			
 
				+            "ynfqmm_recommend_prod": "yuannifuqimanmanrecommend",
			
 
				+            "xng_author_prod": "xiaoniangaoauthor"
			
 
				+        }
			
 
				+        return mapping.get(topic)
			
 
				+
			
 
				+    @classmethod
			
 
				+    def _get_task_config(cls, task_id: str):
			
 
				+        """从数据库获取任务配置"""
			
 
				+        # 这里需要实现数据库查询逻辑
			
 
				+        # 返回 (rule_dict, user_list)
			
 
				+        return {}, []
			
 
				+
			
 
				+
			
 
				+# 全局工厂实例
			
 
				+spider_factory = AdvancedSpiderFactory()
			
--- a/spiders/author/__init__.py
+++ b/spiders/author/__init__.py
--- a/spiders/author/xiaoniangao_author.py
+++ b/spiders/author/xiaoniangao_author.py
--- a/spiders/authorspider.py
+++ b/spiders/authorspider.py
@@ -1,75 +1,91 @@
 
				+# spiders/authorspider.py
			
 
				 from datetime import datetime, timedelta
			
 
				-from spiders.basespider import BaseSpider
			
 
				-from typing import Optional, List, Dict
			
 
				-import aiohttp
			
 
				+from typing import List, Dict, Optional
			
 
				 
			
 
				+from core.utils.helpers import is_near_next_day
			
 
				+from spiders.basespider import BaseSpider
			
 
				 from core.utils.extractors import safe_extract
			
 
				 
			
 
				 
			
 
				 class AuthorSpider(BaseSpider):
			
 
				-    """账号模式爬虫：从用户列表爬取"""
			
 
				+    """作者模式爬虫 """
			
 
				 
			
 
				     def __init__(self, rule_dict: Dict, user_list: List, env: str = "prod"):
			
 
				         super().__init__(rule_dict, user_list, env)
			
 
				         # 账号模式特有状态
			
 
				         self.user_list_from_db = []  # 数据库用户列表
			
 
				         self.current_user_index = 0  # 当前用户索引
			
 
				-        self.current_cursor = "" # 当前分页游标（初始为空）
			
 
				-        self.next_cursor_last = ""
			
 
				-
			
 
				+        self.current_cursor = ""  # 当前分页游标（初始为空）
			
 
				 
			
 
				     async def before_run(self):
			
 
				-        """运行前：获取用户列表"""
			
 
				+        """运行前：获取用户列表 """
			
 
				+        await super().before_run()
			
 
				         self.user_list_from_db = await self.fetch_user_list()
			
 
				         if not self.user_list_from_db:
			
 
				             self.logger.warning("用户列表为空，终止账号模式")
			
 
				         self.logger.info(f"{self.platform}获取用户列表完成，共 {len(self.user_list_from_db)} 个用户")
			
 
				 
			
 
				+    async def execute(self):
			
 
				+        """执行核心逻辑 - 使用 make_request 方法"""
			
 
				+        if not await self.is_video_count_sufficient():
			
 
				+            self.logger.info("视频数量已达到上限，跳过执行")
			
 
				+            return
			
 
				+
			
 
				+        while await self.is_video_count_sufficient():
			
 
				+
			
 
				+            # 检查时间条件
			
 
				+            if await is_near_next_day():
			
 
				+                self.logger.info(f"距离第二天不足3分钟，停止执行")
			
 
				+                return
			
 
				+
			
 
				+            user = self.user_list_from_db[self.current_user_index]
			
 
				+            crawler_user_uid = user.get("link")
			
 
				+            self.logger.info(
			
 
				+                f"处理用户 uid={crawler_user_uid}（第{self.current_user_index + 1}个），"
			
 
				+                f"当前cursor: {self.current_cursor or '0'}"
			
 
				+            )
			
 
				+
			
 
				+            # 构建请求体
			
 
				+            request_body = self._build_request_body(user)
			
 
				 
			
 
				-    async def core_loop(self):
			
 
				-        """核心循环：处理每个用户的视频"""
			
 
				-        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.timeout)) as session:
			
 
				-            while self.current_user_index < len(self.user_list_from_db):
			
 
				-                if self.is_less_than_3_minutes():
			
 
				-                    return
			
 
				-                # 检查数量限制
			
 
				-                if not await self.is_video_count_sufficient():
			
 
				-                    return
			
 
				-                # 当前用户
			
 
				-                user = self.user_list_from_db[self.current_user_index]
			
 
				-                crawler_user_uid = user.get("link")  # 数据库中的link字段
			
 
				-                self.logger.info(
			
 
				-                    f"处理用户 uid={crawler_user_uid}（第{self.current_user_index + 1}个），"
			
 
				-                    f"当前cursor: {self.current_cursor or '0'}"
			
 
				-                )
			
 
				-
			
 
				-                # 构建请求体：注入uid和cursor
			
 
				-                request_body = self._build_request_body(user)
			
 
				-
			
 
				-                # 获取当前用户视频
			
 
				-                raw_data = await self.crawl_user_videos(session, request_body, crawler_user_uid)
			
 
				-                if not raw_data:
			
 
				-                    # 切换到下一个用户
			
 
				-                    self.current_user_index += 1
			
 
				-                    continue
			
 
				-                # 处理数据
			
 
				-                if self.platform == "xiaoniangao":
			
 
				-                    self.user_list = [user]
			
 
				-                await self.process_data(raw_data)
			
 
				-                if self.current_user_index == len(self.user_list_from_db)-1:
			
 
				-                    self.current_cursor =  self.next_cursor_last
			
 
				-                    self.current_user_index = 0
			
 
				-                    continue
			
 
				+            # 获取当前用户视频
			
 
				+            raw_data = await self.crawl_user_videos(request_body, crawler_user_uid)
			
 
				+
			
 
				+            if not raw_data:
			
 
				+                # 当前用户无数据，切换到下一个用户
			
 
				                 self.current_user_index += 1
			
 
				-                await self.wait()
			
 
				+                self.current_cursor = ""  # 重置游标
			
 
				+                await self.wait_between_iterations()
			
 
				+                continue
			
 
				+
			
 
				+
			
 
				+            # 处理数据
			
 
				+            if self.platform == "xiaoniangao":
			
 
				+                self.user_list = [user]  # 特殊逻辑
			
 
				+
			
 
				+            pass_video = await self.process_data(raw_data)
			
 
				+            # 根据是否有通过的数据和下一页游标判断是否继续当前用户
			
 
				+            if pass_video == 0 and not self.current_cursor:
			
 
				+                # 没有通过数据或没有更多数据，切换到下一个用户
			
 
				+                self.current_user_index += 1
			
 
				+                self.current_cursor = ""
			
 
				+
			
 
				+            self.logger.info(
			
 
				+                f"用户 {crawler_user_uid} 获取到 {pass_video} 个通过视频,继续扫描第{self.current_cursor}页")
			
 
				+
			
 
				 
			
 
				+            # 检查是否所有用户处理完毕
			
 
				+            if self.current_user_index >= len(self.user_list_from_db):
			
 
				+                self.current_user_index = 0  # 重置索引
			
 
				+                self.current_cursor = ""
			
 
				+
			
 
				+            await self.wait_between_iterations()
			
 
				 
			
 
				     def _build_request_body(self, user: Dict) -> Dict:
			
 
				-        """构建请求体：将用户link和当前cursor注入"""
			
 
				-        # 准备"虚拟数据"，键名对应你的配置路径（$.uid 和 $.cursor）
			
 
				+        """构建请求体"""
			
 
				         virtual_data = {
			
 
				-            "uid": str(user.get("link")),  # 对应配置中的 $.uid
			
 
				-            "cursor": self.current_cursor  # 对应配置中的 $.cursor
			
 
				+            "uid": str(user.get("link")),
			
 
				+            "cursor": self.current_cursor
			
 
				         }
			
 
				 
			
 
				         return self.request_preparer.prepare(
			
@@ -77,37 +93,30 @@ class AuthorSpider(BaseSpider):
 
				             response_data=virtual_data
			
 
				         )
			
 
				 
			
 
				-    async def fetch_user_list(self) -> List[Dict]:
			
 
				-        """获取待爬取的用户列表（从数据库）"""
			
 
				-        return []
			
 
				-
			
 
				-    async def crawl_user_videos(self, session, request_body: Dict, user_uid: str) -> Optional[List[Dict]]:
			
 
				-        """请求用户视频接口"""
			
 
				-        response = await self.request_client.request(
			
 
				-            session=session,
			
 
				-            method=self.method,
			
 
				-            url=self.url,
			
 
				-            headers=self.headers,
			
 
				-            json=request_body
			
 
				-        )
			
 
				-        # has_more = safe_extract(response,self.has_more)
			
 
				-        # 解析用户视频列表
			
 
				-        data_list = safe_extract(response, self.data_path)
			
 
				+    async def crawl_user_videos(self, request_body: Dict, user_uid: str) -> Optional[List[Dict]]:
			
 
				+        """请求用户视频接口 - 使用 make_request 方法"""
			
 
				+        # 使用基类的 make_request 方法发送请求
			
 
				+        response = await self.make_request(request_body)
			
 
				+
			
 
				+        if not response:
			
 
				+            self.logger.info(f"用户 {user_uid} 请求失败")
			
 
				+            return None
			
 
				+
			
 
				+        # 游标处理逻辑
			
 
				         if safe_extract(response, self.next_cursor):
			
 
				-           self.next_cursor_last = safe_extract(response, self.next_cursor)
			
 
				+            self.current_cursor = safe_extract(response, self.next_cursor)
			
 
				+
			
 
				+        data_list = safe_extract(response, self.data_path)
			
 
				         if not data_list:
			
 
				             self.logger.info(f"用户 {user_uid} 无更多视频数据")
			
 
				             return None
			
 
				+
			
 
				         return data_list
			
 
				 
			
 
				+    async def fetch_user_list(self) -> List[Dict]:
			
 
				+        """获取待爬取的用户列表（从数据库）- 子类实现"""
			
 
				+        return self.user_list  # 默认返回传入的列表
			
 
				+
			
 
				     async def fetch_detail(self, item: Dict) -> Dict:
			
 
				         """账号模式：补充视频详情（子类自行实现）"""
			
 
				-        return item  # 默认返回原数据
			
 
				-
			
 
				-
			
 
				-    def is_less_than_3_minutes(self):
			
 
				-        now = datetime.now()
			
 
				-        tomorrow = now.date() + timedelta(days=1)
			
 
				-        midnight = datetime.combine(tomorrow, datetime.min.time())
			
 
				-        time_left = midnight - now
			
 
				-        return time_left.total_seconds() < 3 * 60
			
 
				+        return item  # 默认返回原数据
			
--- a/spiders/basespider.py
+++ b/spiders/basespider.py
@@ -1,261 +1,270 @@
 
				+# spiders/basespider.py
			
 
				 import asyncio
			
 
				 import random
			
 
				 import traceback
			
 
				-import uuid
			
 
				-from typing import List, Dict, Optional, Any
			
 
				+from typing import List, Dict, Any, Optional
			
 
				 from abc import ABC, abstractmethod
			
 
				 
			
 
				-from core.models.video_item import VideoItem
			
 
				-from core.utils.helpers import generate_titles
			
 
				+import aiohttp
			
 
				+
			
 
				 from core.utils.request_preparer import RequestPreparer
			
 
				 from core.utils.spider_config import SpiderConfig
			
 
				-from core.utils.extractors import safe_extract, extract_fields
			
 
				 from core.utils.log.logger_manager import LoggerManager
			
 
				+from core.video_processor import VideoProcessor
			
 
				 from services.async_mysql_service import AsyncMysqlService
			
 
				-from services.pipeline import PiaoQuanPipeline
			
 
				-from core.base.async_request_client import AsyncRequestClient
			
 
				 from services.async_mq_producer import AsyncMQProducer
			
 
				+from core.base.async_request_client import AsyncRequestClient
			
 
				 
			
 
				 
			
 
				 class BaseSpider(ABC):
			
 
				-    """通用爬虫基类"""
			
 
				-    
			
 
				+    """爬虫基类 - 简化版本，不包含循环逻辑"""
			
 
				+
			
 
				     def __init__(self, rule_dict: Dict, user_list: List, env: str = "prod",
			
 
				                  request_client: AsyncRequestClient = None,
			
 
				                  db_service: AsyncMysqlService = None,
			
 
				                  mq_producer: AsyncMQProducer = None):
			
 
				+        # 基础属性
			
 
				         self.rule_dict = rule_dict
			
 
				         self.user_list = user_list
			
 
				         self.env = env
			
 
				-        self.class_name = self.__class__.__name__.lower()
			
 
				-
			
 
				-        # 初始化核心组件
			
 
				-        self._setup_configuration()
			
 
				-        self._setup_logging()
			
 
				-        self._setup_services(request_client, db_service, mq_producer)
			
 
				-        self._setup_state()
			
 
				-
			
 
				-        # 通用状态
			
 
				-        self.total_success = 0
			
 
				-        self.total_fail = 0
			
 
				-        self.video = None
			
 
				-
			
 
				-
			
 
				-    def _setup_configuration(self):
			
 
				-        self.platform_config = SpiderConfig.get_platform_config(classname=self.class_name)
			
 
				-        if not self.platform_config:
			
 
				-            raise ValueError(f"找不到爬虫配置: {self.class_name}")
			
 
				-        self.platform = self.platform_config.platform
			
 
				-        self.mode = self.platform_config.mode
			
 
				-        self.url = self.platform_config.url
			
 
				-        self.method = self.platform_config.method.upper()
			
 
				-        self.headers = self.platform_config.headers or {}
			
 
				-        self.request_body_template = self.platform_config.request_body or {}
			
 
				-        self.response_parse_config = self.platform_config.response_parse or {}
			
 
				-        self.data_path = self.response_parse_config.get("data_path")
			
 
				-        self.has_more = self.response_parse_config.get("has_more")
			
 
				-        self.field_map = self.response_parse_config.get("fields", {})
			
 
				-        self.next_cursor = self.response_parse_config.get("next_cursor") or ""
			
 
				-        self.loop_times = self.platform_config.loop_times or 100
			
 
				-        self.loop_interval = self.platform_config.loop_interval or {"min": 2, "max": 5}
			
 
				-        self.timeout = self.platform_config.request_timeout or 30
			
 
				-        self.max_retries = self.platform_config.max_retries or 3
			
 
				-        self.feishu_sheetid = self.platform_config.feishu_sheetid
			
 
				-
			
 
				-    def _setup_logging(self):
			
 
				-        self.logger = LoggerManager.get_logger(platform=self.platform, mode=self.mode)
			
 
				-        self.aliyun_log = LoggerManager.get_aliyun_logger(platform=self.platform, mode=self.mode)
			
 
				-        self.logger.info(f"爬虫 '{self.platform}/{self.mode}' 初始化...")
			
 
				-
			
 
				-    def _setup_services(self, request_client: AsyncRequestClient = None,
			
 
				-                        db_service: AsyncMysqlService = None,
			
 
				-                        mq_producer: AsyncMQProducer = None):
			
 
				-        """初始化服务组件"""
			
 
				-        self.request_client = request_client or AsyncRequestClient(
			
 
				+
			
 
				+        # 服务依赖
			
 
				+        self.request_client = request_client
			
 
				+        self.db_service = db_service
			
 
				+        self.mq_producer = mq_producer
			
 
				+
			
 
				+        # 通过类名获取配置
			
 
				+        class_name = self.__class__.__name__.lower()
			
 
				+        self.config = SpiderConfig.get_platform_config(class_name)
			
 
				+        self._setup_from_config()
			
 
				+
			
 
				+        # 日志服务
			
 
				+        self.logger = LoggerManager.get_logger(
			
 
				+            platform=self.config.platform,
			
 
				+            mode=self.config.mode
			
 
				+        )
			
 
				+        self.aliyun_log = LoggerManager.get_aliyun_logger(
			
 
				+            platform=self.config.platform,
			
 
				+            mode=self.config.mode
			
 
				+        )
			
 
				+
			
 
				+        # 请求准备器
			
 
				+        self.request_preparer = RequestPreparer(
			
 
				+            response_parse_config=self.config.response_parse,
			
 
				             logger=self.logger,
			
 
				             aliyun_log=self.aliyun_log
			
 
				         )
			
 
				-        self.db_service = db_service or AsyncMysqlService(
			
 
				-            platform=self.platform,
			
 
				-            mode=self.mode
			
 
				-        )
			
 
				-        self.mq_producer = mq_producer or AsyncMQProducer(
			
 
				-            topic_name="topic_crawler_etl_prod_v2",
			
 
				+
			
 
				+        # 状态跟踪
			
 
				+        self.stats = {
			
 
				+            'success': 0,
			
 
				+            'fail': 0,
			
 
				+            'start_time': None,
			
 
				+            'end_time': None
			
 
				+        }
			
 
				+
			
 
				+        # 如果没有传入服务，则创建默认实例
			
 
				+        self._setup_default_services()
			
 
				+
			
 
				+        # 初始化视频处理器
			
 
				+        self.video_processor = VideoProcessor(
			
 
				             platform=self.platform,
			
 
				-            mode=self.mode
			
 
				-        )
			
 
				-    def _setup_state(self):
			
 
				-        self.last_response_data = {}
			
 
				-        self.request_preparer = RequestPreparer(
			
 
				-            response_parse_config=self.response_parse_config,
			
 
				+            mode=self.mode,
			
 
				+            field_map=self.field_map,
			
 
				+            feishu_sheetid=self.feishu_sheetid,
			
 
				             logger=self.logger,
			
 
				             aliyun_log=self.aliyun_log
			
 
				         )
			
 
				 
			
 
				-    # 核心入口（统一流程）
			
 
				+    def _setup_default_services(self):
			
 
				+        """设置默认服务实例"""
			
 
				+        if not self.request_client:
			
 
				+            self.request_client = AsyncRequestClient(
			
 
				+                logger=self.logger,
			
 
				+                aliyun_log=self.aliyun_log
			
 
				+            )
			
 
				+
			
 
				+        if not self.db_service:
			
 
				+            self.db_service = AsyncMysqlService(
			
 
				+                platform=self.config.platform,
			
 
				+                mode=self.config.mode
			
 
				+            )
			
 
				+
			
 
				+        if not self.mq_producer:
			
 
				+            self.mq_producer = AsyncMQProducer(
			
 
				+                topic_name="topic_crawler_etl_prod_v2",
			
 
				+                platform=self.config.platform,
			
 
				+                mode=self.config.mode
			
 
				+            )
			
 
				+
			
 
				+    def _setup_from_config(self):
			
 
				+        """从配置中设置属性"""
			
 
				+        self.platform = self.config.platform
			
 
				+        self.mode = self.config.mode
			
 
				+        self.url = self.config.url
			
 
				+        self.method = self.config.method.upper()
			
 
				+        self.headers = self.config.headers or {}
			
 
				+        self.request_body_template = self.config.request_body or {}
			
 
				+        self.loop_times = self.config.loop_times or 100
			
 
				+        self.timeout = self.config.request_timeout or 30
			
 
				+        self.feishu_sheetid = self.config.feishu_sheetid
			
 
				+
			
 
				+        # 响应解析配置
			
 
				+        response_parse = self.config.response_parse or {}
			
 
				+        self.data_path = response_parse.get("data_path")
			
 
				+        self.has_more = response_parse.get("has_more")
			
 
				+        self.next_cursor = response_parse.get("next_cursor")
			
 
				+        self.field_map = response_parse.get("fields", {})
			
 
				+
			
 
				     async def run(self):
			
 
				-        """主流程：初始化→核心循环→收尾"""
			
 
				+        """主运行流程 - 子类实现完整循环逻辑"""
			
 
				+        self.stats['start_time'] = asyncio.get_event_loop().time()
			
 
				         self.logger.info(f"开始运行爬虫: {self.platform}/{self.mode}")
			
 
				-        await self.before_run()
			
 
				+
			
 
				         try:
			
 
				-            await self.core_loop()  # 子类实现具体模式逻辑
			
 
				+            await self.before_run()
			
 
				+            await self.execute()  # 子类实现完整的执行逻辑
			
 
				         except Exception as e:
			
 
				-            tb = traceback.format_exc()
			
 
				-            self.logger.exception(f"运行异常: {e},堆栈信息：{tb}")
			
 
				+            self.logger.error(f"爬虫执行异常: {e}\n{traceback.format_exc()}")
			
 
				         finally:
			
 
				             await self.after_run()
			
 
				-            self.logger.info(f"总统计：成功{self.total_success}，失败{self.total_fail}")
			
 
				+            self._log_final_stats()
			
 
				 
			
 
				     @abstractmethod
			
 
				-    async def core_loop(self):
			
 
				-        """子类必须实现：模式特有核心循环（推荐/账号）"""
			
 
				+    async def execute(self):
			
 
				+        """执行核心逻辑 - 子类必须实现完整循环逻辑"""
			
 
				         pass
			
 
				 
			
 
				-    async def fetch_detail(self, item: Dict) -> Dict:
			
 
				-        """子类选择实现：补充详情（完全由子类控制）"""
			
 
				-        return item
			
 
				-
			
 
				-    # 通用数据处理流程
			
 
				-    async def process_data(self, video_data: List[Dict]):
			
 
				-        """处理原始数据列表（清洗→过滤→推送）"""
			
 
				-        for item in video_data:
			
 
				+    async def process_data(self, data: List[Dict]):
			
 
				+        """处理数据"""
			
 
				+        success_count = 0
			
 
				+        for item in data:
			
 
				+            self.aliyun_log.logging(
			
 
				+                code="1001",
			
 
				+                message=f"获取到一条数据",
			
 
				+                data=item
			
 
				+            )
			
 
				             try:
			
 
				-                # 补充详情（完全由子类实现）
			
 
				-                detail_data = await self.fetch_detail(item)
			
 
				-                # 处理并推送
			
 
				-                result = await self.process_and_push_video(detail_data)
			
 
				-                if result:
			
 
				-                    self.total_success += 1
			
 
				+                if await self.process_single_item(item):
			
 
				+                    success_count += 1
			
 
				+                    self.stats['success'] += 1
			
 
				                 else:
			
 
				-                    self.total_fail += 1
			
 
				+                    self.stats['fail'] += 1
			
 
				             except Exception as e:
			
 
				-                self.logger.exception(f"处理单条数据失败: {e}")
			
 
				-                self.total_fail += 1
			
 
				+                self.logger.error(f"处理单条数据失败: {e}")
			
 
				+                self.stats['fail'] += 1
			
 
				+        self.logger.info(f"批次处理完成: 成功 {success_count}/{len(data)}")
			
 
				+        return success_count
			
 
				 
			
 
				-    async def process_and_push_video(self, video: Dict[str, Any]) -> bool:
			
 
				-        try:
			
 
				-            self.video = video
			
 
				-            video_obj = await self.process_video(video)
			
 
				-            if not video_obj:
			
 
				-                return False
			
 
				-            if not await self.filter_data(video_obj):
			
 
				-                return False
			
 
				-            # video_obj = await self.integrated_video_handling(video_obj)
			
 
				-            return await self.push_to_etl(video_obj)
			
 
				-        except Exception as e:
			
 
				-            self.logger.exception(f"视频处理异常: {e}")
			
 
				-            return False
			
 
				-
			
 
				-    async def publish_video_user(self) -> Dict[str, Any]:
			
 
				-        """获取随机发布用户"""
			
 
				-        if self.user_list:
			
 
				-            return random.choice(self.user_list)
			
 
				-        else:
			
 
				-            self.logger.error("未获取到用户列表数据")
			
 
				-            return None
			
 
				 
			
 
				 
			
 
				-    async def process_video(self, video: Dict) -> Optional[Dict]:
			
 
				-        """
			
 
				-        字段映射
			
 
				-        统一字段抽取及 VideoItem 初始化
			
 
				-        """
			
 
				-        self.logger.info(f"处理视频数据: {video}")
			
 
				-        publish_user = await self.publish_video_user()
			
 
				-        
			
 
				-        # 检查是否成功获取到发布用户
			
 
				-        if not publish_user:
			
 
				-            self.logger.error("无法获取发布用户信息")
			
 
				-            return None
			
 
				-            
			
 
				-        item_kwargs = extract_fields(video, self.field_map, logger=self.logger, aliyun_log=self.aliyun_log)
			
 
				-        item_kwargs.update({
			
 
				-            "user_id": publish_user.get("uid"),
			
 
				-            "user_name": publish_user.get("nick_name"),
			
 
				-            "platform": self.platform,
			
 
				-            "strategy": self.mode,
			
 
				-        })
			
 
				-        try:
			
 
				-            item = VideoItem(**item_kwargs)
			
 
				-            video_dict = await item.produce_item()
			
 
				-            if not video_dict:
			
 
				-                self.logger.warning("VideoItem 校验失败")
			
 
				-                return None
			
 
				-            return video_dict
			
 
				-        except Exception as e:
			
 
				-            self.logger.error(f"VideoItem 初始化失败: {e}")
			
 
				-            return None
			
 
				+    async def process_single_item(self, item: Dict) -> bool:
			
 
				+        """处理单条数据"""
			
 
				+        # 1. 补充详情
			
 
				+        detail_item = await self.fetch_detail(item)
			
 
				 
			
 
				-    async def filter_data(self, video: Dict) -> bool:
			
 
				-        """
			
 
				-           数据校验过滤，默认使用 PiaoQuanPipeline
			
 
				-           子类可重写此方法实现自定义过滤
			
 
				-        """
			
 
				-        pipeline = PiaoQuanPipeline(
			
 
				-            platform=self.platform,
			
 
				-            mode=self.mode,
			
 
				+        # 2. 使用视频处理器进行完整处理
			
 
				+        video_obj = await self.video_processor.process_single_video(
			
 
				+            raw_data=detail_item,
			
 
				+            user_info=await self.select_publish_user(),
			
 
				             rule_dict=self.rule_dict,
			
 
				-            env=self.env,
			
 
				-            item=video,
			
 
				-            trace_id=self.platform + str(uuid.uuid1())
			
 
				+            env=self.env
			
 
				         )
			
 
				-        return await pipeline.process_item()
			
 
				 
			
 
				-    async def integrated_video_handling(self, video: Dict) -> None:
			
 
				-        """
			
 
				-          钩子函数：可在此实现自动生成标题或其他业务逻辑
			
 
				-        """
			
 
				-        # 视频标题处理生成
			
 
				-        return await generate_titles(self.feishu_sheetid, video,self.logger,self.aliyun_log)
			
 
				+        if not video_obj:
			
 
				+            return False
			
 
				 
			
 
				-    async def push_to_etl(self, video: Dict) -> bool:
			
 
				+        # 3. 推送数据
			
 
				+        return await self.push_video(video_obj)
			
 
				+
			
 
				+    async def select_publish_user(self) -> Dict:
			
 
				+        """选择发布用户"""
			
 
				+        if self.user_list:
			
 
				+            return random.choice(self.user_list)
			
 
				+        return {}
			
 
				+
			
 
				+    async def fetch_detail(self, item: Dict) -> Dict:
			
 
				+        """补充详情 - 子类可重写"""
			
 
				+        return item
			
 
				+
			
 
				+    async def push_video(self, video: Dict) -> bool:
			
 
				+        """推送视频数据"""
			
 
				         try:
			
 
				             await self.mq_producer.send_msg(video)
			
 
				-            self.aliyun_log.logging(code="1009",
			
 
				-                                    message="推送ETL成功",
			
 
				-                                    data=video)
			
 
				-            self.logger.info(f"成功推送视频至ETL: {video}")
			
 
				+            self.aliyun_log.logging(
			
 
				+                code="1002",
			
 
				+                message="推送ETL成功",
			
 
				+                data=video
			
 
				+            )
			
 
				             return True
			
 
				         except Exception as e:
			
 
				-            self.logger.exception(f"推送ETL失败: {e}")
			
 
				+            self.logger.error(f"推送视频失败: {e}")
			
 
				             return False
			
 
				 
			
 
				     async def is_video_count_sufficient(self) -> bool:
			
 
				         """
			
 
				-        校验当日视频是否达到最大爬取量
			
 
				-        True未达到
			
 
				-        False达到最大量
			
 
				-        :return:True/False
			
 
				+        检查视频数量是否足够
			
 
				+        未达到 True 反之 False
			
 
				         """
			
 
				         max_count = self.rule_dict.get("videos_cnt", {}).get("min", 0)
			
 
				         if max_count <= 0:
			
 
				-            self.logger.info(f"{self.platform} 未限制视频入库量，跳过检测")
			
 
				             return True
			
 
				+
			
 
				         current_count = await self.db_service.get_today_videos()
			
 
				         if current_count >= max_count:
			
 
				-            self.logger.info(f"{self.platform} 视频数量达到当日最大值: {current_count}/{max_count}")
			
 
				-            self.aliyun_log.logging(code="1011", message="视频数量达到当日最大值", data=f"{current_count}")
			
 
				+            self.logger.info(f"视频数量达到当日最大值: {current_count}/{max_count}")
			
 
				+            self.aliyun_log.logging(
			
 
				+                code="1011",
			
 
				+                message="视频数量达到最大值",
			
 
				+                data={
			
 
				+                    "current_count": current_count,
			
 
				+                    "max_count": max_count
			
 
				+                }
			
 
				+            )
			
 
				             return False
			
 
				-        self.logger.info(f"{self.platform} 今日入库视频数: {current_count}/{max_count}")
			
 
				-        self.aliyun_log.logging(code="1012",
			
 
				-                                message=f"目前入库量{current_count}",
			
 
				-                                data=f"{current_count}/{max_count}"
			
 
				-                                )
			
 
				+
			
 
				         return True
			
 
				 
			
 
				-    async def wait(self):
			
 
				-        """等待随机时间间隔"""
			
 
				-        # 确保loop_interval包含min和max键
			
 
				-        min_time = self.loop_interval.get("min", 1)
			
 
				-        max_time = self.loop_interval.get("max", 5)
			
 
				-        wait_time = random.randint(min_time, max_time)
			
 
				-        self.logger.info(f"等待 {wait_time} 秒后继续")
			
 
				+    async def wait_between_iterations(self, wait_time: int = None):
			
 
				+        """等待间隔"""
			
 
				+        if wait_time is None:
			
 
				+            interval_config = getattr(self.config, 'loop_interval', {})
			
 
				+            min_time = interval_config.get('min', 1)
			
 
				+            max_time = interval_config.get('max', 5)
			
 
				+            wait_time = random.randint(min_time, max_time)
			
 
				+
			
 
				+        self.logger.info(f"等待 {wait_time} 秒")
			
 
				         await asyncio.sleep(wait_time)
			
 
				 
			
 
				+    async def make_request(self, request_body: Dict) -> Optional[Dict]:
			
 
				+        """发送请求"""
			
 
				+        async with aiohttp.ClientSession(
			
 
				+                timeout=aiohttp.ClientTimeout(total=self.timeout)
			
 
				+        ) as session:
			
 
				+            return await self.request_client.request(
			
 
				+                session=session,
			
 
				+                method=self.method,
			
 
				+                url=self.url,
			
 
				+                headers=self.headers,
			
 
				+                json=request_body
			
 
				+            )
			
 
				+
			
 
				     async def before_run(self):
			
 
				-        """运行前钩子（子类可重写）"""
			
 
				+        """运行前准备 - 子类可重写"""
			
 
				         pass
			
 
				 
			
 
				     async def after_run(self):
			
 
				-        """运行后钩子（子类可重写）"""
			
 
				-        pass
			
 
				+        """运行后清理 - 子类可重写"""
			
 
				+        pass
			
 
				+
			
 
				+    def _log_final_stats(self):
			
 
				+        """记录最终统计"""
			
 
				+        self.stats['end_time'] = asyncio.get_event_loop().time()
			
 
				+        duration = 0
			
 
				+        if self.stats['start_time'] is not None:
			
 
				+            duration = self.stats['end_time'] - self.stats['start_time']
			
 
				+
			
 
				+        self.logger.info(
			
 
				+            f"爬虫执行完成: 成功 {self.stats['success']}, "
			
 
				+            f"失败 {self.stats['fail']}, 耗时 {duration:.2f}秒"
			
 
				+        )
			
--- a/spiders/recommend/__init__.py
+++ b/spiders/recommend/__init__.py
--- a/spiders/recommend/benshanzhufu_recommend.py
+++ b/spiders/recommend/benshanzhufu_recommend.py
@@ -4,8 +4,12 @@ from spiders.recommendspider import RecommendSpider
 
				 
			
 
				 
			
 
				 class BenshanzhufuRecommend(RecommendSpider):
			
 
				-    pass
			
 
				+    """本山祝福推荐爬虫 - 保持原有的服务注入方式"""
			
 
				 
			
 
				+    async def custom_start_checks(self) -> bool:
			
 
				+        """自定义启动检查"""
			
 
				+        self.logger.info("本山祝福爬虫启动检查通过")
			
 
				+        return True
			
 
				 
			
 
				 async def main():
			
 
				     rule_dict = {"videos_cnt":{"min":500}}
			
--- a/spiders/recommend/yuannifuqimanman_recommend.py
+++ b/spiders/recommend/yuannifuqimanman_recommend.py
@@ -5,7 +5,10 @@ from spiders.recommendspider import RecommendSpider
 
				 
			
 
				 
			
 
				 class YuannifuqimanmanRecommend(RecommendSpider):
			
 
				-    pass
			
 
				+    async def custom_start_checks(self) -> bool:
			
 
				+        """自定义启动检查"""
			
 
				+        self.logger.info("愿你福气满满爬虫启动检查通过")
			
 
				+        return True
			
 
				 
			
 
				 
			
 
				 async def main():
			
--- a/spiders/recommendspider.py
+++ b/spiders/recommendspider.py
@@ -1,58 +1,54 @@
 
				+# spiders/recommendspider.py
			
 
				+from typing import List, Dict, Optional
			
 
				 from spiders.basespider import BaseSpider
			
 
				-from typing import Optional, List, Dict
			
 
				-import aiohttp
			
 
				-
			
 
				 from core.utils.extractors import safe_extract
			
 
				 
			
 
				 
			
 
				 class RecommendSpider(BaseSpider):
			
 
				-    """推荐模式爬虫：从推荐接口分页爬取"""
			
 
				-
			
 
				-    async def core_loop(self):
			
 
				-        """核心循环：分页请求推荐接口"""
			
 
				-        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=self.timeout)) as session:
			
 
				-            for loop_index in range(self.loop_times):
			
 
				-                # 检查数量限制
			
 
				-                self.logger.info(f"检测{self.platform}当日入库视频量")
			
 
				-                if not await self.is_video_count_sufficient():
			
 
				-                    return
			
 
				-                    
			
 
				-                # 获取推荐列表数据
			
 
				-                self.logger.info(f"开始获取{self.platform}推荐列表数据")
			
 
				-                raw_data = await self.crawl_data(session)
			
 
				-                if not raw_data:
			
 
				-                    self.logger.info("视频列表为空，开始下次请求")
			
 
				-                    await self.wait()
			
 
				-                    continue
			
 
				-                    
			
 
				-                # 处理数据
			
 
				-                await self.process_data(raw_data)
			
 
				-                
			
 
				-                # 等待下一轮
			
 
				-                await self.wait()
			
 
				-
			
 
				-    async def crawl_data(self, session) -> Optional[List[Dict]]:
			
 
				-        """请求推荐接口（适配推荐模式）"""
			
 
				-        request_body = self.request_preparer.prepare(self.request_body_template, self.last_response_data)
			
 
				-        response = await self.request_client.request(
			
 
				-            session=session,
			
 
				-            method=self.method,
			
 
				-            url=self.url,
			
 
				-            headers=self.headers,
			
 
				-            json=request_body
			
 
				-        )
			
 
				-
			
 
				-        self.last_response_data = response
			
 
				-        
			
 
				-        # 解析推荐列表
			
 
				-        if not response:
			
 
				-            self.logger.warning("接口响应为空")
			
 
				-            return None
			
 
				-            
			
 
				-        data_list = safe_extract(response, self.data_path)
			
 
				-        if not data_list:
			
 
				-            self.logger.info(f"接口返回视频列表为空: {response}")
			
 
				-            self.aliyun_log.logging(code="9021", message="接口返回视频列表为空", data=response)
			
 
				-            return None
			
 
				-            
			
 
				-        return data_list
			
 
				+    """推荐模式爬虫 - 重新封装版本"""
			
 
				+
			
 
				+    def __init__(self, rule_dict: Dict, user_list: List, env: str = "prod",
			
 
				+                 request_client=None, db_service=None, mq_producer=None):
			
 
				+        super().__init__(rule_dict, user_list, env, request_client, db_service, mq_producer)
			
 
				+        self.last_response = None
			
 
				+
			
 
				+    async def execute(self):
			
 
				+        """执行核心逻辑 - 使用 make_request 方法"""
			
 
				+        if not await self.is_video_count_sufficient():
			
 
				+            self.logger.info("视频数量已达到上限，跳过执行")
			
 
				+            return
			
 
				+
			
 
				+        iteration = 0
			
 
				+        while iteration < self.loop_times and await self.is_video_count_sufficient():
			
 
				+            self.logger.info(f"执行第 {iteration + 1} 轮")
			
 
				+
			
 
				+            # 准备请求体
			
 
				+            request_body = self.request_preparer.prepare(
			
 
				+                self.request_body_template,
			
 
				+                self.last_response or {}
			
 
				+            )
			
 
				+
			
 
				+            # 发送请求 - 使用 make_request 方法
			
 
				+            response = await self.make_request(request_body)
			
 
				+
			
 
				+            if not response:
			
 
				+                self.logger.info("未获取到响应数据")
			
 
				+                iteration += 1
			
 
				+                await self.wait_between_iterations()
			
 
				+                continue
			
 
				+
			
 
				+            self.last_response = response
			
 
				+
			
 
				+            # 提取数据
			
 
				+            data_list = safe_extract(response, self.data_path)
			
 
				+            if not data_list:
			
 
				+                self.logger.info("未获取到数据")
			
 
				+                iteration += 1
			
 
				+                await self.wait_between_iterations()
			
 
				+                continue
			
 
				+
			
 
				+            # 处理数据
			
 
				+            await self.process_data(data_list)
			
 
				+
			
 
				+            iteration += 1
			
 
				+            await self.wait_between_iterations()
			
--- a/spiders/spider_registry.py
+++ b/spiders/spider_registry.py
@@ -1,11 +1,11 @@
 
				-# spider_registry.py完整注释版
			
 
				+x# spider_registry.py完整注释版
			
 
				 """爬虫注册表模块：维护topic到爬虫类的映射关系"""
			
 
				 
			
 
				 from core.utils.log.logger_manager import LoggerManager
			
 
				 from spiders.basespider import BaseSpider
			
 
				-from spiders.benshanzhufu_recommend import BenshanzhufuRecommend
			
 
				-from spiders.xiaoniangao_author import XiaoniangaoAuthor
			
 
				-from spiders.yuannifuqimanman_recommend import YuannifuqimanmanRecommend
			
 
				+from spiders.recommend.benshanzhufu_recommend import BenshanzhufuRecommend
			
 
				+from spiders.author.xiaoniangao_author import XiaoniangaoAuthor
			
 
				+from spiders.recommend.yuannifuqimanman_recommend import YuannifuqimanmanRecommend
			
 
				 
			
 
				 logger = LoggerManager.get_logger()
			
 
				 aliyun_log = LoggerManager.get_aliyun_logger()
			
--- a/test/__init__.py
+++ b/test/__init__.py
--- a/test/test_pydantic_upgrade.py
+++ b/test/test_pydantic_upgrade.py
@@ -0,0 +1,57 @@
 
				+# test_pydantic_upgrade.py
			
 
				+# !/usr/bin/env python3
			
 
				+"""Pydantic 2.x 升级验证脚本"""
			
 
				+
			
 
				+from config.base import settings
			
 
				+from core.models.video_item import VideoItem
			
 
				+from core.utils.spider_config import SpiderConfig
			
 
				+
			
 
				+
			
 
				+def test_settings():
			
 
				+    """测试配置加载"""
			
 
				+    print("=== 测试 Settings ===")
			
 
				+    print(f"环境: {settings.ENV}")
			
 
				+    print(f"数据库主机: {settings.DB_HOST}")
			
 
				+    print(f"Redis URL: {settings.redis_url}")
			
 
				+    print("✓ Settings 加载成功")
			
 
				+
			
 
				+
			
 
				+def test_video_item():
			
 
				+    """测试视频项模型"""
			
 
				+    print("\n=== 测试 VideoItem ===")
			
 
				+    video_data = {
			
 
				+        "user_id": "test_user",
			
 
				+        "user_name": "测试用户",
			
 
				+        "out_video_id": "12345",
			
 
				+        "video_url": "https://example.com/video.mp4",
			
 
				+        "cover_url": "https://example.com/cover.jpg",
			
 
				+        "platform": "test",
			
 
				+        "strategy": "recommend",
			
 
				+        "out_user_id":"123",
			
 
				+        "session":"2134",
			
 
				+        "video_title":"123"
			
 
				+    }
			
 
				+
			
 
				+    item = VideoItem(**video_data)
			
 
				+    print(f"创建视频项: {item.video_id}")
			
 
				+    print("✓ VideoItem 创建成功")
			
 
				+
			
 
				+
			
 
				+def test_spider_config():
			
 
				+    """测试爬虫配置"""
			
 
				+    print("\n=== 测试 SpiderConfig ===")
			
 
				+    try:
			
 
				+        config = SpiderConfig.get_platform_config("benshanzhufurecommend")
			
 
				+        print(f"平台: {config.platform}")
			
 
				+        print(f"模式: {config.mode}")
			
 
				+        print(f"URL: {config.url}")
			
 
				+        print("✓ SpiderConfig 加载成功")
			
 
				+    except Exception as e:
			
 
				+        print(f"✗ SpiderConfig 加载失败: {e}")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    test_settings()
			
 
				+    test_video_item()
			
 
				+    test_spider_config()
			
 
				+    print("\n=== 升级验证完成 ===")
			
--- a/tests/test1.py
+++ b/tests/test1.py
@@ -1,8 +0,0 @@
 
				-# topics = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
			
 
				-# num_groups = 4
			
 
				-#
			
 
				-# print([topics[i::num_groups] for i in range(num_groups)])
			
 
				-v = "{{next_cursor}}"
			
 
				-if isinstance(v, str) and v.startswith("{{") and v.endswith("}}"):
			
 
				-    key = v.strip("{}")
			
 
				-    print(key)
			
--- a/tests/test_async_redis_service.py
+++ b/tests/test_async_redis_service.py
@@ -1,33 +0,0 @@
 
				-import pytest
			
 
				-import asyncio
			
 
				-
			
 
				-from config import settings
			
 
				-from services.async_redis_service import AsyncRedisService
			
 
				-from core.base.async_redis_client import RedisManager
			
 
				-
			
 
				-@pytest.mark.asyncio
			
 
				-async def test_async_redis_service_mark_and_get_status():
			
 
				-    await RedisManager.init(redis_url=settings.redis_url)
			
 
				-    service = AsyncRedisService(prefix="crawler:task", ttl=20)
			
 
				-    test_message_id = "test_123456"
			
 
				-
			
 
				-    # 确保干净
			
 
				-    pool = RedisManager.get_pool()
			
 
				-    await pool.delete(service._build_key(test_message_id))
			
 
				-
			
 
				-    # 初始应获取 None
			
 
				-    status = await service.get_status(test_message_id)
			
 
				-    assert status is None
			
 
				-
			
 
				-    # 标记为执行中
			
 
				-    await service.mark_processing(test_message_id)
			
 
				-    status = await service.get_status(test_message_id)
			
 
				-    assert status == "0"
			
 
				-
			
 
				-    # 标记为完成
			
 
				-    await service.mark_done(test_message_id)
			
 
				-    status = await service.get_status(test_message_id)
			
 
				-    assert status == "1"
			
 
				-
			
 
				-    # 清理
			
 
				-    # await pool.delete(service._build_key(test_message_id))
			
--- a/tests/test_benshanzhufu_recommend.py
+++ b/tests/test_benshanzhufu_recommend.py
@@ -1,10 +0,0 @@
 
				-import unittest
			
 
				-
			
 
				-
			
 
				-class MyTestCase(unittest.TestCase):
			
 
				-    def test_something(self):
			
 
				-        self.assertEqual(True, False)  # add assertion here
			
 
				-
			
 
				-
			
 
				-if __name__ == '__main__':
			
 
				-    unittest.main()
			
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,4 +0,0 @@
 
				-from config import settings
			
 
				-
			
 
				-print("=== 配置验证 ===")
			
 
				-print("DB连接:", settings.REDIS_HOST)
			
--- a/tests/test_feishu.py
+++ b/tests/test_feishu.py
@@ -1,21 +0,0 @@
 
				-import asyncio
			
 
				-import random
			
 
				-import string
			
 
				-from datetime import datetime
			
 
				-
			
 
				-# 导入封装的异步工具类
			
 
				-from core.utils.feishu_data_async import FeishuDataAsync  # 假设已保存为该文件名
			
 
				-
			
 
				-async def test_feishu():
			
 
				-    async with FeishuDataAsync() as feishu:
			
 
				-        spreadsheet_token = "ISFnspGcjhyxO6tj0fycbQARnUg"
			
 
				-        sheet_id = "a44975"
			
 
				-        valus =await feishu.get_values(spreadsheet_token=spreadsheet_token,sheet_id=sheet_id)
			
 
				-
			
 
				-        print(valus)
			
 
				-        await feishu.insert_values(spreadsheet_token=spreadsheet_token, sheet_id=sheet_id, ranges="A2:Z2", values=["2", "23333", "3", "4"])
			
 
				-
			
 
				-
			
 
				-
			
 
				-if __name__ == '__main__':
			
 
				-    asyncio.run(test_feishu())
			
--- a/tests/test_video_item.py
+++ b/tests/test_video_item.py
@@ -1,30 +0,0 @@
 
				-import asyncio
			
 
				-import time
			
 
				-from pprint import pprint
			
 
				-
			
 
				-from core.models.video_item import VideoItem  # 你的 Pydantic 模型路径
			
 
				-
			
 
				-
			
 
				-async def main():
			
 
				-    fake_video_data = {
			
 
				-        "user_id": "uid456",
			
 
				-        "user_name": "测试用户",
			
 
				-        "out_video_id": "out789",
			
 
				-        "out_user_id": "out_user",
			
 
				-        "video_url": "http://example.com/video.mp4",
			
 
				-        "cover_url": "http://example.com/cover.jpg",
			
 
				-        "video_title": "   测试 视频 标题！！！",
			
 
				-        "publish_time_stamp": int(time.time()) - 86400,  # 昨天
			
 
				-        "strategy": "recommend",
			
 
				-        "classname": "test_platform"
			
 
				-    }
			
 
				-
			
 
				-    item = VideoItem(**fake_video_data)
			
 
				-    result = await item.produce_item()
			
 
				-
			
 
				-    print("✅ 校验通过的最终结构:")
			
 
				-    pprint(result)
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    asyncio.run(main())