zhangliang
/
AutoScraperX


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
							# spiders_config_models.py (revised version)
from pydantic import BaseModel, AnyUrl, field_validator, ConfigDict
from typing import Dict, Any, Optional, Union


class BaseConfig(BaseModel):
    """Settings shared by every configuration model in this module.

    Keys that are not declared as fields are rejected, and the two integer
    settings must both be strictly greater than zero.
    """

    # Reject any input key that is not declared as a field below.
    model_config = ConfigDict(extra='forbid')

    base_url: Optional[AnyUrl] = None
    request_timeout: int = 30
    max_retries: int = 3
    headers: Dict[str, Any] = {}

    @field_validator('request_timeout', 'max_retries')
    @classmethod
    def validate_positive_int(cls, v: int) -> int:
        """Return ``v`` unchanged when it is strictly positive.

        Raises:
            ValueError: If ``v`` is zero or negative.
        """
        if v > 0:
            return v
        raise ValueError('Value must be positive')


class PlatformConfig(BaseConfig):
    """Per-platform request configuration layered on top of ``BaseConfig``.

    ``platform``, ``mode``, ``url`` and ``method`` are required; every other
    field has a default. The validators below normalise ``method`` to upper
    case and enforce range/shape constraints on the remaining fields.
    """

    platform: str
    mode: str
    path: Optional[str] = None
    url: str
    method: str
    request_body: Dict[str, Any] = {}
    loop_times: int = 1
    # NOTE(review): the empty defaults for ``loop_interval`` and
    # ``response_parse`` would fail their own validators if passed explicitly;
    # pydantic presumably skips validators for defaults unless
    # ``validate_default`` is enabled -- confirm this asymmetry is intended.
    loop_interval: Dict[str, int] = {}
    response_parse: Dict[str, Any] = {}
    retry_times: int = 0
    feishu_sheetid: Optional[str] = None

    @field_validator('method')
    @classmethod
    def validate_method(cls, v: str) -> str:
        """Upper-case ``v`` and check it against the supported verb list.

        Returns:
            The upper-cased method name.

        Raises:
            ValueError: If the upper-cased value is not a supported verb.
        """
        supported = ('GET', 'POST', 'PUT', 'DELETE', 'PATCH')
        verb = v.upper()
        if verb in supported:
            return verb
        raise ValueError(f'Method must be one of {", ".join(supported)}')

    @field_validator('loop_times')
    @classmethod
    def validate_loop_times(cls, v: int) -> int:
        """Return ``v`` unchanged when it is strictly positive.

        Raises:
            ValueError: If ``v`` is zero or negative.
        """
        if v > 0:
            return v
        raise ValueError('loop_times must be positive')

    @field_validator('loop_interval')
    @classmethod
    def validate_loop_interval(cls, v: Dict[str, int]) -> Dict[str, int]:
        """Check that ``v`` holds a non-negative ``min``/``max`` pair in order.

        Raises:
            ValueError: If either key is missing, if either bound is
                negative, or if ``min`` exceeds ``max``.
        """
        if not {'min', 'max'}.issubset(v):
            raise ValueError('loop_interval must contain both min and max keys')

        lower, upper = v['min'], v['max']
        # The smaller of the two bounds is negative iff at least one is.
        if min(lower, upper) < 0:
            raise ValueError('loop_interval values must be non-negative')
        if lower > upper:
            raise ValueError('min value cannot be greater than max value')
        return v

    @field_validator('response_parse')
    @classmethod
    def validate_response_parse(cls, v: Dict[str, Any]) -> Dict[str, Any]:
        """Return ``v`` unchanged when it carries a ``data_path`` key.

        Raises:
            ValueError: If ``data_path`` is absent.
        """
        if 'data_path' in v:
            return v
        raise ValueError('response_parse must contain data_path')

    @field_validator('retry_times')
    @classmethod
    def validate_retry_times(cls, v: int) -> int:
        """Return ``v`` unchanged when it is zero or positive.

        Raises:
            ValueError: If ``v`` is negative.
        """
        if v >= 0:
            return v
        raise ValueError('retry_times must be non-negative')

    @field_validator('request_body')
    @classmethod
    def validate_request_body(cls, v: Dict[str, Any]) -> Dict[str, Any]:
        """Check that every value in ``v`` is built only from plain data types.

        Allowed values are ``str``, ``int``, ``float``, ``bool`` and ``None``,
        lists/tuples of allowed values, and dicts whose keys are strings and
        whose values are allowed values (checked recursively).

        Raises:
            ValueError: If ``v`` is not a dict, or if any top-level entry
                contains a value outside the allowed types.
        """
        if not isinstance(v, dict):
            raise ValueError('request_body must be a dictionary')

        def _is_plain_value(candidate):
            # Scalars are accepted directly.
            if candidate is None or isinstance(candidate, (str, int, float, bool)):
                return True
            # Sequences are accepted when every element is.
            if isinstance(candidate, (list, tuple)):
                return all(map(_is_plain_value, candidate))
            # Mappings need string keys and acceptable values.
            if isinstance(candidate, dict):
                return all(
                    isinstance(name, str) and _is_plain_value(inner)
                    for name, inner in candidate.items()
                )
            return False

        for entry_key, entry_value in v.items():
            if _is_plain_value(entry_value):
                continue
            raise ValueError(
                f'Invalid type for request_body["{entry_key}"]: {type(entry_value)}'
            )
        return v