spiders_config_models.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. # spiders_config_models.py 修改后
  2. from pydantic import BaseModel, AnyUrl, field_validator, ConfigDict
  3. from typing import Dict, Any, Optional, Union
  class BaseConfig(BaseModel):
      """Settings shared by every spider configuration model.

      Unknown fields are rejected (``extra='forbid'``), and both
      ``request_timeout`` and ``max_retries`` must be strictly positive
      whenever they are supplied.
      """

      model_config = ConfigDict(extra='forbid')  # reject unknown fields

      base_url: Optional[AnyUrl] = None
      request_timeout: int = 30
      max_retries: int = 3
      headers: Dict[str, Any] = {}

      @field_validator('request_timeout', 'max_retries')
      @classmethod
      def validate_positive_int(cls, v: int) -> int:
          """Return ``v`` unchanged when it is greater than zero.

          Raises:
              ValueError: If ``v`` is zero or negative.
          """
          if v > 0:
              return v
          raise ValueError('Value must be positive')
  class PlatformConfig(BaseConfig):
      """Request, looping and response-parsing settings for one platform.

      Extends ``BaseConfig`` with the fields describing a single request
      (``url``, ``method``, ``request_body``), how often it is repeated
      (``loop_times``, ``loop_interval``), how the response is read
      (``response_parse``) and a retry count (``retry_times``).

      ``loop_interval`` and ``response_parse`` default to an empty dict.
      An empty dict is treated as "not configured" and is accepted both
      when the field is omitted and when it is passed explicitly, so a
      dumped configuration can be validated again without errors.
      """

      platform: str
      mode: str
      path: Optional[str] = None
      url: str
      method: str
      request_body: Dict[str, Any] = {}
      loop_times: int = 1
      loop_interval: Dict[str, int] = {}
      response_parse: Dict[str, Any] = {}
      retry_times: int = 0
      feishu_sheetid: Optional[str] = None

      @field_validator('method')
      @classmethod
      def validate_method(cls, v: str) -> str:
          """Normalise the HTTP method to upper case and check it is allowed.

          Raises:
              ValueError: If the upper-cased value is not one of
                  GET, POST, PUT, DELETE or PATCH.
          """
          allowed_methods = ('GET', 'POST', 'PUT', 'DELETE', 'PATCH')
          method = v.upper()
          if method not in allowed_methods:
              raise ValueError(f'Method must be one of {", ".join(allowed_methods)}')
          return method

      @field_validator('loop_times')
      @classmethod
      def validate_loop_times(cls, v: int) -> int:
          """Require ``loop_times`` to be strictly positive.

          Raises:
              ValueError: If ``v`` is zero or negative.
          """
          if v <= 0:
              raise ValueError('loop_times must be positive')
          return v

      @field_validator('loop_interval')
      @classmethod
      def validate_loop_interval(cls, v: Dict[str, int]) -> Dict[str, int]:
          """Check that a configured interval has a sane ``min``/``max`` pair.

          An empty dict is returned unchanged: it is the field's own default
          and must stay valid when passed explicitly (for example when a
          dumped model is validated again).

          Raises:
              ValueError: If a non-empty dict lacks ``min`` or ``max``, if
                  either value is negative, or if ``min`` exceeds ``max``.
          """
          if not v:
              return v
          if 'min' not in v or 'max' not in v:
              raise ValueError('loop_interval must contain both min and max keys')
          if v['min'] < 0 or v['max'] < 0:
              raise ValueError('loop_interval values must be non-negative')
          if v['min'] > v['max']:
              raise ValueError('min value cannot be greater than max value')
          return v

      @field_validator('response_parse')
      @classmethod
      def validate_response_parse(cls, v: Dict[str, Any]) -> Dict[str, Any]:
          """Require ``data_path`` whenever parsing rules are configured.

          An empty dict is returned unchanged because it is the field's own
          default and must stay valid when passed explicitly.

          Raises:
              ValueError: If a non-empty dict has no ``data_path`` key.
          """
          if v and 'data_path' not in v:
              raise ValueError('response_parse must contain data_path')
          return v

      @field_validator('retry_times')
      @classmethod
      def validate_retry_times(cls, v: int) -> int:
          """Require ``retry_times`` to be zero or greater.

          Raises:
              ValueError: If ``v`` is negative.
          """
          if v < 0:
              raise ValueError('retry_times must be non-negative')
          return v

      @field_validator('request_body')
      @classmethod
      def validate_request_body(cls, v: Dict[str, Any]) -> Dict[str, Any]:
          """Check that the request body only holds plain, nestable values.

          Accepted values are ``str``, ``int``, ``float``, ``bool`` and
          ``None``, lists/tuples of accepted values, and dicts whose keys are
          strings and whose values are accepted values.

          Raises:
              ValueError: If ``v`` is not a dict, or if any top-level entry
                  contains a value of another type (the message names the
                  offending top-level key).
          """
          # Defensive: also covers the validator being called directly.
          if not isinstance(v, dict):
              raise ValueError('request_body must be a dictionary')

          def is_valid_type(value):
              # Recursively accept scalars, sequences of valid items and
              # string-keyed dicts of valid items; reject everything else.
              if isinstance(value, (str, int, float, bool, type(None))):
                  return True
              if isinstance(value, (list, tuple)):
                  return all(is_valid_type(item) for item in value)
              if isinstance(value, dict):
                  return all(
                      isinstance(k, str) and is_valid_type(v_val)
                      for k, v_val in value.items()
                  )
              return False

          for key, value in v.items():
              if not is_valid_type(value):
                  raise ValueError(f'Invalid type for request_body["{key}"]: {type(value)}')
          return v