"""Application configuration.

Precedence: environment variables > the defaults defined here.
"""

from __future__ import annotations

import json
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

PROJECT_ROOT = Path(__file__).resolve().parents[2]

# Spellings (compared case-insensitively, after stripping) that _env_bool
# treats as True; every other non-empty value is False.
_TRUE_STRINGS = frozenset({"1", "true", "yes", "y", "on"})


def _env_raw(name: str) -> str | None:
    """Return the value of environment variable *name*.

    An unset variable and an empty string are both reported as ``None`` so
    that every typed reader below falls back to its default the same way.
    """
    value = os.getenv(name)
    return value if value else None


def _parse_int(name: str, raw: str) -> int:
    """Convert *raw* to ``int``; on failure raise a ValueError naming *name*."""
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"environment variable {name} must be an integer, got {raw!r}"
        ) from err


def _parse_float(name: str, raw: str) -> float:
    """Convert *raw* to ``float``; on failure raise a ValueError naming *name*."""
    try:
        return float(raw)
    except ValueError as err:
        raise ValueError(
            f"environment variable {name} must be a number, got {raw!r}"
        ) from err


def _env(name: str, default: str) -> str:
    """Return *name* from the environment, or *default* if unset/empty."""
    raw = _env_raw(name)
    return default if raw is None else raw


def _env_int(name: str, default: int) -> int:
    """Return *name* parsed as ``int``, or *default* if unset/empty.

    Raises:
        ValueError: the variable is set but is not a valid integer.
    """
    raw = _env_raw(name)
    return default if raw is None else _parse_int(name, raw)


def _env_int_optional(name: str, default: int | None = None) -> int | None:
    """Like ``_env_int`` but *default* (and so the result) may be ``None``."""
    raw = _env_raw(name)
    return default if raw is None else _parse_int(name, raw)


def _env_float(name: str, default: float) -> float:
    """Return *name* parsed as ``float``, or *default* if unset/empty.

    Raises:
        ValueError: the variable is set but is not a valid number.
    """
    raw = _env_raw(name)
    return default if raw is None else _parse_float(name, raw)


def _env_float_optional(name: str, default: float | None = None) -> float | None:
    """Like ``_env_float`` but *default* (and so the result) may be ``None``."""
    raw = _env_raw(name)
    return default if raw is None else _parse_float(name, raw)


def _env_first(names: tuple[str, ...], default: str) -> str:
    """Return the first non-empty variable among *names*, else *default*.

    *names* is checked in order, so earlier entries take priority.
    """
    for name in names:
        raw = _env_raw(name)
        if raw is not None:
            return raw
    return default


def _env_bool(name: str, default: bool) -> bool:
    """Return *name* interpreted as a boolean, or *default* if unset/empty.

    Only the spellings in ``_TRUE_STRINGS`` yield ``True``; any other
    non-empty value yields ``False`` rather than raising.
    """
    raw = _env_raw(name)
    if raw is None:
        return default
    return raw.strip().lower() in _TRUE_STRINGS


def _load_json_file(path_value: str) -> Any:
    """Read and decode a UTF-8 JSON file.

    ``~`` is expanded, and a relative path is resolved against
    ``PROJECT_ROOT`` rather than the current working directory.
    """
    path = Path(path_value).expanduser()
    if not path.is_absolute():
        path = PROJECT_ROOT / path
    return json.loads(path.read_text(encoding="utf-8"))


def _env_json(name: str, default: Any, file_env_name: str | None = None) -> Any:
    """Return a JSON value configured through the environment.

    Lookup order:
      1. if *file_env_name* is given and that variable is non-empty, the JSON
         file it points to;
      2. the inline JSON text held in *name*;
      3. *default*.
    """
    if file_env_name:
        file_path = os.getenv(file_env_name)
        if file_path:
            return _load_json_file(file_path)
    raw = _env_raw(name)
    return default if raw is None else json.loads(raw)


@dataclass(frozen=True)
class Settings:
    """Immutable application settings.

    Field defaults are the fallback values; ``from_env`` builds an instance
    in which each field may be overridden by an environment variable.
    """

    # --- crawapi / decode endpoints ---
    crawapi_base_url: str = "http://crawapi.piaoquantv.com"
    crawapi_hot_content_rank_path: str = "/crawler/jin_ri_re_bang/content_rank"
    crawapi_keyword_search_path: str = "/crawler/bai_du/keyword"
    decode_api_url: str = "https://aigc-api.aiddit.com/aigc/api/task/decode"
    decode_result_api_url: str = "https://aigc-api.aiddit.com/aigc/api/task/decode/result"
    decode_config_id: int = 70
    request_timeout_seconds: int = 180
    # NOTE(review): certificate verification is disabled by default; confirm
    # this is intentional for the HTTPS endpoints above.
    https_verify_ssl: bool = False

    # --- hot flow scheduling ---
    hot_flow_cron_hours: str = "6,12,18"
    hot_flow_cron_minute: int = 0
    hot_flow_interval_seconds: int = 1800
    decode_result_flow_interval_seconds: int = 1800
    decode_result_batch_size: int = 50
    contribution_score_threshold: float = 0.6
    # default_factory gives each default-constructed instance its own list.
    hot_flow_sources: list[dict[str, Any]] = field(
        default_factory=lambda: [
            {"source": "百度"},
            {"source": "微博"},
            {"source": "微信"},
            {
                "source": "快手",
                "hot_rank_base_url": "http://crawler.aiddit.com",
                "hot_rank_path": "/crawler/kuai_shou/hot_rank",
                "hot_rank_payload": {},
            },
            {
                "source": "抖音",
                "hot_rank_base_url": "http://8.217.190.241:8888",
                "hot_rank_path": "/crawler/dou_yin/hot_rank",
                "hot_rank_payload": {"tab_name": "热点榜"},
            },
        ]
    )

    # --- MySQL ---
    # SECURITY(review): credentials below are committed as source defaults.
    # Values are kept so existing deployments behave the same; they should be
    # rotated and supplied through the environment instead.
    mysql_host: str = "rm-t4nh1xx6o2a6vj8qu3o.mysql.singapore.rds.aliyuncs.com"
    mysql_port: int = 3306
    mysql_user: str = "content_rw"
    mysql_password: str = "bC1aH4bA1lB0"
    mysql_database: str = "external_demand"
    mysql_charset: str = "utf8mb4"

    # --- OpenRouter ---
    # SECURITY(review): API key committed as a source default (see note above).
    open_router_api_key: str = "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"
    open_router_default_model: str = "anthropic/claude-haiku-4-5"
    open_router_timeout_seconds: int = 60
    open_router_http_referer: str = ""
    open_router_app_title: str = "external_demand"
    open_router_base_url: str = "https://openrouter.ai/api/v1"
    open_router_temperature: float | None = 0.7
    open_router_max_tokens: int | None = 20000

    # --- ODPS ---
    # SECURITY(review): access id/key committed as source defaults (see above).
    odps_access_id: str = "LTAI9EBa0bd5PrDa"
    odps_access_key: str = "vAalxds7YxhfOA2yVv8GziCg3Y87v5"
    odps_project: str = "loghubods"
    odps_endpoint: str = "http://service.odps.aliyun.com/api"
    odps_tunnel_endpoint: str = ""

    # --- demand pool ---
    demand_pool_source_table: str = "dwd_multi_demand_pool_di"
    demand_pool_excluded_strategy: str = "当下供需gap-分词"
    demand_pool_top_n: int = 200
    hot_demand_pool_strategy: str = "新热事件"
    wxindex_score_threshold: float = 100_000.0
    odps_daily_write_limit: int = 500
    postprocess_batch_size: int = 20

    # --- contribution-match LLM ---
    contribution_match_llm_model: str = ""
    contribution_match_llm_max_attempts: int = 3
    contribution_match_llm_retry_sleep_seconds: float = 1.0
    contribution_match_llm_max_tokens: int = 4000

    # --- wxindex ---
    wxindex_llm_model: str = "anthropic/claude-haiku-4-5"
    wxindex_llm_max_attempts: int = 3
    wxindex_llm_max_tokens: int = 4000
    wxindex_api_url: str = "http://crawapi.piaoquantv.com/crawler/wei_xin/wxindex"
    wxindex_lookback_days: int = 7
    wxindex_words_cron_hours: str = "10,14"
    wxindex_words_cron_minute: int = 0
    wxindex_heat_pattern_cron_hours: str = "11,15"
    wxindex_heat_pattern_cron_minute: int = 0

    # --- demand quality ---
    demand_event_sense_threshold: float = 6.0
    demand_senior_fit_threshold: float = 6.0
    demand_quality_llm_model: str = "anthropic/claude-haiku-4-5"
    demand_quality_llm_max_attempts: int = 3
    demand_quality_llm_retry_sleep_seconds: float = 1.0
    demand_quality_llm_max_tokens: int = 4000

    # --- category filter ---
    category_filter_llm_model: str = "anthropic/claude-haiku-4-5"
    category_filter_llm_max_attempts: int = 3
    category_filter_llm_retry_sleep_seconds: float = 1.0
    category_filter_llm_max_tokens: int = 1024
    category_filter_body_max_chars: int = 2000
    category_filter_item_sleep_seconds: float = 0.0

    @classmethod
    def from_env(cls) -> "Settings":
        """Build settings from the process environment.

        Each field reads the environment variable named in the call below and
        falls back to the dataclass default when that variable is unset or
        empty.

        Raises:
            ValueError: a numeric variable holds a non-numeric value (the
                message names the variable), or a JSON variable holds
                malformed JSON.
            OSError: ``HOT_FLOW_SOURCES_FILE`` points to an unreadable file.
        """
        defaults = cls()
        return cls(
            crawapi_base_url=_env("CRAWAPI_BASE_URL", defaults.crawapi_base_url),
            crawapi_hot_content_rank_path=_env(
                "CRAWAPI_HOT_CONTENT_RANK_PATH",
                defaults.crawapi_hot_content_rank_path,
            ),
            crawapi_keyword_search_path=_env(
                "CRAWAPI_KEYWORD_SEARCH_PATH",
                defaults.crawapi_keyword_search_path,
            ),
            decode_api_url=_env("DECODE_API_URL", defaults.decode_api_url),
            decode_result_api_url=_env(
                "DECODE_RESULT_API_URL",
                defaults.decode_result_api_url,
            ),
            decode_config_id=_env_int("DECODE_CONFIG_ID", defaults.decode_config_id),
            request_timeout_seconds=_env_int(
                "REQUEST_TIMEOUT_SECONDS",
                defaults.request_timeout_seconds,
            ),
            https_verify_ssl=_env_bool("HTTPS_VERIFY_SSL", defaults.https_verify_ssl),
            hot_flow_cron_hours=_env(
                "HOT_FLOW_CRON_HOURS",
                defaults.hot_flow_cron_hours,
            ),
            hot_flow_cron_minute=_env_int(
                "HOT_FLOW_CRON_MINUTE",
                defaults.hot_flow_cron_minute,
            ),
            hot_flow_interval_seconds=_env_int(
                "HOT_FLOW_INTERVAL_SECONDS",
                defaults.hot_flow_interval_seconds,
            ),
            decode_result_flow_interval_seconds=_env_int(
                "DECODE_RESULT_FLOW_INTERVAL_SECONDS",
                defaults.decode_result_flow_interval_seconds,
            ),
            decode_result_batch_size=_env_int(
                "DECODE_RESULT_BATCH_SIZE",
                defaults.decode_result_batch_size,
            ),
            contribution_score_threshold=_env_float(
                "CONTRIBUTION_SCORE_THRESHOLD",
                defaults.contribution_score_threshold,
            ),
            # A file named by HOT_FLOW_SOURCES_FILE wins over inline JSON.
            hot_flow_sources=_env_json(
                "HOT_FLOW_SOURCES_JSON",
                defaults.hot_flow_sources,
                "HOT_FLOW_SOURCES_FILE",
            ),
            mysql_host=_env("MYSQL_HOST", defaults.mysql_host),
            mysql_port=_env_int("MYSQL_PORT", defaults.mysql_port),
            mysql_user=_env("MYSQL_USER", defaults.mysql_user),
            mysql_password=_env("MYSQL_PASSWORD", defaults.mysql_password),
            mysql_database=_env("MYSQL_DATABASE", defaults.mysql_database),
            mysql_charset=_env("MYSQL_CHARSET", defaults.mysql_charset),
            open_router_api_key=_env_first(
                ("OPEN_ROUTER_API_KEY", "OPENROUTER_API_KEY"),
                defaults.open_router_api_key,
            ),
            open_router_default_model=_env(
                "OPEN_ROUTER_DEFAULT_MODEL",
                defaults.open_router_default_model,
            ),
            open_router_timeout_seconds=_env_int(
                "OPEN_ROUTER_TIMEOUT_SECONDS",
                defaults.open_router_timeout_seconds,
            ),
            open_router_http_referer=_env_first(
                ("OPEN_ROUTER_HTTP_REFERER", "OPENROUTER_HTTP_REFERER"),
                defaults.open_router_http_referer,
            ),
            open_router_app_title=_env_first(
                ("OPEN_ROUTER_APP_TITLE", "OPENROUTER_X_OPEN_ROUTER_TITLE"),
                defaults.open_router_app_title,
            ),
            open_router_base_url=_env(
                "OPEN_ROUTER_BASE_URL",
                defaults.open_router_base_url,
            ),
            open_router_temperature=_env_float_optional(
                "OPEN_ROUTER_TEMPERATURE",
                defaults.open_router_temperature,
            ),
            open_router_max_tokens=_env_int_optional(
                "OPEN_ROUTER_MAX_TOKENS",
                defaults.open_router_max_tokens,
            ),
            odps_access_id=_env("ODPS_ACCESS_ID", defaults.odps_access_id),
            odps_access_key=_env("ODPS_ACCESS_KEY", defaults.odps_access_key),
            odps_project=_env("ODPS_PROJECT", defaults.odps_project),
            odps_endpoint=_env("ODPS_ENDPOINT", defaults.odps_endpoint),
            odps_tunnel_endpoint=_env(
                "ODPS_TUNNEL_ENDPOINT",
                defaults.odps_tunnel_endpoint,
            ),
            demand_pool_source_table=_env(
                "DEMAND_POOL_SOURCE_TABLE",
                defaults.demand_pool_source_table,
            ),
            demand_pool_excluded_strategy=_env(
                "DEMAND_POOL_EXCLUDED_STRATEGY",
                defaults.demand_pool_excluded_strategy,
            ),
            demand_pool_top_n=_env_int(
                "DEMAND_POOL_TOP_N",
                defaults.demand_pool_top_n,
            ),
            hot_demand_pool_strategy=_env(
                "HOT_DEMAND_POOL_STRATEGY",
                defaults.hot_demand_pool_strategy,
            ),
            # Three accepted names, highest priority first. The fallbacks are
            # evaluated eagerly, so an invalid lower-priority value still
            # raises even when a higher-priority one is set.
            wxindex_score_threshold=_env_float(
                "WXINDEX_SCORE_THRESHOLD",
                _env_float(
                    "HOT_DEMAND_POOL_WXINDEX_THRESHOLD",
                    _env_float(
                        "WXINDEX_LATEST_SCORE_THRESHOLD",
                        defaults.wxindex_score_threshold,
                    ),
                ),
            ),
            odps_daily_write_limit=_env_int(
                "ODPS_DAILY_WRITE_LIMIT",
                defaults.odps_daily_write_limit,
            ),
            postprocess_batch_size=_env_int(
                "POSTPROCESS_BATCH_SIZE",
                defaults.postprocess_batch_size,
            ),
            contribution_match_llm_model=_env(
                "CONTRIBUTION_MATCH_LLM_MODEL",
                defaults.contribution_match_llm_model,
            ),
            contribution_match_llm_max_attempts=_env_int(
                "CONTRIBUTION_MATCH_LLM_MAX_ATTEMPTS",
                defaults.contribution_match_llm_max_attempts,
            ),
            contribution_match_llm_retry_sleep_seconds=_env_float(
                "CONTRIBUTION_MATCH_LLM_RETRY_SLEEP_SECONDS",
                defaults.contribution_match_llm_retry_sleep_seconds,
            ),
            contribution_match_llm_max_tokens=_env_int(
                "CONTRIBUTION_MATCH_LLM_MAX_TOKENS",
                defaults.contribution_match_llm_max_tokens,
            ),
            wxindex_llm_model=_env(
                "WXINDEX_LLM_MODEL",
                defaults.wxindex_llm_model,
            ),
            wxindex_llm_max_attempts=_env_int(
                "WXINDEX_LLM_MAX_ATTEMPTS",
                defaults.wxindex_llm_max_attempts,
            ),
            wxindex_llm_max_tokens=_env_int(
                "WXINDEX_LLM_MAX_TOKENS",
                defaults.wxindex_llm_max_tokens,
            ),
            wxindex_api_url=_env("WXINDEX_API_URL", defaults.wxindex_api_url),
            wxindex_lookback_days=_env_int(
                "WXINDEX_LOOKBACK_DAYS",
                defaults.wxindex_lookback_days,
            ),
            wxindex_words_cron_hours=_env(
                "WXINDEX_WORDS_CRON_HOURS",
                defaults.wxindex_words_cron_hours,
            ),
            wxindex_words_cron_minute=_env_int(
                "WXINDEX_WORDS_CRON_MINUTE",
                defaults.wxindex_words_cron_minute,
            ),
            wxindex_heat_pattern_cron_hours=_env(
                "WXINDEX_HEAT_PATTERN_CRON_HOURS",
                defaults.wxindex_heat_pattern_cron_hours,
            ),
            wxindex_heat_pattern_cron_minute=_env_int(
                "WXINDEX_HEAT_PATTERN_CRON_MINUTE",
                defaults.wxindex_heat_pattern_cron_minute,
            ),
            demand_event_sense_threshold=_env_float(
                "DEMAND_EVENT_SENSE_THRESHOLD",
                defaults.demand_event_sense_threshold,
            ),
            demand_senior_fit_threshold=_env_float(
                "DEMAND_SENIOR_FIT_THRESHOLD",
                defaults.demand_senior_fit_threshold,
            ),
            demand_quality_llm_model=_env(
                "DEMAND_QUALITY_LLM_MODEL",
                defaults.demand_quality_llm_model,
            ),
            demand_quality_llm_max_attempts=_env_int(
                "DEMAND_QUALITY_LLM_MAX_ATTEMPTS",
                defaults.demand_quality_llm_max_attempts,
            ),
            demand_quality_llm_retry_sleep_seconds=_env_float(
                "DEMAND_QUALITY_LLM_RETRY_SLEEP_SECONDS",
                defaults.demand_quality_llm_retry_sleep_seconds,
            ),
            demand_quality_llm_max_tokens=_env_int(
                "DEMAND_QUALITY_LLM_MAX_TOKENS",
                defaults.demand_quality_llm_max_tokens,
            ),
            category_filter_llm_model=_env(
                "CATEGORY_FILTER_LLM_MODEL",
                defaults.category_filter_llm_model,
            ),
            category_filter_llm_max_attempts=_env_int(
                "CATEGORY_FILTER_LLM_MAX_ATTEMPTS",
                defaults.category_filter_llm_max_attempts,
            ),
            category_filter_llm_retry_sleep_seconds=_env_float(
                "CATEGORY_FILTER_LLM_RETRY_SLEEP_SECONDS",
                defaults.category_filter_llm_retry_sleep_seconds,
            ),
            category_filter_llm_max_tokens=_env_int(
                "CATEGORY_FILTER_LLM_MAX_TOKENS",
                defaults.category_filter_llm_max_tokens,
            ),
            category_filter_body_max_chars=_env_int(
                "CATEGORY_FILTER_BODY_MAX_CHARS",
                defaults.category_filter_body_max_chars,
            ),
            category_filter_item_sleep_seconds=_env_float(
                "CATEGORY_FILTER_ITEM_SLEEP_SECONDS",
                defaults.category_filter_item_sleep_seconds,
            ),
        )


# Module-level instance, resolved once at import time.
settings = Settings.from_env()