| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411 |
- """应用配置。
- 配置优先级:环境变量 > 这里的默认值。
- """
- from __future__ import annotations
- import json
- import os
- from dataclasses import dataclass, field
- from pathlib import Path
- from typing import Any
# Resolved path two directory levels above the directory that contains this
# file (presumably the repository root -- confirm against the package layout).
# Relative JSON file paths are resolved against it.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
def _env(name: str, default: str) -> str:
    """Return environment variable *name*, or *default* when unset or empty."""
    # os.getenv yields None (unset) or a str; None and "" are both falsy.
    return os.getenv(name) or default
def _env_int(name: str, default: int) -> int:
    """Read environment variable *name* as an integer.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or empty.

    Returns:
        The parsed integer, or *default*.

    Raises:
        ValueError: If the variable holds text that ``int()`` rejects. The
            message names the offending variable and its raw value.
    """
    raw = os.getenv(name)
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError as exc:
        # A bare "invalid literal for int()" gives no hint which of the many
        # settings is misconfigured, so re-raise with the variable name.
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from exc
def _env_int_optional(name: str, default: int | None = None) -> int | None:
    """Read environment variable *name* as an integer, allowing a ``None`` default.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or empty; may be
            ``None``.

    Returns:
        The parsed integer, or *default*.

    Raises:
        ValueError: If the variable holds text that ``int()`` rejects. The
            message names the offending variable and its raw value.
    """
    raw = os.getenv(name)
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError as exc:
        # Name the variable so a misconfiguration is easy to locate.
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from exc
def _env_float(name: str, default: float) -> float:
    """Read environment variable *name* as a float.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or empty.

    Returns:
        The parsed float, or *default*.

    Raises:
        ValueError: If the variable holds text that ``float()`` rejects. The
            message names the offending variable and its raw value.
    """
    raw = os.getenv(name)
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError as exc:
        # Name the variable so a misconfiguration is easy to locate.
        raise ValueError(
            f"Environment variable {name} must be a number, got {raw!r}"
        ) from exc
def _env_float_optional(name: str, default: float | None = None) -> float | None:
    """Read environment variable *name* as a float, allowing a ``None`` default.

    Args:
        name: Environment variable to look up.
        default: Value returned when the variable is unset or empty; may be
            ``None``.

    Returns:
        The parsed float, or *default*.

    Raises:
        ValueError: If the variable holds text that ``float()`` rejects. The
            message names the offending variable and its raw value.
    """
    raw = os.getenv(name)
    if not raw:
        return default
    try:
        return float(raw)
    except ValueError as exc:
        # Name the variable so a misconfiguration is easy to locate.
        raise ValueError(
            f"Environment variable {name} must be a number, got {raw!r}"
        ) from exc
def _env_first(names: tuple[str, ...], default: str) -> str:
    """Return the first non-empty environment variable among *names*.

    Names are tried in order; *default* is returned when none of them is set
    to a non-empty string.
    """
    # Lazy lookup: variables after the first hit are never read.
    candidates = (os.getenv(name) for name in names)
    return next((value for value in candidates if value), default)
def _env_bool(name: str, default: bool) -> bool:
    """Read environment variable *name* as a boolean flag.

    Returns *default* when the variable is unset or empty. Otherwise the value
    is stripped and lower-cased, and the result is ``True`` only for ``1``,
    ``true``, ``yes``, ``y`` or ``on``; any other text yields ``False``.
    """
    raw = os.getenv(name)
    if not raw:
        return default
    truthy_tokens = ("1", "true", "yes", "y", "on")
    normalized = raw.strip().lower()
    return normalized in truthy_tokens
def _load_json_file(path_value: str) -> Any:
    """Parse and return the JSON document stored at *path_value*.

    A leading ``~`` is expanded. A path that is still relative afterwards is
    resolved against ``PROJECT_ROOT``. The file is read as UTF-8.
    """
    candidate = Path(path_value).expanduser()
    resolved = candidate if candidate.is_absolute() else PROJECT_ROOT / candidate
    text = resolved.read_text(encoding="utf-8")
    return json.loads(text)
def _env_json(name: str, default: Any, file_env_name: str | None = None) -> Any:
    """Load a JSON value from the environment.

    Lookup order:
      1. If *file_env_name* is given and that variable is non-empty, it is
         treated as a file path and the JSON file is loaded.
      2. Otherwise, if *name* is set to a non-empty string, that string is
         parsed as JSON.
      3. Otherwise *default* is returned unchanged.
    """
    file_path = os.getenv(file_env_name) if file_env_name else None
    if file_path:
        return _load_json_file(file_path)

    inline = os.getenv(name)
    return json.loads(inline) if inline else default
@dataclass(frozen=True)
class Settings:
    """Immutable application settings.

    Every field has a built-in default. :meth:`from_env` builds an instance in
    which each non-empty environment variable overrides the matching default.

    SECURITY(review): several defaults below embed what look like live
    credentials (MySQL password, OpenRouter API key, ODPS access key). They
    are kept byte-for-byte so existing deployments keep working, but they
    should be rotated and supplied only through the environment or a secret
    store rather than committed to source control.
    """

    # crawapi_* / decode_*: endpoints and identifiers for outgoing HTTP calls.
    crawapi_base_url: str = "http://crawapi.piaoquantv.com"
    crawapi_hot_content_rank_path: str = "/crawler/jin_ri_re_bang/content_rank"
    crawapi_keyword_search_path: str = "/crawler/bai_du/keyword"
    decode_api_url: str = "https://aigc-api.aiddit.com/aigc/api/task/decode"
    decode_result_api_url: str = "https://aigc-api.aiddit.com/aigc/api/task/decode/result"
    decode_config_id: int = 70
    request_timeout_seconds: int = 180
    # NOTE(review): defaults to False; presumably this turns off TLS
    # certificate verification for HTTPS requests -- confirm with the callers
    # and consider defaulting to True.
    https_verify_ssl: bool = False

    # Scheduling and batching knobs.
    hot_flow_cron_hours: str = "6,12,18"
    hot_flow_cron_minute: int = 0
    hot_flow_interval_seconds: int = 1800
    decode_result_flow_interval_seconds: int = 1800
    decode_result_batch_size: int = 50
    contribution_score_threshold: float = 0.6

    # List of per-source dicts. Each default entry has a "source" key; the
    # last one additionally carries hot_rank_* keys. default_factory gives
    # every instance its own list.
    hot_flow_sources: list[dict[str, Any]] = field(
        default_factory=lambda: [
            {"source": "百度"},
            {"source": "微博"},
            {"source": "微信"},
            {
                "source": "快手",
                "hot_rank_base_url": "http://crawler.aiddit.com",
                "hot_rank_path": "/crawler/kuai_shou/hot_rank",
                "hot_rank_payload": {},
            },
        ]
    )

    # mysql_*: database connection fields.
    mysql_host: str = "rm-t4nh1xx6o2a6vj8qu3o.mysql.singapore.rds.aliyuncs.com"
    mysql_port: int = 3306
    mysql_user: str = "content_rw"
    # SECURITY(review): hard-coded credential; rotate and move out of source.
    mysql_password: str = "bC1aH4bA1lB0"
    mysql_database: str = "external_demand"
    mysql_charset: str = "utf8mb4"

    # open_router_*: LLM gateway fields.
    # SECURITY(review): hard-coded API key; rotate and move out of source.
    open_router_api_key: str = "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"
    open_router_default_model: str = "anthropic/claude-haiku-4-5"
    open_router_timeout_seconds: int = 60
    open_router_http_referer: str = ""
    open_router_app_title: str = "external_demand"
    open_router_base_url: str = "https://openrouter.ai/api/v1"
    open_router_temperature: float | None = 0.7
    open_router_max_tokens: int | None = 20000

    # odps_*: ODPS access fields.
    # SECURITY(review): hard-coded access id/key; rotate and move out of source.
    odps_access_id: str = "LTAI9EBa0bd5PrDa"
    odps_access_key: str = "vAalxds7YxhfOA2yVv8GziCg3Y87v5"
    odps_project: str = "loghubods"
    odps_endpoint: str = "http://service.odps.aliyun.com/api"
    odps_tunnel_endpoint: str = ""

    # Demand-pool selection and write limits.
    demand_pool_source_table: str = "dwd_multi_demand_pool_di"
    demand_pool_excluded_strategy: str = "当下供需gap-分词"
    demand_pool_top_n: int = 200
    hot_demand_pool_strategy: str = "新热事件"
    wxindex_score_threshold: float = 100_000.0
    odps_daily_write_limit: int = 500
    postprocess_batch_size: int = 20

    # contribution_match_llm_*: model and retry fields.
    contribution_match_llm_model: str = ""
    contribution_match_llm_max_attempts: int = 3
    contribution_match_llm_retry_sleep_seconds: float = 1.0
    contribution_match_llm_max_tokens: int = 4000

    # wxindex_*: model, API and schedule fields.
    wxindex_llm_model: str = "anthropic/claude-haiku-4-5"
    wxindex_llm_max_attempts: int = 3
    wxindex_llm_max_tokens: int = 4000
    wxindex_api_url: str = "http://crawapi.piaoquantv.com/crawler/wei_xin/wxindex"
    wxindex_lookback_days: int = 7
    wxindex_words_cron_hour: int = 10
    wxindex_words_cron_minute: int = 0
    wxindex_heat_pattern_cron_hour: int = 11
    wxindex_heat_pattern_cron_minute: int = 0

    # demand_*: thresholds plus model and retry fields.
    demand_event_sense_threshold: float = 6.0
    demand_senior_fit_threshold: float = 6.0
    demand_quality_llm_model: str = "anthropic/claude-haiku-4-5"
    demand_quality_llm_max_attempts: int = 3
    demand_quality_llm_retry_sleep_seconds: float = 1.0
    demand_quality_llm_max_tokens: int = 4000

    # category_filter_*: model, retry and size-limit fields.
    category_filter_llm_model: str = "anthropic/claude-haiku-4-5"
    category_filter_llm_max_attempts: int = 3
    category_filter_llm_retry_sleep_seconds: float = 1.0
    category_filter_llm_max_tokens: int = 1024
    category_filter_body_max_chars: int = 2000
    category_filter_item_sleep_seconds: float = 0.0

    @classmethod
    def from_env(cls) -> "Settings":
        """Build settings from environment variables, falling back to defaults.

        Most fields read the variable named after the field in upper case.
        Exceptions:

        * ``hot_flow_sources`` reads ``HOT_FLOW_SOURCES_FILE`` (path to a JSON
          file) first, then ``HOT_FLOW_SOURCES_JSON`` (inline JSON).
        * ``open_router_api_key``, ``open_router_http_referer`` and
          ``open_router_app_title`` also accept ``OPENROUTER_*`` aliases.
        * ``wxindex_score_threshold`` reads ``WXINDEX_SCORE_THRESHOLD``, then
          ``HOT_DEMAND_POOL_WXINDEX_THRESHOLD``, then
          ``WXINDEX_LATEST_SCORE_THRESHOLD``.

        Returns:
            A new frozen ``Settings`` instance.

        Raises:
            ValueError: If a numeric or JSON variable cannot be parsed.
        """
        defaults = cls()
        return cls(
            crawapi_base_url=_env("CRAWAPI_BASE_URL", defaults.crawapi_base_url),
            crawapi_hot_content_rank_path=_env(
                "CRAWAPI_HOT_CONTENT_RANK_PATH",
                defaults.crawapi_hot_content_rank_path,
            ),
            crawapi_keyword_search_path=_env(
                "CRAWAPI_KEYWORD_SEARCH_PATH",
                defaults.crawapi_keyword_search_path,
            ),
            decode_api_url=_env("DECODE_API_URL", defaults.decode_api_url),
            decode_result_api_url=_env(
                "DECODE_RESULT_API_URL",
                defaults.decode_result_api_url,
            ),
            decode_config_id=_env_int("DECODE_CONFIG_ID", defaults.decode_config_id),
            request_timeout_seconds=_env_int(
                "REQUEST_TIMEOUT_SECONDS",
                defaults.request_timeout_seconds,
            ),
            https_verify_ssl=_env_bool("HTTPS_VERIFY_SSL", defaults.https_verify_ssl),
            hot_flow_cron_hours=_env(
                "HOT_FLOW_CRON_HOURS",
                defaults.hot_flow_cron_hours,
            ),
            hot_flow_cron_minute=_env_int(
                "HOT_FLOW_CRON_MINUTE",
                defaults.hot_flow_cron_minute,
            ),
            hot_flow_interval_seconds=_env_int(
                "HOT_FLOW_INTERVAL_SECONDS",
                defaults.hot_flow_interval_seconds,
            ),
            decode_result_flow_interval_seconds=_env_int(
                "DECODE_RESULT_FLOW_INTERVAL_SECONDS",
                defaults.decode_result_flow_interval_seconds,
            ),
            decode_result_batch_size=_env_int(
                "DECODE_RESULT_BATCH_SIZE",
                defaults.decode_result_batch_size,
            ),
            # Parsed with _env_float like every other float field.
            contribution_score_threshold=_env_float(
                "CONTRIBUTION_SCORE_THRESHOLD",
                defaults.contribution_score_threshold,
            ),
            hot_flow_sources=_env_json(
                "HOT_FLOW_SOURCES_JSON",
                defaults.hot_flow_sources,
                "HOT_FLOW_SOURCES_FILE",
            ),
            mysql_host=_env("MYSQL_HOST", defaults.mysql_host),
            mysql_port=_env_int("MYSQL_PORT", defaults.mysql_port),
            mysql_user=_env("MYSQL_USER", defaults.mysql_user),
            mysql_password=_env("MYSQL_PASSWORD", defaults.mysql_password),
            mysql_database=_env("MYSQL_DATABASE", defaults.mysql_database),
            mysql_charset=_env("MYSQL_CHARSET", defaults.mysql_charset),
            open_router_api_key=_env_first(
                ("OPEN_ROUTER_API_KEY", "OPENROUTER_API_KEY"),
                defaults.open_router_api_key,
            ),
            open_router_default_model=_env(
                "OPEN_ROUTER_DEFAULT_MODEL",
                defaults.open_router_default_model,
            ),
            open_router_timeout_seconds=_env_int(
                "OPEN_ROUTER_TIMEOUT_SECONDS",
                defaults.open_router_timeout_seconds,
            ),
            open_router_http_referer=_env_first(
                ("OPEN_ROUTER_HTTP_REFERER", "OPENROUTER_HTTP_REFERER"),
                defaults.open_router_http_referer,
            ),
            open_router_app_title=_env_first(
                ("OPEN_ROUTER_APP_TITLE", "OPENROUTER_X_OPEN_ROUTER_TITLE"),
                defaults.open_router_app_title,
            ),
            open_router_base_url=_env(
                "OPEN_ROUTER_BASE_URL",
                defaults.open_router_base_url,
            ),
            open_router_temperature=_env_float_optional(
                "OPEN_ROUTER_TEMPERATURE",
                defaults.open_router_temperature,
            ),
            open_router_max_tokens=_env_int_optional(
                "OPEN_ROUTER_MAX_TOKENS",
                defaults.open_router_max_tokens,
            ),
            odps_access_id=_env("ODPS_ACCESS_ID", defaults.odps_access_id),
            odps_access_key=_env("ODPS_ACCESS_KEY", defaults.odps_access_key),
            odps_project=_env("ODPS_PROJECT", defaults.odps_project),
            odps_endpoint=_env("ODPS_ENDPOINT", defaults.odps_endpoint),
            odps_tunnel_endpoint=_env(
                "ODPS_TUNNEL_ENDPOINT",
                defaults.odps_tunnel_endpoint,
            ),
            demand_pool_source_table=_env(
                "DEMAND_POOL_SOURCE_TABLE",
                defaults.demand_pool_source_table,
            ),
            demand_pool_excluded_strategy=_env(
                "DEMAND_POOL_EXCLUDED_STRATEGY",
                defaults.demand_pool_excluded_strategy,
            ),
            demand_pool_top_n=_env_int(
                "DEMAND_POOL_TOP_N",
                defaults.demand_pool_top_n,
            ),
            hot_demand_pool_strategy=_env(
                "HOT_DEMAND_POOL_STRATEGY",
                defaults.hot_demand_pool_strategy,
            ),
            # Pick the highest-priority non-empty name first, then parse once.
            # Nesting _env_float calls would parse every alias eagerly, so a
            # malformed value in a lower-priority alias would abort start-up
            # even when the primary variable is valid.
            wxindex_score_threshold=float(
                _env_first(
                    (
                        "WXINDEX_SCORE_THRESHOLD",
                        "HOT_DEMAND_POOL_WXINDEX_THRESHOLD",
                        "WXINDEX_LATEST_SCORE_THRESHOLD",
                    ),
                    str(defaults.wxindex_score_threshold),
                )
            ),
            odps_daily_write_limit=_env_int(
                "ODPS_DAILY_WRITE_LIMIT",
                defaults.odps_daily_write_limit,
            ),
            postprocess_batch_size=_env_int(
                "POSTPROCESS_BATCH_SIZE",
                defaults.postprocess_batch_size,
            ),
            contribution_match_llm_model=_env(
                "CONTRIBUTION_MATCH_LLM_MODEL",
                defaults.contribution_match_llm_model,
            ),
            contribution_match_llm_max_attempts=_env_int(
                "CONTRIBUTION_MATCH_LLM_MAX_ATTEMPTS",
                defaults.contribution_match_llm_max_attempts,
            ),
            contribution_match_llm_retry_sleep_seconds=_env_float(
                "CONTRIBUTION_MATCH_LLM_RETRY_SLEEP_SECONDS",
                defaults.contribution_match_llm_retry_sleep_seconds,
            ),
            contribution_match_llm_max_tokens=_env_int(
                "CONTRIBUTION_MATCH_LLM_MAX_TOKENS",
                defaults.contribution_match_llm_max_tokens,
            ),
            wxindex_llm_model=_env(
                "WXINDEX_LLM_MODEL",
                defaults.wxindex_llm_model,
            ),
            wxindex_llm_max_attempts=_env_int(
                "WXINDEX_LLM_MAX_ATTEMPTS",
                defaults.wxindex_llm_max_attempts,
            ),
            wxindex_llm_max_tokens=_env_int(
                "WXINDEX_LLM_MAX_TOKENS",
                defaults.wxindex_llm_max_tokens,
            ),
            wxindex_api_url=_env("WXINDEX_API_URL", defaults.wxindex_api_url),
            wxindex_lookback_days=_env_int(
                "WXINDEX_LOOKBACK_DAYS",
                defaults.wxindex_lookback_days,
            ),
            wxindex_words_cron_hour=_env_int(
                "WXINDEX_WORDS_CRON_HOUR",
                defaults.wxindex_words_cron_hour,
            ),
            wxindex_words_cron_minute=_env_int(
                "WXINDEX_WORDS_CRON_MINUTE",
                defaults.wxindex_words_cron_minute,
            ),
            wxindex_heat_pattern_cron_hour=_env_int(
                "WXINDEX_HEAT_PATTERN_CRON_HOUR",
                defaults.wxindex_heat_pattern_cron_hour,
            ),
            wxindex_heat_pattern_cron_minute=_env_int(
                "WXINDEX_HEAT_PATTERN_CRON_MINUTE",
                defaults.wxindex_heat_pattern_cron_minute,
            ),
            demand_event_sense_threshold=_env_float(
                "DEMAND_EVENT_SENSE_THRESHOLD",
                defaults.demand_event_sense_threshold,
            ),
            demand_senior_fit_threshold=_env_float(
                "DEMAND_SENIOR_FIT_THRESHOLD",
                defaults.demand_senior_fit_threshold,
            ),
            demand_quality_llm_model=_env(
                "DEMAND_QUALITY_LLM_MODEL",
                defaults.demand_quality_llm_model,
            ),
            demand_quality_llm_max_attempts=_env_int(
                "DEMAND_QUALITY_LLM_MAX_ATTEMPTS",
                defaults.demand_quality_llm_max_attempts,
            ),
            demand_quality_llm_retry_sleep_seconds=_env_float(
                "DEMAND_QUALITY_LLM_RETRY_SLEEP_SECONDS",
                defaults.demand_quality_llm_retry_sleep_seconds,
            ),
            demand_quality_llm_max_tokens=_env_int(
                "DEMAND_QUALITY_LLM_MAX_TOKENS",
                defaults.demand_quality_llm_max_tokens,
            ),
            category_filter_llm_model=_env(
                "CATEGORY_FILTER_LLM_MODEL",
                defaults.category_filter_llm_model,
            ),
            category_filter_llm_max_attempts=_env_int(
                "CATEGORY_FILTER_LLM_MAX_ATTEMPTS",
                defaults.category_filter_llm_max_attempts,
            ),
            category_filter_llm_retry_sleep_seconds=_env_float(
                "CATEGORY_FILTER_LLM_RETRY_SLEEP_SECONDS",
                defaults.category_filter_llm_retry_sleep_seconds,
            ),
            category_filter_llm_max_tokens=_env_int(
                "CATEGORY_FILTER_LLM_MAX_TOKENS",
                defaults.category_filter_llm_max_tokens,
            ),
            category_filter_body_max_chars=_env_int(
                "CATEGORY_FILTER_BODY_MAX_CHARS",
                defaults.category_filter_body_max_chars,
            ),
            category_filter_item_sleep_seconds=_env_float(
                "CATEGORY_FILTER_ITEM_SLEEP_SECONDS",
                defaults.category_filter_item_sleep_seconds,
            ),
        )
# Module-level settings instance, built from the environment when this module
# is imported.
settings = Settings.from_env()
|