소스 검색

热榜增加快手来源

xueyiming 1 주 전
부모
커밋
6c8cbcbec4
4개의 변경된 파일, 50개의 추가 및 15개의 삭제
  1. +7 -10
      app/core/config.py
  2. +11 -1
      app/hot_content/config.py
  3. +28 -3
      app/hot_content/service.py
  4. +4 -1
      app/hot_content/types.py

+ 7 - 10
app/core/config.py

@@ -104,18 +104,15 @@ class Settings:
     contribution_score_threshold: float = 0.6
     hot_flow_sources: list[dict[str, Any]] = field(
         default_factory=lambda: [
+            {"source": "百度"},
+            {"source": "微博"},
+            {"source": "微信"},
             {
-                "source": "百度",
-                "count": 10,
+                "source": "快手",
+                "hot_rank_base_url": "http://crawler.aiddit.com",
+                "hot_rank_path": "/crawler/kuai_shou/hot_rank",
+                "hot_rank_payload": {},
             },
-            {
-                "source": "微博",
-                "count": 10,
-            },
-            {
-                "source": "微信",
-                "count": 10,
-            }
         ]
     )
 

+ 11 - 1
app/hot_content/config.py

@@ -80,9 +80,19 @@ def _normalize_source_config(item: Any) -> HotSourceConfig:
     if not source:
         raise HotContentFlowError(f"hot source missing source: {item!r}")
 
+    hot_rank_base_url = str(item.get("hot_rank_base_url") or "").strip().rstrip("/") or None
+    hot_rank_path = str(item.get("hot_rank_path") or "").strip() or None
+    hot_rank_payload = item.get("hot_rank_payload")
+    if hot_rank_payload is not None and not isinstance(hot_rank_payload, dict):
+        raise HotContentFlowError(
+            f"hot source hot_rank_payload must be a dict: {item!r}"
+        )
+
     return HotSourceConfig(
         source=source,
-        count=int(item.get("count") or item.get("limit") or item.get("rank_limit") or 10),
+        hot_rank_base_url=hot_rank_base_url,
+        hot_rank_path=hot_rank_path,
+        hot_rank_payload=hot_rank_payload,
     )
 
 

+ 28 - 3
app/hot_content/service.py

@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 from datetime import datetime
+import json
 import time
 from typing import Any
 
@@ -47,15 +48,39 @@ class HotContentFlowService:
         return self.build_summary(hot_titles, selected_contents, decode_resp)
 
     def fetch_and_save_hot_titles(self) -> list[dict[str, Any]]:
-        hot_url = build_url(self.config.crawapi_base_url, self.config.hot_rank_path)
         saved_titles: list[dict[str, Any]] = []
         seen_keys: set[str] = set()
-        resp = self.api_client.post_json(hot_url, HOT_RANK_PAYLOAD)
+        response_cache: dict[str, dict[str, Any]] = {}
 
         for source_config in self.config.sources:
+            hot_rank_base_url = (
+                source_config.hot_rank_base_url or self.config.crawapi_base_url
+            )
+            hot_rank_path = source_config.hot_rank_path or self.config.hot_rank_path
+            hot_rank_payload = (
+                source_config.hot_rank_payload
+                if source_config.hot_rank_payload is not None
+                else HOT_RANK_PAYLOAD
+            )
+            cache_key = json.dumps(
+                {
+                    "base_url": hot_rank_base_url,
+                    "path": hot_rank_path,
+                    "payload": hot_rank_payload,
+                },
+                ensure_ascii=False,
+                sort_keys=True,
+            )
+            if cache_key not in response_cache:
+                hot_url = build_url(hot_rank_base_url, hot_rank_path)
+                response_cache[cache_key] = self.api_client.post_json(
+                    hot_url,
+                    hot_rank_payload,
+                )
+            resp = response_cache[cache_key]
             rank_items = extract_rank_items(resp, source_config.source)
             for rank_item in rank_items:
-                title = str(rank_item.get("title") or "").strip()
+                title = str(rank_item.get("title") or rank_item.get("name") or "").strip()
                 if not title:
                     continue
                 unique_key = unique_title_key(source_config.source, title)

+ 4 - 1
app/hot_content/types.py

@@ -3,12 +3,15 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
+from typing import Any
 
 
 @dataclass(frozen=True)
 class HotSourceConfig:
     source: str
-    count: int = 10
+    hot_rank_base_url: str | None = None
+    hot_rank_path: str | None = None
+    hot_rank_payload: dict[str, Any] | None = None
 
 
 @dataclass(frozen=True)