|
|
@@ -3,6 +3,8 @@ import json
|
|
|
import os
|
|
|
import sys
|
|
|
import argparse
|
|
|
+import time
|
|
|
+import hashlib
|
|
|
from datetime import datetime
|
|
|
from typing import Literal, Optional
|
|
|
|
|
|
@@ -15,6 +17,8 @@ from lib.client import get_model
|
|
|
MODEL_NAME = "google/gemini-2.5-flash"
|
|
|
# 得分提升阈值:sug或组合词必须比来源query提升至少此幅度才能进入下一轮
|
|
|
REQUIRED_SCORE_GAIN = 0.02
|
|
|
+SUG_CACHE_TTL = 24 * 3600 # cache entries expire after 24 hours
+SUG_CACHE_DIR = os.path.join(os.path.dirname(__file__), "data", "sug_cache")  # on-disk SUG cache location, relative to this file
|
|
|
from script.search_recommendations.xiaohongshu_search_recommendations import XiaohongshuSearchRecommendations
|
|
|
from script.search.xiaohongshu_search import XiaohongshuSearch
|
|
|
from script.search.xiaohongshu_detail import XiaohongshuDetail
|
|
|
@@ -1781,6 +1785,73 @@ scope_category_evaluator = Agent[None](
|
|
|
# v121 新增辅助函数
|
|
|
# ============================================================================
|
|
|
|
|
|
+def _ensure_sug_cache_dir():
+    """Create the SUG cache directory if it does not already exist (idempotent)."""
+    os.makedirs(SUG_CACHE_DIR, exist_ok=True)
|
|
|
+
|
|
|
+
|
|
|
+def _sug_cache_path(keyword: str) -> str:
+    """Map a keyword to its cache file path; md5 hashing keeps filenames filesystem-safe."""
+    key_hash = hashlib.md5(keyword.encode("utf-8")).hexdigest()
+    return os.path.join(SUG_CACHE_DIR, f"{key_hash}.json")
|
|
|
+
|
|
|
+
|
|
|
+def load_sug_cache(keyword: str) -> Optional[list[str]]:
+    """Return the cached SUG list for keyword, or None on miss, expiry, or read error."""
+    if not keyword:
+        return None
+
+    cache_path = _sug_cache_path(keyword)
+    if not os.path.exists(cache_path):
+        return None  # cache miss
+
+    file_age = time.time() - os.path.getmtime(cache_path)  # NOTE(review): file may vanish between exists() and getmtime(); OSError would propagate — confirm acceptable
+    if file_age > SUG_CACHE_TTL:
+        return None  # entry expired; treat as a miss
+
+    try:
+        with open(cache_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        suggestions = data.get("suggestions")
+        if isinstance(suggestions, list):  # guard against corrupted/foreign payloads
+            return suggestions
+    except Exception as exc:
+        print(f" ⚠️ 读取SUG缓存失败({keyword}): {exc}")
+    return None
|
|
|
+
|
|
|
+
|
|
|
+def save_sug_cache(keyword: str, suggestions: list[str]):
+    """Persist SUG results for keyword to the on-disk cache (best effort; errors are logged, not raised)."""
+    if not keyword or not isinstance(suggestions, list):
+        return  # nothing sensible to persist
+
+    _ensure_sug_cache_dir()
+    cache_path = _sug_cache_path(keyword)
+    try:
+        payload = {
+            "keyword": keyword,  # stored for human inspection; lookup uses the md5 filename
+            "suggestions": suggestions,
+            "timestamp": datetime.now().isoformat()  # informational only; expiry is driven by file mtime
+        }
+        with open(cache_path, "w", encoding="utf-8") as f:
+            json.dump(payload, f, ensure_ascii=False, indent=2)
+    except Exception as exc:
+        print(f" ⚠️ 写入SUG缓存失败({keyword}): {exc}")
|
|
|
+
|
|
|
+
|
|
|
+def get_suggestions_with_cache(keyword: str, api: XiaohongshuSearchRecommendations) -> list[str]:
+    """Fetch SUG suggestions for keyword, consulting the persistent cache before the API."""
+    cached = load_sug_cache(keyword)
+    if cached is not None:
+        print(f" 📦 SUG缓存命中: {keyword} ({len(cached)} 个)")
+        return cached
+
+    suggestions = api.get_recommendations(keyword=keyword)
+    if suggestions:  # cache only non-empty results so transient failures are retried on the next run
+        save_sug_cache(keyword, suggestions)
+    return suggestions
|
|
|
+
|
|
|
+
|
|
|
def get_ordered_subsets(words: list[str], min_len: int = 1) -> list[list[str]]:
|
|
|
"""
|
|
|
生成words的所有有序子集(可跳过但不可重排)
|
|
|
@@ -2841,7 +2912,7 @@ async def run_round(
|
|
|
sug_list_list = [] # list of list
|
|
|
for q in q_list:
|
|
|
print(f"\n 处理q: {q.text}")
|
|
|
- suggestions = xiaohongshu_api.get_recommendations(keyword=q.text)
|
|
|
+ suggestions = get_suggestions_with_cache(q.text, xiaohongshu_api)
|
|
|
|
|
|
q_sug_list = []
|
|
|
if suggestions:
|
|
|
@@ -3530,7 +3601,7 @@ async def run_round_v2(
|
|
|
sug_details = {}
|
|
|
|
|
|
for q in query_input:
|
|
|
- suggestions = xiaohongshu_api.get_recommendations(keyword=q.text)
|
|
|
+ suggestions = get_suggestions_with_cache(q.text, xiaohongshu_api)
|
|
|
if suggestions:
|
|
|
print(f" {q.text}: 获取到 {len(suggestions)} 个SUG")
|
|
|
for sug_text in suggestions:
|