|
|
@@ -3,6 +3,8 @@ import json
|
|
|
import os
|
|
|
import sys
|
|
|
import argparse
|
|
|
+import time
|
|
|
+import hashlib
|
|
|
from datetime import datetime
|
|
|
from typing import Literal, Optional
|
|
|
|
|
|
@@ -15,6 +17,8 @@ from lib.client import get_model
|
|
|
MODEL_NAME = "google/gemini-2.5-flash"
|
|
|
# 得分提升阈值:sug或组合词必须比来源query提升至少此幅度才能进入下一轮
|
|
|
REQUIRED_SCORE_GAIN = 0.02
|
|
|
+SUG_CACHE_TTL = 24 * 3600 # cache entries expire after 24 hours
+SUG_CACHE_DIR = os.path.join(os.path.dirname(__file__), "data", "sug_cache")  # on-disk SUG cache location, relative to this file
|
|
|
from script.search_recommendations.xiaohongshu_search_recommendations import XiaohongshuSearchRecommendations
|
|
|
from script.search.xiaohongshu_search import XiaohongshuSearch
|
|
|
from script.search.xiaohongshu_detail import XiaohongshuDetail
|
|
|
@@ -1781,6 +1785,73 @@ scope_category_evaluator = Agent[None](
|
|
|
# v121 新增辅助函数
|
|
|
# ============================================================================
|
|
|
|
|
|
+def _ensure_sug_cache_dir():
+    """Create the SUG cache directory if it does not already exist (idempotent)."""
+    os.makedirs(SUG_CACHE_DIR, exist_ok=True)
|
|
|
+
|
|
|
+
|
|
|
+def _sug_cache_path(keyword: str) -> str:
+    """Map a keyword to its cache file path; md5 hashing keeps filenames filesystem-safe."""
+    key_hash = hashlib.md5(keyword.encode("utf-8")).hexdigest()
+    return os.path.join(SUG_CACHE_DIR, f"{key_hash}.json")
|
|
|
+
|
|
|
+
|
|
|
+def load_sug_cache(keyword: str) -> Optional[list[str]]:
+    """Return the cached SUG list for keyword, or None on miss, expiry, or read error."""
+    if not keyword:
+        return None
+
+    cache_path = _sug_cache_path(keyword)
+    if not os.path.exists(cache_path):
+        return None  # cache miss
+
+    file_age = time.time() - os.path.getmtime(cache_path)  # NOTE(review): file may vanish between exists() and getmtime(); OSError would propagate — confirm acceptable
+    if file_age > SUG_CACHE_TTL:
+        return None  # entry expired; treat as a miss
+
+    try:
+        with open(cache_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        suggestions = data.get("suggestions")
+        if isinstance(suggestions, list):  # guard against corrupted/foreign payloads
+            return suggestions
+    except Exception as exc:
+        print(f" ⚠️ 读取SUG缓存失败({keyword}): {exc}")
+    return None
|
|
|
+
|
|
|
+
|
|
|
+def save_sug_cache(keyword: str, suggestions: list[str]):
+    """Persist SUG results for keyword to the on-disk cache (best effort; errors are logged, not raised)."""
+    if not keyword or not isinstance(suggestions, list):
+        return  # nothing sensible to persist
+
+    _ensure_sug_cache_dir()
+    cache_path = _sug_cache_path(keyword)
+    try:
+        payload = {
+            "keyword": keyword,  # stored for human inspection; lookup uses the md5 filename
+            "suggestions": suggestions,
+            "timestamp": datetime.now().isoformat()  # informational only; expiry is driven by file mtime
+        }
+        with open(cache_path, "w", encoding="utf-8") as f:
+            json.dump(payload, f, ensure_ascii=False, indent=2)
+    except Exception as exc:
+        print(f" ⚠️ 写入SUG缓存失败({keyword}): {exc}")
|
|
|
+
|
|
|
+
|
|
|
+def get_suggestions_with_cache(keyword: str, api: XiaohongshuSearchRecommendations) -> list[str]:
+    """Fetch SUG suggestions for keyword, consulting the persistent cache before the API."""
+    cached = load_sug_cache(keyword)
+    if cached is not None:
+        print(f" 📦 SUG缓存命中: {keyword} ({len(cached)} 个)")
+        return cached
+
+    suggestions = api.get_recommendations(keyword=keyword)
+    if suggestions:  # cache only non-empty results so transient failures are retried on the next run
+        save_sug_cache(keyword, suggestions)
+    return suggestions
|
|
|
+
|
|
|
+
|
|
|
def get_ordered_subsets(words: list[str], min_len: int = 1) -> list[list[str]]:
|
|
|
"""
|
|
|
生成words的所有有序子集(可跳过但不可重排)
|
|
|
@@ -2841,7 +2912,7 @@ async def run_round(
|
|
|
sug_list_list = [] # list of list
|
|
|
for q in q_list:
|
|
|
print(f"\n 处理q: {q.text}")
|
|
|
- suggestions = xiaohongshu_api.get_recommendations(keyword=q.text)
|
|
|
+ suggestions = get_suggestions_with_cache(q.text, xiaohongshu_api)
|
|
|
|
|
|
q_sug_list = []
|
|
|
if suggestions:
|
|
|
@@ -3530,7 +3601,7 @@ async def run_round_v2(
|
|
|
sug_details = {}
|
|
|
|
|
|
for q in query_input:
|
|
|
- suggestions = xiaohongshu_api.get_recommendations(keyword=q.text)
|
|
|
+ suggestions = get_suggestions_with_cache(q.text, xiaohongshu_api)
|
|
|
if suggestions:
|
|
|
print(f" {q.text}: 获取到 {len(suggestions)} 个SUG")
|
|
|
for sug_text in suggestions:
|