2 kuukautta sitten · 8b15fcf950
--- a/knowledge_search_traverse.py
+++ b/knowledge_search_traverse.py
@@ -17,6 +17,8 @@ MODEL_NAME = "google/gemini-2.5-flash"
 
				 REQUIRED_SCORE_GAIN = 0.02
			
 
				 from script.search_recommendations.xiaohongshu_search_recommendations import XiaohongshuSearchRecommendations
			
 
				 from script.search.xiaohongshu_search import XiaohongshuSearch
			
 
				+from script.search.xiaohongshu_detail import XiaohongshuDetail
			
 
				+from script.search.enrichment_helper import enrich_post_with_detail
			
 
				 # from multimodal_extractor import extract_post_images  # 内容提取流程已断开
			
 
				 from post_evaluator_v3 import evaluate_post_v3, apply_evaluation_v3_to_post
			
 
				 
			
@@ -146,6 +148,13 @@ class Post(BaseModel):
 
				     note_id: str = ""
			
 
				     note_url: str = ""
			
 
				 
			
 
				+    # 详情补充字段（来自详情API）
			
 
				+    author_name: str = ""  # 作者名称
			
 
				+    author_id: str = ""  # 作者ID
			
 
				+    publish_time: int = 0  # 发布时间戳
			
 
				+    cdn_images: list[str] = Field(default_factory=list)  # 高清CDN图片列表(详情API补充)
			
 
				+    detail_fetched: bool = False  # 是否已获取详情的标记
			
 
				+
			
 
				     # V3评估字段（顶层 - 快速访问）
			
 
				     is_knowledge: bool | None = None  # Prompt1: 是否是知识内容
			
 
				     is_content_knowledge: bool | None = None  # Prompt2: 是否是内容知识
			
@@ -2014,46 +2023,49 @@ def process_note_data(note: dict) -> Post:
 
				     user_info = note_card.get("user", {})
			
 
				 
			
 
				     # ========== 调试日志 START ==========
			
 
				-    note_id = note.get("id", "")
			
 
				-
			
 
				-    # 1. 打印完整的 note 结构
			
 
				-    print(f"\n[DEBUG] ===== 处理帖子 {note_id} =====")
			
 
				-    print(f"[DEBUG] note 的所有键: {list(note.keys())}")
			
 
				-    print(f"[DEBUG] note 完整数据 (前2000字符):")
			
 
				-    print(json.dumps(note, ensure_ascii=False, indent=2)[:2000])
			
 
				-
			
 
				-    # 2. 打印 note_card 信息
			
 
				-    print(f"\n[DEBUG] note_card 的所有键: {list(note_card.keys())}")
			
 
				-
			
 
				-    # 3. 检查 desc 字段
			
 
				-    raw_desc = note_card.get("desc")
			
 
				-    print(f"\n[DEBUG] desc 字段:")
			
 
				-    print(f"  - 类型: {type(raw_desc).__name__}")
			
 
				-    print(f"  - 长度: {len(raw_desc) if raw_desc else 0}")
			
 
				-    print(f"  - 完整内容: {repr(raw_desc)}")
			
 
				-
			
 
				-    # 4. 检查是否有其他可能包含完整内容的字段
			
 
				-    print(f"\n[DEBUG] 检查其他可能的内容字段:")
			
 
				-    for potential_field in ["full_desc", "content", "full_content", "note_text", "body", "full_body", "title", "display_title"]:
			
 
				-        if potential_field in note_card:
			
 
				-            value = note_card.get(potential_field)
			
 
				-            print(f"  - 发现字段 '{potential_field}': 长度={len(str(value))}, 值={repr(str(value)[:200])}")
			
 
				-
			
 
				-    # 5. 检查顶层 note 对象中是否有详细内容
			
 
				-    print(f"\n[DEBUG] 检查 note 顶层字段:")
			
 
				-    for top_field in ["note_info", "detail", "content", "desc"]:
			
 
				-        if top_field in note:
			
 
				-            value = note.get(top_field)
			
 
				-            print(f"  - 发现顶层字段 '{top_field}': 类型={type(value).__name__}, 内容={repr(str(value)[:200])}")
			
 
				-
			
 
				-    print(f"[DEBUG] ===== 数据检查完成 =====\n")
			
 
				+    # note_id = note.get("id", "")
			
 
				+    #
			
 
				+    # # 1. 打印完整的 note 结构
			
 
				+    # print(f"\n[DEBUG] ===== 处理帖子 {note_id} =====")
			
 
				+    # print(f"[DEBUG] note 的所有键: {list(note.keys())}")
			
 
				+    # print(f"[DEBUG] note 完整数据 (前2000字符):")
			
 
				+    # print(json.dumps(note, ensure_ascii=False, indent=2)[:2000])
			
 
				+    #
			
 
				+    # # 2. 打印 note_card 信息
			
 
				+    # print(f"\n[DEBUG] note_card 的所有键: {list(note_card.keys())}")
			
 
				+    #
			
 
				+    # # 3. 检查 desc 字段
			
 
				+    # raw_desc = note_card.get("desc")
			
 
				+    # print(f"\n[DEBUG] desc 字段:")
			
 
				+    # print(f"  - 类型: {type(raw_desc).__name__}")
			
 
				+    # print(f"  - 长度: {len(raw_desc) if raw_desc else 0}")
			
 
				+    # print(f"  - 完整内容: {repr(raw_desc)}")
			
 
				+    #
			
 
				+    # # 4. 检查是否有其他可能包含完整内容的字段
			
 
				+    # print(f"\n[DEBUG] 检查其他可能的内容字段:")
			
 
				+    # for potential_field in ["full_desc", "content", "full_content", "note_text", "body", "full_body", "title", "display_title"]:
			
 
				+    #     if potential_field in note_card:
			
 
				+    #         value = note_card.get(potential_field)
			
 
				+    #         print(f"  - 发现字段 '{potential_field}': 长度={len(str(value))}, 值={repr(str(value)[:200])}")
			
 
				+    #
			
 
				+    # # 5. 检查顶层 note 对象中是否有详细内容
			
 
				+    # print(f"\n[DEBUG] 检查 note 顶层字段:")
			
 
				+    # for top_field in ["note_info", "detail", "content", "desc"]:
			
 
				+    #     if top_field in note:
			
 
				+    #         value = note.get(top_field)
			
 
				+    #         print(f"  - 发现顶层字段 '{top_field}': 类型={type(value).__name__}, 内容={repr(str(value)[:200])}")
			
 
				+    #
			
 
				+    # print(f"[DEBUG] ===== 数据检查完成 =====\n")
			
 
				     # ========== 调试日志 END ==========
			
 
				 
			
 
				-    # 提取图片URL - 使用新的字段名 image_url
			
 
				+    # 提取图片URL - 支持字符串和字典两种格式
			
 
				     images = []
			
 
				     for img in image_list:
			
 
				-        if isinstance(img, dict):
			
 
				-            # 尝试新字段名 image_url，如果不存在则尝试旧字段名 url_default
			
 
				+        if isinstance(img, str):
			
 
				+            # 预处理后的字符串格式（来自xiaohongshu_search.py的_preprocess_response）
			
 
				+            images.append(img)
			
 
				+        elif isinstance(img, dict):
			
 
				+            # 原始字典格式 - 尝试新字段名 image_url，如果不存在则尝试旧字段名 url_default
			
 
				             img_url = img.get("image_url") or img.get("url_default")
			
 
				             if img_url:
			
 
				                 images.append(img_url)
			
@@ -2084,16 +2096,16 @@ def process_note_data(note: dict) -> Post:
 
				         note_url=f"https://www.xiaohongshu.com/explore/{note.get('id', '')}"
			
 
				     )
			
 
				 
			
 
				-    # 打印最终构造的 Post 对象
			
 
				-    print(f"\n[DEBUG] ===== 构造的 Post 对象 =====")
			
 
				-    print(f"[DEBUG] - note_id: {post.note_id}")
			
 
				-    print(f"[DEBUG] - title: {post.title}")
			
 
				-    print(f"[DEBUG] - body_text 长度: {len(post.body_text)}")
			
 
				-    print(f"[DEBUG] - body_text 完整内容: {repr(post.body_text)}")
			
 
				-    print(f"[DEBUG] - type: {post.type}")
			
 
				-    print(f"[DEBUG] - images 数量: {len(post.images)}")
			
 
				-    print(f"[DEBUG] - interact_info: {post.interact_info}")
			
 
				-    print(f"[DEBUG] ===== Post 对象构造完成 =====\n")
			
 
				+    # # 打印最终构造的 Post 对象
			
 
				+    # print(f"\n[DEBUG] ===== 构造的 Post 对象 =====")
			
 
				+    # print(f"[DEBUG] - note_id: {post.note_id}")
			
 
				+    # print(f"[DEBUG] - title: {post.title}")
			
 
				+    # print(f"[DEBUG] - body_text 长度: {len(post.body_text)}")
			
 
				+    # print(f"[DEBUG] - body_text 完整内容: {repr(post.body_text)}")
			
 
				+    # print(f"[DEBUG] - type: {post.type}")
			
 
				+    # print(f"[DEBUG] - images 数量: {len(post.images)}")
			
 
				+    # print(f"[DEBUG] - interact_info: {post.interact_info}")
			
 
				+    # print(f"[DEBUG] ===== Post 对象构造完成 =====\n")
			
 
				 
			
 
				     return post
			
 
				 
			
@@ -2709,6 +2721,7 @@ async def run_round(
 
				     context: RunContext,
			
 
				     xiaohongshu_api: XiaohongshuSearchRecommendations,
			
 
				     xiaohongshu_search: XiaohongshuSearch,
			
 
				+    xiaohongshu_detail: XiaohongshuDetail,
			
 
				     sug_threshold: float = 0.7,
			
 
				     enable_evaluation: bool = False
			
 
				 ) -> tuple[list[Q], list[Seed], list[Search]]:
			
@@ -2847,17 +2860,26 @@ async def run_round(
 
				             print(f"    搜索: {sug.text}")
			
 
				             try:
			
 
				                 search_result = xiaohongshu_search.search(keyword=sug.text)
			
 
				-                result_str = search_result.get("result", "{}")
			
 
				-                if isinstance(result_str, str):
			
 
				-                    result_data = json.loads(result_str)
			
 
				-                else:
			
 
				-                    result_data = result_str
			
 
				-
			
 
				-                notes = result_data.get("data", {}).get("data", [])
			
 
				+                # xiaohongshu_search.search() 已经返回解析后的数据
			
 
				+                notes = search_result.get("data", {}).get("data", [])
			
 
				                 post_list = []
			
 
				                 for note in notes[:10]:  # 只取前10个
			
 
				-                    post = process_note_data(note)
			
 
				-                    post_list.append(post)
			
 
				+                    try:
			
 
				+                        post = process_note_data(note)
			
 
				+                        post_list.append(post)
			
 
				+                    except Exception as e:
			
 
				+                        print(f"      ⚠️  解析帖子失败 {note.get('id', 'unknown')}: {str(e)[:50]}")
			
 
				+
			
 
				+                # 补充详情信息（仅视频类型需要补充视频URL）
			
 
				+                video_posts = [p for p in post_list if p.type == "video"]
			
 
				+                if video_posts:
			
 
				+                    print(f"      补充详情（{len(video_posts)}个视频）...")
			
 
				+                    for post in video_posts:
			
 
				+                        try:
			
 
				+                            detail_response = xiaohongshu_detail.get_detail(post.note_id)
			
 
				+                            enrich_post_with_detail(post, detail_response)
			
 
				+                        except Exception as e:
			
 
				+                            print(f"        ⚠️  详情补充失败 {post.note_id}: {str(e)[:50]}")
			
 
				 
			
 
				                 print(f"      → 找到 {len(post_list)} 个帖子")
			
 
				 
			
@@ -3132,17 +3154,7 @@ async def run_round(
 
				         search_results_data.append({
			
 
				             "text": search.text,
			
 
				             "score_with_o": search.score_with_o,
			
 
				-            "post_list": [
			
 
				-                {
			
 
				-                    "note_id": post.note_id,
			
 
				-                    "note_url": post.note_url,
			
 
				-                    "title": post.title,
			
 
				-                    "body_text": post.body_text,
			
 
				-                    "images": post.images,
			
 
				-                    "interact_info": post.interact_info
			
 
				-                }
			
 
				-                for post in search.post_list
			
 
				-            ]
			
 
				+            "post_list": [post.model_dump() for post in search.post_list]
			
 
				         })
			
 
				 
			
 
				     # 记录本轮数据
			
@@ -3196,6 +3208,7 @@ async def iterative_loop(
 
				     # API实例
			
 
				     xiaohongshu_api = XiaohongshuSearchRecommendations()
			
 
				     xiaohongshu_search = XiaohongshuSearch()
			
 
				+    xiaohongshu_detail = XiaohongshuDetail()  # 详情API客户端
			
 
				 
			
 
				     # 保存初始化数据
			
 
				     context.rounds.append({
			
@@ -3387,6 +3400,7 @@ async def run_round_v2(
 
				     context: RunContext,
			
 
				     xiaohongshu_api: XiaohongshuSearchRecommendations,
			
 
				     xiaohongshu_search: XiaohongshuSearch,
			
 
				+    xiaohongshu_detail: XiaohongshuDetail,
			
 
				     sug_threshold: float = 0.7,
			
 
				     enable_evaluation: bool = False
			
 
				 ) -> tuple[list[Q], list[Search], dict]:
			
@@ -3490,24 +3504,33 @@ async def run_round_v2(
 
				 
			
 
				             try:
			
 
				                 search_result = xiaohongshu_search.search(keyword=sug.text)
			
 
				-                result_str = search_result.get("result", "{}")
			
 
				-                if isinstance(result_str, str):
			
 
				-                    result_data = json.loads(result_str)
			
 
				-                else:
			
 
				-                    result_data = result_str
			
 
				-
			
 
				-                notes = result_data.get("data", {}).get("data", [])
			
 
				+                # xiaohongshu_search.search() 已经返回解析后的数据
			
 
				+                notes = search_result.get("data", {}).get("data", [])
			
 
				                 post_list = []
			
 
				                 for note in notes[:10]:
			
 
				-                    post = process_note_data(note)
			
 
				-
			
 
				-                    # # 🆕 多模态提取（搜索后立即处理） - 内容提取流程已断开
			
 
				-                    # if post.type == "normal" and len(post.images) > 0:
			
 
				-                    #     extraction = await extract_post_images(post)
			
 
				-                    #     if extraction:
			
 
				-                    #         post_extractions[post.note_id] = extraction
			
 
				-
			
 
				-                    post_list.append(post)
			
 
				+                    try:
			
 
				+                        post = process_note_data(note)
			
 
				+
			
 
				+                        # # 🆕 多模态提取（搜索后立即处理） - 内容提取流程已断开
			
 
				+                        # if post.type == "normal" and len(post.images) > 0:
			
 
				+                        #     extraction = await extract_post_images(post)
			
 
				+                        #     if extraction:
			
 
				+                        #         post_extractions[post.note_id] = extraction
			
 
				+
			
 
				+                        post_list.append(post)
			
 
				+                    except Exception as e:
			
 
				+                        print(f"      ⚠️  解析帖子失败 {note.get('id', 'unknown')}: {str(e)[:50]}")
			
 
				+
			
 
				+                # 补充详情信息（仅视频类型需要补充视频URL）
			
 
				+                video_posts = [p for p in post_list if p.type == "video"]
			
 
				+                if video_posts:
			
 
				+                    print(f"      补充详情（{len(video_posts)}个视频）...")
			
 
				+                    for post in video_posts:
			
 
				+                        try:
			
 
				+                            detail_response = xiaohongshu_detail.get_detail(post.note_id)
			
 
				+                            enrich_post_with_detail(post, detail_response)
			
 
				+                        except Exception as e:
			
 
				+                            print(f"        ⚠️  详情补充失败 {post.note_id}: {str(e)[:50]}")
			
 
				 
			
 
				                 print(f"      → 找到 {len(post_list)} 个帖子")
			
 
				 
			
@@ -3695,17 +3718,7 @@ async def run_round_v2(
 
				         search_results_data.append({
			
 
				             "text": search.text,
			
 
				             "score_with_o": search.score_with_o,
			
 
				-            "post_list": [
			
 
				-                {
			
 
				-                    "note_id": post.note_id,
			
 
				-                    "note_url": post.note_url,
			
 
				-                    "title": post.title,
			
 
				-                    "body_text": post.body_text,
			
 
				-                    "images": post.images,
			
 
				-                    "interact_info": post.interact_info
			
 
				-                }
			
 
				-                for post in search.post_list
			
 
				-            ]
			
 
				+            "post_list": [post.model_dump() for post in search.post_list]
			
 
				         })
			
 
				 
			
 
				     round_data.update({
			
@@ -3800,6 +3813,7 @@ async def iterative_loop_v2(
 
				     # API实例
			
 
				     xiaohongshu_api = XiaohongshuSearchRecommendations()
			
 
				     xiaohongshu_search = XiaohongshuSearch()
			
 
				+    xiaohongshu_detail = XiaohongshuDetail()  # 详情API客户端
			
 
				 
			
 
				     # 收集所有搜索结果
			
 
				     all_search_list = []
			
@@ -3823,6 +3837,7 @@ async def iterative_loop_v2(
 
				             context=context,
			
 
				             xiaohongshu_api=xiaohongshu_api,
			
 
				             xiaohongshu_search=xiaohongshu_search,
			
 
				+            xiaohongshu_detail=xiaohongshu_detail,
			
 
				             sug_threshold=sug_threshold,
			
 
				             enable_evaluation=enable_evaluation
			
 
				         )
			
--- a/post_evaluator_v3.py
+++ b/post_evaluator_v3.py
@@ -22,6 +22,10 @@ MAX_IMAGES_PER_POST = 10
 
				 MAX_CONCURRENT_EVALUATIONS = 5
			
 
				 API_TIMEOUT = 120
			
 
				 
			
 
				+# 缓存配置
			
 
				+ENABLE_CACHE = True  # 是否启用评估结果缓存
			
 
				+CACHE_DIR = ".evaluation_cache"  # 缓存目录
			
 
				+
			
 
				 # ============================================================================
			
 
				 # 数据模型
			
 
				 # ============================================================================
			
@@ -775,7 +779,8 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
				 ---
			
 
				 
			
 
				 ## 任务说明
			
 
				-你将收到一个**原始搜索需求**和一条**多模态帖子**（包含图片、标题、正文），请**仅评估品类维度**的匹配度，输出0-100分的量化得分。忽略目的和动机维度因素，只评估品类维度。
			
 
				+你将收到一个**原始搜索需求**和一条**多模态帖子**（包含图片、标题、正文）
			
 
				+请**仅评估品类维度**的匹配度，输出0-100分的量化得分。
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -792,44 +797,93 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
				 ---
			
 
				 
			
 
				 ## 评估维度：品类匹配
			
 
				-###品类定义：
			
 
				-**品类 = 核心主体（名词）+ 限定词**
			
 
				 
			
 
				-- **核心主体**：具体的内容对象（风光摄影、旅行攻略、美食推荐）
			
 
				-- **限定词**：限定词不包含具体的目的和动作
			
 
				-  - 地域：川西、成都、日本
			
 
				-  - 时间：秋季、夏天、2024
			
 
				-  - 类型：免费、高清、入门级
			
 
				-  - 风格：小清新、复古、简约
			
 
				+## 评估维度
			
 
				+本评估系统围绕 **品类维度** 进行：
			
 
				+
			
 
				+#  维度独立性警告
			
 
				+【严格约束】本评估**只评估品类维度**,，必须遵守以下规则：
			
 
				+1. **只看名词和限定词**：评估时只考虑主体、限定词的匹配度
			
 
				+2. **完全忽略动词**：动作意图、目的等动机信息对本维度评分无影响
			
 
				+3. **只看词条表面，禁止联想推演
			
 
				+4. **通用概念 ≠ 特定概念
			
 
				+
			
 
				 
			
 
				 ### 核心评估逻辑
			
 
				 
			
 
				+**品类 = 核心内容主体（实体名词）+ 场景/地域限定**
			
 
				+
			
 
				+### 品类识别规则
			
 
				+
			
 
				+#### 第一步：剥离动作词，识别核心主体
			
 
				+
			
 
				+**必须剥离的动作词（属于目的动机，不是品类）：**
			
 
				+- 如何、怎么、制作、拍摄、寻找、推荐、学习、了解等
			
 
				+
			
 
				+**示例：**
			
 
				+- "如何制作猫咪表情包" → 品类主体是**猫咪**，不是"表情包制作"
			
 
				+- "川西风光摄影教程" → 品类主体是**川西风光**，不是"摄影教程"
			
 
				+- "推荐日本旅行景点" → 品类主体是**日本旅行/景点**，不是"推荐"
			
 
				 
			
 
				+#### 第二步：识别核心主体类别
			
 
				+
			
 
				+**核心主体（实体名词）：**
			
 
				+- **生物类**：猫咪、狗狗、植物、人物（具体指儿童、女孩、老人等）
			
 
				+- **地理类**：川西、成都、日本、景点名称
			
 
				+- **物品类**：美食、服装、电子产品、家具
			
 
				+- **场景类**：风光、建筑、室内、户外
			
 
				+- **活动类**：旅行、运动、工作、学习场景
			
 
				+
			
 
				+**关键原则：品类主体必须是具体的内容对象，不是动作或形式**
			
 
				+
			
 
				+#### 第三步：识别场景/地域等限定词（可选）
			
 
				+
			
 
				+**场景/地域限定：**
			
 
				+- **地域限定**：川西、成都、日本、欧洲
			
 
				+- **时间限定**：秋季、夏天、2024
			
 
				+- **场景限定**：户外、室内、职场、家居
			
 
				+
			
 
				+**注意：**
			
 
				+- "表情包"、"梗图"、"照片"、"视频"等是**内容形式/载体**，不是品类主体
			
 
				+- "教程"、"攻略"、"指南"等是**内容类型**，属于目的动机，不是品类
			
 
				 
			
 
				 ---
			
 
				 
			
 
				 ## 评估流程
			
 
				 
			
 
				 ### 第一步：提取原始需求的品类信息
			
 
				-- 识别**核心主体名词**
			
 
				-- 识别**关键限定词**（地域/时间/类型/风格等）
			
 
				+
			
 
				+1. **剥离所有动作词和内容形式词**
			
 
				+2. **识别核心主体名词**（生物、地理、物品、场景等）
			
 
				+3. **识别场景/地域限定**（如果有）
			
 
				+
			
 
				+**示例分析：**
			
 
				+- "如何制作猫咪表情包梗图"
			
 
				+  - 剥离动作：如何、制作
			
 
				+  - 剥离形式：表情包、梗图
			
 
				+  - **核心品类主体：猫咪**
			
 
				+  - 场景限定：无
			
 
				+
			
 
				 
			
 
				 ### 第二步：从帖子中提取品类信息（重点看图片）
			
 
				 
			
 
				 **图片识别（权重70%）：**
			
 
				-- 图片展示的核心主体是什么？
			
 
				-- 图片中可识别的限定特征（地域标志、季节特征、类型属性、风格特点）
			
 
				+- 图片的**核心主体**是什么？（是猫、是人、是风景、是物品？）
			
 
				+- 图片的**场景/地域特征**是什么？
			
 
				 
			
 
				 **标题提取（权重15%）：**
			
 
				-- 标题明确的品类名词和限定词
			
 
				+- 标题明确的品类主体名词
			
 
				 
			
 
				 **正文提取（权重15%）：**
			
 
				-- 正文描述的品类信息
			
 
				+- 正文描述的品类主体
			
 
				 
			
 
				-### 第三步：对比匹配度
			
 
				-- 核心主体是否一致？
			
 
				-- 限定词匹配了几个？
			
 
				-- 是否存在泛化或偏移？
			
 
				+### 第三步：对比品类匹配度
			
 
				+
			
 
				+**核心判断：主体是否一致？**
			
 
				+- 猫咪 ≠ 女孩 → 品类完全不同 → 0-10分
			
 
				+- 猫咪 = 猫咪 → 品类一致 → 进一步看场景限定
			
 
				+- 川西风光 ≠ 日本风光 → 地域不同 → 30-50分
			
 
				+- 川西风光 = 四川风光 → 地域相近 → 70-85分
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -837,43 +891,43 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
				 
			
 
				 ### 高度匹配区间
			
 
				 
			
 
				-**90-100分：核心主体+关键限定词完全匹配**
			
 
				-- 图片展示的主体与需求精准一致
			
 
				-- 关键限定词全部匹配（地域、时间、类型等）
			
 
				+**90-100分：核心主体完全一致 + 场景/地域等限定词完全匹配**
			
 
				+- 图片主体与需求完全一致
			
 
				+- 关键限定词全部匹配（场景、地域、时间等）
			
 
				 - 例：需求"川西秋季风光" vs 图片展示川西秋季风景
			
 
				 
			
 
				-**75-89分：核心主体匹配，限定词匹配度百分之80**
			
 
				+**75-89分：核心主体完全一致 + 场景/地域等限定词部分匹配**
			
 
				 - 图片主体一致
			
 
				 - 存在1-2个限定词缺失但不影响核心匹配
			
 
				 - 例：需求"川西秋季风光" vs 图片展示川西风光（缺秋季）
			
 
				 
			
 
				-**60-74分：核心主体匹配，限定词匹配度百分之60**
			
 
				+**60-74分：核心主体匹配，限定词大量缺失**
			
 
				 - 图片主体在同一大类
			
 
				-- 限定词部分匹配或有合理上下位关系
			
 
				-- 例：需求"川西秋季风光" vs 图片展示四川风光
			
 
				+- 场景/地域等限定词大部分缺失
			
 
				+- 例：需求"川西秋季风光" vs 图片展示风光
			
 
				 
			
 
				 ### 中度相关区间
			
 
				 
			
 
				-**40-59分：核心主体匹配，限定词完全不匹配**
			
 
				+**40-59分：核心主体同大类但具体不同**
			
 
				 - 图片主体相同但上下文不同
			
 
				 - 限定词严重缺失或不匹配
			
 
				-- 例：需求"猫咪表情包梗图" vs 女孩表情包
			
 
				+- 例：需求"川西风光摄影" vs 图片展示风光照但无地域特征
			
 
				 
			
 
				 ### 不相关/负向区间
			
 
				 
			
 
				-**20-39分：主体过度泛化**
			
 
				+**20-39分：核心主体相关但类别差异明显**
			
 
				 - 图片主体是通用概念，需求是特定概念
			
 
				 - 仅有抽象类别相似
			
 
				 - 例：需求"川西旅行攻略" vs 图片展示普通旅行场景
			
 
				 
			
 
				-**1-19分：品类关联极弱**
			
 
				+**1-19分：核心主体几乎不相关**
			
 
				 - 图片主体与需求差异明显
			
 
				 
			
 
				-**0分：品类完全不同**
			
 
				+**0分：核心主体完全不同**
			
 
				 - 图片主体类别完全不同
			
 
				 - 例：需求"风光摄影" vs 图片展示美食
			
 
				 
			
 
				-**负分不使用**（品类维度不设负分）
			
 
				+**关键原则：品类主体不同 = 品类不匹配 = 0分或极低分**
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -881,21 +935,27 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
				 ```json
			
 
				 {{
			
 
				   "品类评估": {{
			
 
				-    "原始需求品类": {{
			
 
				-      "核心主体": "提取的主体名词",
			
 
				-      "关键限定词": ["限定词1", "限定词2"]
			
 
				+    "原始需求品类分析": {{
			
 
				+      "完整需求": "用户的原始搜索词",
			
 
				+      "剥离动作词": "识别并剥离的动作词",
			
 
				+      "剥离形式词": "识别并剥离的内容形式词",
			
 
				+      "核心主体": "提取的核心品类主体",
			
 
				+      "场景地域限定": ["限定词1", "限定词2"]
			
 
				     }},
			
 
				     "帖子实际品类": {{
			
 
				-      "图片主体": "图片展示的核心主体",
			
 
				-      "图片限定特征": ["从图片识别的限定词"],
			
 
				-      "标题品类": "标题提及的品类",
			
 
				-      "正文品类": "正文描述的品类"
			
 
				+      "图片主体": "图片展示的核心主体（权重70%）",
			
 
				+      "图片场景特征": "图片的场景/地域特征",
			
 
				+      "标题主体": "标题提及的主体",
			
 
				+      "正文主体": "正文描述的主体"
			
 
				+    }},
			
 
				+    "品类匹配分析": {{
			
 
				+      "主体对比": "需求主体 vs 帖子主体",
			
 
				+      "主体是否一致": "一致/同大类不同/完全不同",
			
 
				+      "场景限定匹配情况": "哪些匹配/哪些缺失"
			
 
				     }},
			
 
				     "品类匹配得分": 0-100的整数,
			
 
				     "匹配度等级": "完全匹配/高度匹配/基本匹配/弱匹配/不匹配",
			
 
				-    "主体匹配情况": "主体是否一致",
			
 
				-    "限定词匹配情况": "哪些限定词匹配/缺失",
			
 
				-    "核心依据": "为什么给这个分数（100字以内）"
			
 
				+    "核心依据": "为什么给这个分数（必须说明主体是否一致）"
			
 
				   }}
			
 
				 }}
			
 
				 ```
			
@@ -915,9 +975,11 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
				 ## 特别注意
			
 
				 
			
 
				 - 本评估**只关注品类维度**，不考虑目的是否匹配
			
 
				+- 严格标准一致性：对所有用例使用相同的评估标准，避免评分飘移
			
 
				 - 输出的分数必须是**0-100的整数**
			
 
				 - 不要自行计算综合分数，只输出品类分数
			
 
				-- 禁止因为"可能相关"就给分，必须有明确视觉证据
			
 
				+- 禁止因为"可能相关"就给分，必须有明确视觉证据，不得用可能相关，你的评估
			
 
				+---
			
 
				 """
			
 
				 
			
 
				 
			
@@ -925,6 +987,106 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
				 # 辅助函数
			
 
				 # ============================================================================
			
 
				 
			
 
				def _get_cache_key(note_id: str) -> str:
				    """Build the cache file name for a post's V3 evaluation result.

				    Ids made only of letters, digits, ``-`` and ``_`` are used verbatim, so
				    file names for ordinary ids are unchanged. Every other character (path
				    separators, dots, whitespace, ...) is replaced with ``_`` so that an
				    unexpected id can never make the cache path point outside the cache
				    directory.

				    Args:
				        note_id: Post (note) id.

				    Returns:
				        Cache file name, without the directory part.
				    """
				    safe_id = "".join(
				        ch if ch.isalnum() or ch in "-_" else "_" for ch in str(note_id)
				    )
				    return f"{safe_id}_v3.0.json"
			
 
				+
			
 
				+
			
 
				def _load_from_cache(note_id: str) -> Optional[tuple]:
				    """Load a previously cached evaluation result for a post.

				    Args:
				        note_id: Post id used to derive the cache file name.

				    Returns:
				        The cached tuple ``(knowledge_eval, content_eval, purpose_eval,
				        category_eval, final_score, match_level)``, or ``None`` when caching
				        is disabled, ``note_id`` is empty, no cache file exists, or the file
				        cannot be read or parsed.
				    """
				    # An empty id would make every id-less post share one cache file and
				    # hand back another post's scores, so such posts bypass the cache.
				    if not ENABLE_CACHE or not note_id:
				        return None

				    cache_file = os.path.join(CACHE_DIR, _get_cache_key(note_id))

				    # (key in the cache file, model class used to rebuild the object)
				    eval_models = (
				        ("knowledge_eval", KnowledgeEvaluation),
				        ("content_eval", ContentKnowledgeEvaluation),
				        ("purpose_eval", PurposeEvaluation),
				        ("category_eval", CategoryEvaluation),
				    )

				    try:
				        with open(cache_file, 'r', encoding='utf-8') as f:
				            data = json.load(f)

				        # Rebuild the evaluation objects; a missing or empty entry stays None.
				        evaluations = [
				            model(**data[key]) if data.get(key) else None
				            for key, model in eval_models
				        ]

				        return (*evaluations, data.get("final_score"), data.get("match_level"))

				    except FileNotFoundError:
				        # Plain cache miss: not worth a warning.
				        return None
				    except Exception as e:
				        # Best-effort cache: a corrupt or incompatible entry must not abort
				        # the evaluation, so report it and fall back to a fresh evaluation.
				        print(f"      ⚠️  缓存读取失败: {note_id} - {str(e)[:50]}")
				        return None
			
 
				+
			
 
				+
			
 
				def _save_to_cache(note_id: str, eval_results: tuple):
				    """Persist an evaluation result to the on-disk cache (best effort).

				    Nothing is written when caching is disabled or ``note_id`` is empty.
				    Any failure while creating the directory, serializing or writing is
				    reported on stdout and otherwise ignored, so caching can never break
				    an evaluation.

				    Args:
				        note_id: Post id used to derive the cache file name.
				        eval_results: Tuple ``(knowledge_eval, content_eval, purpose_eval,
				            category_eval, final_score, match_level)``.
				    """
				    # An empty id would make every id-less post overwrite the same file.
				    if not ENABLE_CACHE or not note_id:
				        return

				    knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level = eval_results

				    # Convert the evaluation models into a JSON-serializable dict.
				    cache_data = {
				        "knowledge_eval": knowledge_eval.model_dump() if knowledge_eval else None,
				        "content_eval": content_eval.model_dump() if content_eval else None,
				        "purpose_eval": purpose_eval.model_dump() if purpose_eval else None,
				        "category_eval": category_eval.model_dump() if category_eval else None,
				        "final_score": final_score,
				        "match_level": match_level,
				        "cache_time": datetime.now().isoformat(),
				        "evaluator_version": "v3.0"
				    }

				    cache_file = os.path.join(CACHE_DIR, _get_cache_key(note_id))

				    try:
				        # Serialize before touching the file so a serialization error
				        # cannot leave a truncated cache entry behind.
				        payload = json.dumps(cache_data, ensure_ascii=False, indent=2)

				        # Make sure the cache directory exists (inside the try block so a
				        # permission problem is reported instead of raised).
				        os.makedirs(CACHE_DIR, exist_ok=True)

				        with open(cache_file, 'w', encoding='utf-8') as f:
				            f.write(payload)
				    except Exception as e:
				        print(f"      ⚠️  缓存保存失败: {note_id} - {str(e)[:50]}")
			
 
				+
			
 
				+
			
 
				 def _clean_json_response(content_text: str) -> str:
			
 
				     """清理API返回的JSON内容"""
			
 
				     content_text = content_text.strip()
			
@@ -1258,6 +1420,13 @@ async def evaluate_post_v3(
 
				         print(f"      ⊗ 跳过视频帖子: {post.note_id}")
			
 
				         return (None, None, None, None, None, None)
			
 
				 
			
 
				+    # 检查缓存
			
 
				+    if ENABLE_CACHE:
			
 
				+        cached_result = _load_from_cache(post.note_id)
			
 
				+        if cached_result is not None:
			
 
				+            print(f"      ♻️  使用缓存结果: {post.note_id}")
			
 
				+            return cached_result
			
 
				+
			
 
				     print(f"      🔍 开始V3评估: {post.note_id}")
			
 
				 
			
 
				     # Step 1: 判断是知识
			
@@ -1310,6 +1479,10 @@ async def evaluate_post_v3(
 
				 
			
 
				     print(f"      ✅ 综合得分: {final_score} ({match_level})")
			
 
				 
			
 
				+    # 保存到缓存
			
 
				+    if ENABLE_CACHE:
			
 
				+        _save_to_cache(post.note_id, (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level))
			
 
				+
			
 
				     return (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
			
 
				 
			
 
				 
			
--- a/post_evaluator_v4_image_loader.py
+++ b/post_evaluator_v4_image_loader.py
@@ -0,0 +1,82 @@
 
				+"""
			
 
				+ImageUploader 新实现 - 使用PIL Image对象
			
 
				+参考demo,直接下载图片到内存并转为PIL Image,不上传文件
			
 
				+"""
			
 
				+import asyncio
			
 
				+import requests
			
 
				+from PIL import Image
			
 
				+import io
			
 
				+from typing import Optional, List, Any
			
 
				+
			
 
				+
			
 
				class ImageUploader:
				    """Image loader that downloads images and decodes them into PIL ``Image`` objects.

				    Despite the name nothing is uploaded: images are fetched into memory and
				    returned as PIL images, and no temporary files are created.
				    """

				    @staticmethod
				    async def upload_images(image_urls: List[str]) -> tuple[List[Any], List[str]]:
				        """Download a batch of images concurrently and decode them.

				        Args:
				            image_urls: Image URLs to load.

				        Returns:
				            ``(image_objects, [])``: the successfully loaded PIL images in
				            input order (failed downloads are dropped), plus an always-empty
				            list kept for interface compatibility (there are no temp paths
				            to clean up).
				        """
				        if not image_urls:
				            return [], []

				        print(f"      📥 准备加载 {len(image_urls)} 张图片(PIL Image方式)...")

				        # Download all images concurrently.
				        tasks = [ImageUploader._load_single_image(url, idx) for idx, url in enumerate(image_urls)]
				        results = await asyncio.gather(*tasks, return_exceptions=True)

				        # Keep only the successfully loaded images.
				        image_objects = []

				        for idx, result in enumerate(results):
				            # BaseException (not just Exception) so a CancelledError returned
				            # by gather() is never mistaken for an image.
				            if isinstance(result, BaseException):
				                print(f"      ⚠️  图片{idx}加载失败: {str(result)[:50]}")
				            elif result is not None:
				                image_objects.append(result)

				        print(f"      ✅ 成功加载 {len(image_objects)}/{len(image_urls)} 张图片")
				        return image_objects, []  # empty temp_paths: nothing needs cleaning up

				    @staticmethod
				    async def _load_single_image(image_url: str, idx: int) -> Optional[Any]:
				        """Download one image and decode it into an RGB PIL ``Image``.

				        The blocking download and decode run in a worker thread so the
				        event loop stays responsive.

				        Args:
				            image_url: Image URL.
				            idx: Image index, used only in log output.

				        Returns:
				            The decoded PIL image, or ``None`` if downloading or decoding
				            failed (the failure is printed, not raised).
				        """
				        try:
				            image, size_bytes = await asyncio.to_thread(
				                ImageUploader._download_and_decode, image_url
				            )
				        except Exception as e:
				            # Best effort: one broken image must not fail the whole batch.
				            print(f"      ✗ 图片{idx}加载失败: {str(e)[:60]}")
				            return None

				        file_size_kb = size_bytes / 1024
				        print(f"      ✓ 图片{idx}加载成功 ({file_size_kb:.1f}KB, {image.size[0]}x{image.size[1]})")

				        return image

				    @staticmethod
				    def _download_and_decode(image_url: str) -> tuple[Any, int]:
				        """Blocking helper: fetch ``image_url`` and return ``(rgb_image, byte_count)``."""
				        # Download the image into memory.
				        response = requests.get(image_url, timeout=30)
				        response.raise_for_status()

				        image = Image.open(io.BytesIO(response.content))

				        if image.mode != 'RGB':
				            # The original code converts to RGB as the mode recommended for Gemini.
				            image = image.convert('RGB')
				        else:
				            # Image.open() is lazy; force decoding now so a corrupt file is
				            # reported here instead of failing later in the caller.
				            image.load()

				        return image, len(response.content)
			
--- a/post_evaluator_v4_langgraph.py
+++ b/post_evaluator_v4_langgraph.py
@@ -0,0 +1,2184 @@
 
				+"""
			
 
				+帖子评估模块 V4 - LangGraph版本 + Gemini API
			
 
				+
			
 
				+改进:
			
 
				+1. 框架: 使用 LangGraph 状态机替代传统异步流程
			
 
				+2. API: 通过 LangChain 的 ChatGoogleGenerativeAI 调用 Google Gemini (google.generativeai 的直接导入已暂时禁用)
			
 
				+3. 视频: 支持视频内容评估
			
 
				+4. Prompt: 视频内容自动调整Prompt描述
			
 
				+5. 流程: Prompt1 → Prompt2 → Prompt3&4(并行) → 综合评分
			
 
				+"""
			
 
				+
			
 
				+import asyncio
			
 
				+import json
			
 
				+import os
			
 
				+import time
			
 
				+import tempfile
			
 
				+import io
			
 
				+import base64
			
 
				+import requests
			
 
				+from datetime import datetime
			
 
				+from typing import Optional, TypedDict, List, Dict, Any
			
 
				+from pydantic import BaseModel, Field
			
 
				+from PIL import Image
			
 
				+from langchain_google_genai import ChatGoogleGenerativeAI
			
 
				+from langchain_core.messages import HumanMessage, SystemMessage
			
 
				+from langgraph.graph import StateGraph, END
			
 
				+# import google.generativeai as genai  # 暂时禁用,版本冲突
			
 
				+
			
 
				+# ============================================================================
			
 
				+# 常量配置
			
 
				+# ============================================================================
			
 
				+
			
 
				+# Gemini配置
			
 
				+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyBgt9h74LvdWJ4Ivy_mh17Yyy2JH2WJICE")
			
 
				+GEMINI_MODEL_NAME = "gemini-2.5-flash"
			
 
				+MAX_IMAGES_PER_POST = 10
			
 
				+
			
 
				+# 并发&重试配置
			
 
				+MAX_CONCURRENT_EVALUATIONS = 5
			
 
				+API_TIMEOUT = 180
			
 
				+MAX_RETRIES = 2
			
 
				+RETRY_WAIT_SECONDS = 3
			
 
				+FILE_PROCESS_TIMEOUT = 180
			
 
				+
			
 
				+# 缓存配置
			
 
				+ENABLE_CACHE = False
			
 
				+CACHE_DIR = ".evaluation_cache"
			
 
				+
			
 
				+# ============================================================================
			
 
				+# 数据模型 (复用V3)
			
 
				+# ============================================================================
			
 
				+
			
 
				+class KnowledgeEvaluation(BaseModel):
			
 
				+    """Prompt1: 判断是知识 - 评估结果"""
			
 
				+    is_knowledge: bool = Field(..., description="是否是知识内容")
			
 
				+    quick_exclude: dict = Field(default_factory=dict, description="快速排除判定")
			
 
				+    title_layer: dict = Field(default_factory=dict, description="标题层判断")
			
 
				+    image_layer: dict = Field(default_factory=dict, description="图片层判断(核心)")
			
 
				+    text_layer: dict = Field(default_factory=dict, description="正文层判断(辅助)")
			
 
				+    judgment_logic: str = Field(..., description="综合判定逻辑")
			
 
				+    core_evidence: list[str] = Field(default_factory=list, description="核心证据")
			
 
				+    issues: list[str] = Field(default_factory=list, description="不足或疑虑")
			
 
				+    conclusion: str = Field(..., description="结论陈述")
			
 
				+
			
 
				+
			
 
				+class ContentKnowledgeEvaluation(BaseModel):
			
 
				+    """Prompt2: 判断是否是内容知识 - 评估结果"""
			
 
				+    is_content_knowledge: bool = Field(..., description="是否属于内容知识")
			
 
				+    final_score: int = Field(..., description="最终得分(0-100)")
			
 
				+    level: str = Field(..., description="判定等级")
			
 
				+    quick_exclude: dict = Field(default_factory=dict, description="快速排除判定")
			
 
				+    dimension_scores: dict = Field(default_factory=dict, description="分层评分详情")
			
 
				+    core_evidence: list[str] = Field(default_factory=list, description="核心证据")
			
 
				+    issues: list[str] = Field(default_factory=list, description="不足之处")
			
 
				+    summary: str = Field(..., description="总结陈述")
			
 
				+
			
 
				+
			
 
				+class PurposeEvaluation(BaseModel):
			
 
				+    """Prompt3: 目的性匹配 - 评估结果"""
			
 
				+    purpose_score: int = Field(..., description="目的动机得分(0-100整数)")
			
 
				+    core_motivation: str = Field(..., description="原始需求核心动机")
			
 
				+    image_value: str = Field(..., description="图片提供的价值")
			
 
				+    title_intention: str = Field(..., description="标题体现的意图")
			
 
				+    text_content: str = Field(..., description="正文补充的内容")
			
 
				+    match_level: str = Field(..., description="匹配度等级")
			
 
				+    core_basis: str = Field(..., description="核心依据")
			
 
				+
			
 
				+
			
 
				+class CategoryEvaluation(BaseModel):
			
 
				+    """Prompt4: 品类匹配 - 评估结果"""
			
 
				+    category_score: int = Field(..., description="品类匹配得分(0-100整数)")
			
 
				+    original_category_analysis: dict = Field(default_factory=dict, description="原始需求品类分析")
			
 
				+    actual_category: dict = Field(default_factory=dict, description="帖子实际品类")
			
 
				+    match_level: str = Field(..., description="匹配度等级")
			
 
				+    category_match_analysis: dict = Field(default_factory=dict, description="品类匹配分析")
			
 
				+    core_basis: str = Field(..., description="核心依据")
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# LangGraph State定义
			
 
				+# ============================================================================
			
 
				+
			
 
				+class EvaluationState(TypedDict):
			
 
				+    """评估状态"""
			
 
				+    # 输入
			
 
				+    post: Any  # Post对象
			
 
				+    original_query: str
			
 
				+
			
 
				+    # 视频相关
			
 
				+    video_file: Optional[Any]  # genai.File对象
			
 
				+    video_uri: Optional[str]
			
 
				+    temp_video_path: Optional[str]
			
 
				+
			
 
				+    # 图片相关
			
 
				+    temp_image_paths: Optional[List[str]]  # 临时图片文件路径列表
			
 
				+    cached_media_files: Optional[List[Dict]]  # 缓存的图片base64数据，避免重复下载
			
 
				+
			
 
				+    # 评估结果
			
 
				+    knowledge_eval: Optional[KnowledgeEvaluation]
			
 
				+    content_eval: Optional[ContentKnowledgeEvaluation]
			
 
				+    purpose_eval: Optional[PurposeEvaluation]
			
 
				+    category_eval: Optional[CategoryEvaluation]
			
 
				+    final_score: Optional[float]
			
 
				+    match_level: Optional[str]
			
 
				+
			
 
				+    # 控制
			
 
				+    should_continue: bool
			
 
				+    error: Optional[str]
			
 
				+    semaphore: Optional[asyncio.Semaphore]
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# Prompt 定义 (复用V3 - 从post_evaluator_v3.py导入)
			
 
				+# ============================================================================
			
 
				+
			
 
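				+# NOTE: the V4 prompts are defined inline below (split into System/User parts); the V3 prompt constants are still imported after them for backward compatibility only.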
			
 
				+# ============================================================================
			
 
				+# Prompt 定义 - 拆分为System和User两部分
			
 
				+# ============================================================================
			
 
				+
			
 
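				+# Prompt1: knowledge check - System part (evaluation rules). NOTE(review): the text below is the "内容知识" rubric and states the post already passed the knowledge check, while its output schema asks for is_knowledge - confirm this is the intended Prompt1 text.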
			
 
				+SYSTEM_PROMPT1_IS_KNOWLEDGE = """# 内容知识判定系统 v2.0
			
 
				+
			
 
				+## 角色定义
			
 
				+你是一个多模态内容评估专家，专门判断社交媒体帖子是否属于"内容知识"类别。
			
 
				+
			
 
				+## 前置条件
			
 
				+该帖子已通过知识判定，确认提供了知识。现在需要进一步判断是否属于"内容知识"。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 内容知识定义
			
 
				+
			
 
				+**内容知识**是指与创作/制作/设计相关的、具有实操性和可迁移性的知识，帮助创作者提升创作能力。
			
 
				+
			
 
				+### 内容知识的范畴
			
 
				+- ✅ **创作原理**: 设计原理、创作逻辑、美学规律、构图法则（通用的，普适的）
			
 
				+- ✅ **制作方法**: 操作流程、技术步骤、工具使用方法
			
 
				+- ✅ **创意技巧**: 灵感方法、创意思路、表现手法、风格技法
			
 
				+- ✅ **体系框架**: 完整的创作体系、方法论、思维框架
			
 
				+- ✅ **案例提炼**: 从多个案例中总结的通用创作规律
			
 
				+
			
 
				+### 非内容知识（严格排除）
			
 
				+- ❌ **单案例展示**: 仅展示单个作品，无方法论提炼
			
 
				+- ❌ **作品集合**: 纯作品展示集合，无创作方法讲解
			
 
				+- ❌ **单点元素**: 只展示配色/字体/素材，无使用方法
			
 
				+- ❌ **单次操作**: 只讲某个项目的特定操作，无通用性
			
 
				+- ❌ **非创作领域**: 健康、财经、生活、科普等非创作制作领域的知识
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 输入信息
			
 
				+- **标题**: [帖子标题]
			
 
				+- **正文**: [帖子正文内容]  
			
 
				+- **图片**: [图片描述/内容]
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 判断流程
			
 
				+
			
 
				+### 第一步: 快速排除判断（任一为"是"则判定为非内容知识）
			
 
				+
			
 
				+1. 标题是否为纯展示型？（"我的XX作品"、"今天做了XX"、"作品分享"）
			
 
				+2. 图片是否全为作品展示，无任何方法/原理/步骤说明？
			
 
				+3. 是否只讲单个项目的特定操作，完全无通用性？
			
 
				+4. 是否为纯元素展示，无创作方法？（仅展示配色、字体、素材）
			
 
				+
			
 
				+**排除判定**: □ 是（判定为非内容知识） / □ 否（继续评估）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 第二步: 分层打分评估（满分100分）
			
 
				+
			
 
				+## 🖼️ 图片层评估（权重70%，满分70分）
			
 
				+
			
 
				+> **说明**: 社交媒体以图片为主要信息载体，图片层是核心判断依据
			
 
				+
			
 
				+#### 维度1: 创作方法呈现（20分）
			
 
				+**评分依据**: 图片是否清晰展示了具体的创作/制作方法、技巧、技法
			
 
				+
			
 
				+- **20分**: 图片详细展示≥3个具体可操作的创作方法/技巧，有明确的操作指引
			
 
				+- **15分**: 图片展示2个创作方法，方法较为具体
			
 
				+- **10分**: 图片展示1个创作方法，但不够详细
			
 
				+- **5分**: 图片暗示有方法，但未明确展示
			
 
				+- **0分**: 图片无任何方法展示，纯作品呈现
			
 
				+
			
 
				+**得分**: __/20
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度2: 知识体系化程度（15分）
			
 
				+**评分依据**: 多图是否形成完整的知识体系或逻辑链条
			
 
				+
			
 
				+- **15分**: 多图形成完整体系（步骤1→2→3，或原理→方法→案例），逻辑清晰
			
 
				+- **12分**: 多图有知识关联性，形成部分体系
			
 
				+- **8分**: 多图展示多个知识点，但关联性弱
			
 
				+- **4分**: 多图仅为同类案例堆砌，无体系
			
 
				+- **0分**: 单图或多图无逻辑关联
			
 
				+
			
 
				+**得分**: __/15
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度3: 教学性标注与说明（15分）
			
 
				+**评分依据**: 图片是否包含教学性的视觉元素（标注、序号、箭头、文字说明）
			
 
				+
			
 
				+- **15分**: 大量教学标注（序号、箭头、高亮、文字说明、对比标记等），清晰易懂
			
 
				+- **12分**: 有明显的教学标注，但不够完善
			
 
				+- **8分**: 有少量标注或说明
			
 
				+- **4分**: 仅有简单文字，无视觉教学元素
			
 
				+- **0分**: 无任何教学标注，纯视觉展示
			
 
				+
			
 
				+**得分**: __/15
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度4: 方法可复用性（10分）
			
 
				+**评分依据**: 图片展示的方法是否可迁移到其他创作场景/项目
			
 
				+
			
 
				+- **10分**: 明确展示通用方法，可应用于多种场景（配公式/模板/框架）
			
 
				+- **8分**: 方法有一定通用性，可迁移到类似场景
			
 
				+- **5分**: 方法通用性一般，需要改造才能应用
			
 
				+- **2分**: 方法仅适用于特定项目
			
 
				+- **0分**: 无可复用方法
			
 
				+
			
 
				+**得分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度5: 原理与案例结合（10分）
			
 
				+**评分依据**: 图片是否将创作原理与实际案例有效结合
			
 
				+
			
 
				+- **10分**: 原理+多案例验证，清晰展示原理如何应用
			
 
				+- **8分**: 原理+案例，有一定结合
			
 
				+- **5分**: 有原理或有案例，但结合不够
			
 
				+- **2分**: 仅有案例，无原理提炼
			
 
				+- **0分**: 纯案例展示或纯理论
			
 
				+
			
 
				+**得分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+**🖼️ 图片层总分**: __/70
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 📝 正文层评估（权重20%，满分20分）
			
 
				+
			
 
				+> **说明**: 正文作为辅助判断，补充图片未完整呈现的知识信息
			
 
				+
			
 
				+#### 维度6: 方法/步骤描述（10分）
			
 
				+**评分依据**: 正文是否描述了具体的创作方法或操作步骤
			
 
				+
			
 
				+- **10分**: 有完整的步骤描述（≥3步）或详细的方法说明
			
 
				+- **7分**: 有步骤或方法描述，但不够系统
			
 
				+- **4分**: 有零散的方法提及
			
 
				+- **0分**: 无方法/步骤，纯叙事或展示性文字
			
 
				+
			
 
				+**得分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度7: 知识总结与提炼（10分）
			
 
				+**评分依据**: 正文是否对创作经验/规律进行总结提炼
			
 
				+
			
 
				+- **10分**: 有明确的知识总结、归纳、框架化输出
			
 
				+- **7分**: 有一定的经验总结或要点提炼
			
 
				+- **4分**: 有零散的心得，但未成体系
			
 
				+- **0分**: 无任何知识提炼
			
 
				+
			
 
				+**得分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+**📝 正文层总分**: __/20
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 🏷️ 标题层评估（权重10%，满分10分）
			
 
				+
			
 
				+> **说明**: 标题作为内容导向，辅助判断内容主题
			
 
				+
			
 
				+#### 维度8: 标题内容指向性（10分）
			
 
				+**评分依据**: 标题是否明确指向创作/制作相关的知识内容
			
 
				+
			
 
				+- **10分**: 标题明确包含方法/教程/技巧/原理类词汇（"XX教程"、"XX技巧"、"如何XX"、"XX方法"）
			
 
				+- **7分**: 标题包含整理型词汇（"合集"、"总结"、"分享XX方法"）
			
 
				+- **4分**: 描述性标题，暗示有创作知识
			
 
				+- **0分**: 纯展示型标题（"我的作品"、"今天做了XX"）或与创作无关
			
 
				+
			
 
				+**得分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+**🏷️标题层总分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 第三步: 综合评分与判定
			
 
				+
			
 
				+**总分计算**:
			
 
				+总分 = 图片层总分(70分) + 正文层总分(20分) + 标题层总分(10分)
			
 
				+
			
 
				+**最终得分**: __/100分
			
 
				+
			
 
				+---
			
 
				+
			
 
				+**判定等级**:
			
 
				+- **85-100分**: ⭐⭐⭐⭐⭐ 优质内容知识 - 强烈符合
			
 
				+- **70-84分**: ⭐⭐⭐⭐ 良好内容知识 - 符合
			
 
				+- **55-69分**: ⭐⭐⭐ 基础内容知识 - 基本符合
			
 
				+- **40-54分**: ⭐⭐ 弱内容知识 - 不太符合
			
 
				+- **0-39分**: ⭐ 非内容知识 - 不符合
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 输出格式（JSON）
			
 
				+```json
			
 
				+{
			
 
				+  "is_knowledge": true/false,
			
 
				+  "quick_exclude": {
			
 
				+    "result": "通过/排除",
			
 
				+    "reason": "快速排除判定理由"
			
 
				+  },
			
 
				+  "title_layer": {
			
 
				+    "has_knowledge_direction": true/false,
			
 
				+    "reason": "标题层判断理由"
			
 
				+  },
			
 
				+  "image_layer": {
			
 
				+    "knowledge_presentation": {
			
 
				+      "match": true/false,
			
 
				+      "reason": "图片是否呈现知识"
			
 
				+    },
			
 
				+    "educational_value": {
			
 
				+      "has_value": true/false,
			
 
				+      "reason": "是否有教学价值"
			
 
				+    },
			
 
				+    "structure_level": {
			
 
				+      "structured": true/false,
			
 
				+      "reason": "结构化程度"
			
 
				+    },
			
 
				+    "practicality": {
			
 
				+      "practical": true/false,
			
 
				+      "reason": "实用性评估"
			
 
				+    },
			
 
				+    "information_density": {
			
 
				+      "level": "高/中/低",
			
 
				+      "reason": "信息密度判断"
			
 
				+    },
			
 
				+    "overall": "传递知识/纯展示/其他"
			
 
				+  },
			
 
				+  "text_layer": {
			
 
				+    "information_gain": {
			
 
				+      "has_gain": true/false,
			
 
				+      "reason": "是否有信息增量"
			
 
				+    },
			
 
				+    "verifiability": {
			
 
				+      "verifiable": true/false,
			
 
				+      "reason": "可验证性"
			
 
				+    },
			
 
				+    "knowledge_type": {
			
 
				+      "type": "方法性知识/应用性知识/原理性知识等",
			
 
				+      "reason": "知识类型判断"
			
 
				+    },
			
 
				+    "overall": "有知识支撑/无知识支撑"
			
 
				+  },
			
 
				+  "judgment_logic": "综合判定逻辑说明（2-3句话）",
			
 
				+  "core_evidence": [
			
 
				+    "证据1：从图片/正文/标题中提取的关键证据",
			
 
				+    "证据2：...",
			
 
				+    "证据3：..."
			
 
				+  ],
			
 
				+  "issues": [
			
 
				+    "问题1：存在的不足或疑虑",
			
 
				+    "问题2：..."
			
 
				+  ],
			
 
				+  "conclusion": "结论陈述（2-3句话说明判定结果和核心理由）"
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 判断原则
			
 
				+1. **图片主导原则**: 图片占70%权重，是核心判断依据；标题和正文为辅助
			
 
				+2. **创作领域限定**: 必须属于创作/制作/设计领域，其他领域知识不属于内容知识
			
 
				+3. **方法优先原则**: 重点评估是否提供了可操作的创作方法，而非纯作品展示
			
 
				+4. **通用性要求**: 优先考虑方法的可复用性和可迁移性
			
 
				+5. **严格性原则**: 宁可误判为"非内容知识"，也不放过纯展示型内容
			
 
				+6. **证据性原则**: 评分需基于明确的视觉和文本证据，可量化衡量
			
 
				+"""
			
 
				+
			
 
				+# Prompt1: 知识判定 - User部分(帖子数据)
			
 
				+USER_TEMPLATE1_IS_KNOWLEDGE = """请评估以下帖子是否为知识内容：
			
 
				+
			
 
				+**标题**: {title}
			
 
				+**正文**: {body_text}
			
 
				+**图片**: {num_images}张（图片内容见下方）
			
 
				+"""
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# Prompt2: 内容知识评估 - 拆分为System和User
			
 
				+# ============================================================================
			
 
				+
			
 
				+SYSTEM_PROMPT2_CONTENT_KNOWLEDGE = """## 角色定义
			
 
				+你是一个多模态内容评估专家，专门判断社交媒体帖子是否属于"内容知识"类别。
			
 
				+
			
 
				+## 前置条件
			
 
				+该帖子已通过知识判定，确认提供了知识。现在需要进一步判断是否属于"内容知识"。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 内容知识的底层定义
			
 
				+
			
 
				+**内容知识**：关于社交媒体内容创作与制作的通识性、原理性知识，帮助创作者策划、生产、优化和传播优质内容。
			
 
				+
			
 
				+### 核心特征
			
 
				+1. **领域特定性**：专注于社交媒体内容本身的创作与制作
			
 
				+2. **通识性**：跨平台、跨领域适用的内容创作原理和方法
			
 
				+3. **原理性**：不仅是操作步骤，更包含背后的逻辑和原理
			
 
				+4. **可迁移性**：方法可应用于不同类型的社交媒体内容创作
			
 
				+
			
 
				+### 内容知识的完整范畴
			
 
				+
			
 
				+#### 1️⃣ 内容策划层
			
 
				+- **选题方法**：如何找选题、选题原理、热点捕捉、用户需求分析
			
 
				+- **内容定位**：账号定位、人设打造、差异化策略
			
 
				+- **结构设计**：内容框架、故事结构、信息组织方式
			
 
				+- **创意方法**：创意思路、脑暴方法、灵感来源
			
 
				+
			
 
				+#### 2️⃣ 内容制作层
			
 
				+- **文案创作**：标题技巧、正文写作、文案公式、钩子设计、情绪调动
			
 
				+- **视觉呈现**：封面设计原理、排版方法、配色技巧（用于内容呈现的）
			
 
				+- **视频制作**：脚本结构、拍摄技巧、镜头语言、剪辑节奏、转场方法
			
 
				+- **多模态组合**：图文配合、视频+文案组合、内容形式选择
			
 
				+
			
 
				+#### 3️⃣ 内容优化层
			
 
				+- **开头/钩子**：前3秒设计、开头公式、吸引注意力的方法
			
 
				+- **节奏控制**：信息密度、节奏把控、留白技巧
			
 
				+- **完播/完读**：提升完播率/完读率的方法和原理
			
 
				+- **互动设计**：评论引导、互动话术、用户参与设计
			
 
				+
			
 
				+#### 4️⃣ 内容方法论
			
 
				+- **创作体系**：完整的内容创作流程和体系
			
 
				+- **底层原理**：为什么这样做有效的原理解释
			
 
				+- **通用框架**：可复用的内容创作框架和模板
			
 
				+- **案例提炼**：从多个案例中总结的通用规律
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 内容知识 vs 非内容知识
			
 
				+
			
 
				+**✅ 属于内容知识的例子**：
			
 
				+- "小红书爆款标题的5个公式"（文案创作）
			
 
				+- "短视频前3秒如何抓住用户"（开头设计）
			
 
				+- "如何策划一个涨粉选题"（内容策划）
			
 
				+- "视频节奏控制的底层逻辑"（内容优化）
			
 
				+- "图文笔记的排版原理"（视觉呈现）
			
 
				+- "从10个爆款视频总结的脚本结构"（方法论提炼）
			
 
				+
			
 
				+**❌ 不属于内容知识的例子**：
			
 
				+- "摄影构图的三分法则"（专业摄影技能，除非用于讲解社交媒体内容拍摄）
			
 
				+- "PS修图教程"（设计软件技能，除非用于讲解封面/配图制作）
			
 
				+- "我的探店vlog"（单个作品展示，无创作方法）
			
 
				+- "今天涨粉100个好开心"（个人记录，无方法论）
			
 
				+- "健康饮食的10个建议"（其他领域知识）
			
 
				+- "这套配色真好看"（纯元素展示，无创作方法）
			
 
				+
			
 
				+**⚠️ 边界情况判断**：
			
 
				+- **专业技能类**：如果是为社交媒体内容创作服务的，属于内容知识（如"拍摄短视频的灯光布置"）；如果是纯技能教学，不属于（如"专业摄影的灯光理论"）
			
 
				+- **工具使用类**：如果是为内容制作服务的，属于内容知识（如"剪映做转场的3种方法"）；如果是纯软件教程，不属于（如"AE粒子特效教程"）
			
 
				+- **案例分析类**：如果从案例中提炼了内容创作方法，属于内容知识；如果只是案例展示，不属于
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 判断核心准则
			
 
				+
			
 
				+**问自己三个问题**：
			
 
				+1. **这个知识是关于"如何创作社交媒体内容"的吗？** 
			
 
				+   - 是 → 可能是内容知识
			
 
				+   - 否 → 不是内容知识
			
 
				+
			
 
				+2. **这个方法/原理是通识性的吗？能跨内容类型/平台应用吗？**
			
 
				+   - 是 → 符合内容知识特征
			
 
				+   - 否 → 可能只是单点技巧
			
 
				+
			
 
				+3. **看完后，创作者能用它来改进自己的内容创作吗？**
			
 
				+   - 能 → 是内容知识
			
 
				+   - 不能 → 不是内容知识
			
 
				+
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 输入信息
			
 
				+- **标题**: [帖子标题]
			
 
				+- **正文**: [帖子正文内容]  
			
 
				+- **图片**: [图片描述/内容]
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 判断流程
			
 
				+
			
 
				+### 第一步: 领域快速筛查
			
 
				+
			
 
				+**判断：内容是否属于社交媒体内容创作/制作领域？**
			
 
				+
			
 
				+核心判断标准：
			
 
				+- 属于： 讲的是如何创作/制作社交媒体内容（选题、文案、拍摄、剪辑、运营等）
			
 
				+- 属于：讲的是内容创作的原理、方法、技巧
			
 
				+- 属于：讲的是平台运营、爆款方法、涨粉策略
			
 
				+- 不属于：讲的是其他专业领域技能（摄影、设计、编程等），与内容创作无关
			
 
				+- 不属于：讲的是其他行业知识（财经、健康、科普等）
			
 
				+
			
 
				+**判定**: □ 属于内容创作领域（继续） / □ 不属于（判定为非内容知识）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 第二步: 快速排除判断（任一为"是"则判定为非内容知识）
			
 
				+
			
 
				+1. 标题是否为纯展示型？（"我的XX"、"今天拍了XX"、"作品分享"）
			
 
				+2. 图片是否全为作品展示，无任何内容创作方法说明？
			
 
				+3. 是否只讲单个项目/单次创作的特定操作，完全无通用性？
			
 
				+4. 是否为纯元素/素材展示，无创作方法？（仅展示配色、字体、模板）
			
 
				+5. 是否为其他领域的专业知识，与内容创作无关？
			
 
				+
			
 
				+**排除判定**: □ 是（判定为非内容知识） / □ 否（继续评估）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 第三步: 分层打分评估（满分100分）
			
 
				+
			
 
				+## 🖼️ 图片层评估（权重70%，满分70分）
			
 
				+
			
 
				+> **说明**: 社交媒体以图片为主要信息载体，图片层是核心判断依据
			
 
				+
			
 
				+#### 维度1: 内容创作方法呈现（20分）
			
 
				+**评分依据**: 图片是否清晰展示了具体的内容创作/制作方法、技巧
			
 
				+
			
 
				+- **20分**: 图片详细展示≥3个可操作的内容创作方法（如标题公式、脚本结构、拍摄技巧等）
			
 
				+- **15分**: 图片展示2个内容创作方法，方法较为具体
			
 
				+- **10分**: 图片展示1个内容创作方法，但不够详细
			
 
				+- **5分**: 图片暗示有方法，但未明确展示
			
 
				+- **0分**: 图片无任何方法展示，纯作品呈现
			
 
				+
			
 
				+**得分**: __/20
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度2: 内容知识体系化（15分）
			
 
				+**评分依据**: 多图是否形成完整的内容创作知识体系或逻辑链条
			
 
				+
			
 
				+- **15分**: 多图形成完整体系（如选题→文案→制作→优化，或原理→方法→案例），逻辑清晰
			
 
				+- **12分**: 多图有知识关联性，形成部分内容创作体系
			
 
				+- **8分**: 多图展示多个内容创作知识点，但关联性弱
			
 
				+- **4分**: 多图仅为同类案例堆砌，无体系
			
 
				+- **0分**: 单图或多图无逻辑关联
			
 
				+
			
 
				+**得分**: __/15
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度3: 教学性标注与说明（15分）
			
 
				+**评分依据**: 图片是否包含教学性的视觉元素（标注、序号、箭头、文字说明）
			
 
				+
			
 
				+- **15分**: 大量教学标注（序号、箭头、高亮、文字说明、对比标记等），清晰易懂
			
 
				+- **12分**: 有明显的教学标注，但不够完善
			
 
				+- **8分**: 有少量标注或说明
			
 
				+- **4分**: 仅有简单文字，无视觉教学元素
			
 
				+- **0分**: 无任何教学标注，纯视觉展示
			
 
				+
			
 
				+**得分**: __/15
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度4: 方法通识性与可迁移性（10分）
			
 
				+**评分依据**: 图片展示的方法是否具有通识性，可迁移到不同类型的内容创作
			
 
				+
			
 
				+- **10分**: 明确展示通识性方法，可应用于多种内容类型/平台（配公式/框架）
			
 
				+- **8分**: 方法有较强通识性，可迁移到类似内容
			
 
				+- **5分**: 方法通识性一般，适用范围较窄
			
 
				+- **2分**: 方法仅适用于特定单一场景
			
 
				+- **0分**: 无通识性方法
			
 
				+
			
 
				+**得分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度5: 原理性深度（10分）
			
 
				+**评分依据**: 图片是否讲解了内容创作背后的原理和逻辑，而非仅操作步骤
			
 
				+
			
 
				+- **10分**: 深入讲解原理（为什么这样做有效），配合方法和案例
			
 
				+- **8分**: 有原理说明，但深度不够
			
 
				+- **5分**: 主要是方法，略有原理提及
			
 
				+- **2分**: 仅有操作步骤，无原理
			
 
				+- **0分**: 纯案例展示，无原理无方法
			
 
				+
			
 
				+**得分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+**🖼️ 图片层总分**: __/70
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 📝 正文层评估（权重20%，满分20分）
			
 
				+
			
 
				+> **说明**: 正文作为辅助判断，补充图片未完整呈现的知识信息
			
 
				+
			
 
				+#### 维度6: 方法/步骤描述（10分）
			
 
				+**评分依据**: 正文是否描述了具体的内容创作方法或操作步骤
			
 
				+
			
 
				+- **10分**: 有完整的内容创作步骤（≥3步）或详细的方法说明
			
 
				+- **7分**: 有步骤或方法描述，但不够系统
			
 
				+- **4分**: 有零散的方法提及
			
 
				+- **0分**: 无方法/步骤，纯叙事或展示性文字
			
 
				+
			
 
				+**得分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### 维度7: 知识总结与提炼（10分）
			
 
				+**评分依据**: 正文是否对内容创作经验/规律进行总结提炼
			
 
				+
			
 
				+- **10分**: 有明确的知识总结、规律归纳、框架化输出
			
 
				+- **7分**: 有一定的经验总结或要点提炼
			
 
				+- **4分**: 有零散的心得，但未成体系
			
 
				+- **0分**: 无任何知识提炼
			
 
				+
			
 
				+**得分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+**📝 正文层总分**: __/20
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 🏷️ 标题层评估（权重10%，满分10分）
			
 
				+
			
 
				+> **说明**: 标题作为内容导向，辅助判断内容主题
			
 
				+
			
 
				+#### 维度8: 标题内容指向性（10分）
			
 
				+**评分依据**: 标题是否明确指向内容创作/制作相关的知识
			
 
				+
			
 
				+- **10分**: 标题明确包含内容创作相关词汇（"爆款XX"、"涨粉XX"、"XX文案"、"XX脚本"、"XX选题"、"XX标题"、"如何拍/写/做XX"）
			
 
				+- **7分**: 标题包含整理型词汇（"XX合集"、"XX技巧总结"）
			
 
				+- **4分**: 描述性标题，暗示有内容创作知识
			
 
				+- **0分**: 纯展示型标题（"我的作品"、"今天拍了XX"）或与内容创作无关
			
 
				+
			
 
				+**得分**: __/10
			
 
				+---
			
 
				+
			
 
				+**🏷️标题层总分**: __/10
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 第四步: 综合评分与判定
			
 
				+
			
 
				+**总分计算**:
			
 
				+总分 = 图片层总分(70分) + 正文层总分(20分) + 标题层总分(10分)
			
 
				+
			
 
				+**最终得分**: __/100分
			
 
				+
			
 
				+---
			
 
				+
			
 
				+**判定等级**:
			
 
				+- **85-100分**: ⭐⭐⭐⭐⭐ 优质内容知识 - 强烈符合
			
 
				+- **70-84分**: ⭐⭐⭐⭐ 良好内容知识 - 符合
			
 
				+- **55-69分**: ⭐⭐⭐ 基础内容知识 - 基本符合
			
 
				+- **40-54分**: ⭐⭐ 弱内容知识 - 不符合
			
 
				+- **0-39分**: ⭐ 非内容知识 - 完全不符合
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 输出格式（JSON）
			
 
				+```json
			
 
				+{
			
 
				+  "is_content_knowledge": true/false,
			
 
				+  "final_score": 0-100的整数,
			
 
				+  "level": "⭐⭐⭐⭐⭐ 优质内容知识 / ⭐⭐⭐⭐ 良好内容知识 / ⭐⭐⭐ 基础内容知识 / ⭐⭐ 弱内容知识 / ⭐ 非内容知识",
			
 
				+  "quick_exclude": {
			
 
				+    "result": "是/否",
			
 
				+    "reason": "快速排除判定理由"
			
 
				+  },
			
 
				+  "dimension_scores": {
			
 
				+    "image_layer": {
			
 
				+      "creation_method": {
			
 
				+        "score": 0-20的整数,
			
 
				+        "reason": "内容创作方法呈现评分依据"
			
 
				+      },
			
 
				+      "knowledge_system": {
			
 
				+        "score": 0-15的整数,
			
 
				+        "reason": "内容知识体系化评分依据"
			
 
				+      },
			
 
				+      "teaching_annotation": {
			
 
				+        "score": 0-15的整数,
			
 
				+        "reason": "教学性标注评分依据"
			
 
				+      },
			
 
				+      "method_reusability": {
			
 
				+        "score": 0-10的整数,
			
 
				+        "reason": "方法通识性评分依据"
			
 
				+      },
			
 
				+      "principle_case": {
			
 
				+        "score": 0-10的整数,
			
 
				+        "reason": "原理性深度评分依据"
			
 
				+      },
			
 
				+      "subtotal": 0-70的整数
			
 
				+    },
			
 
				+    "text_layer": {
			
 
				+      "method_description": {
			
 
				+        "score": 0-10的整数,
			
 
				+        "reason": "方法/步骤描述评分依据"
			
 
				+      },
			
 
				+      "knowledge_summary": {
			
 
				+        "score": 0-10的整数,
			
 
				+        "reason": "知识总结提炼评分依据"
			
 
				+      },
			
 
				+      "subtotal": 0-20的整数
			
 
				+    },
			
 
				+    "title_layer": {
			
 
				+      "content_direction": {
			
 
				+        "score": 0-10的整数,
			
 
				+        "reason": "标题内容创作指向性评分依据"
			
 
				+      },
			
 
				+      "subtotal": 0-10的整数
			
 
				+    }
			
 
				+  },
			
 
				+  "core_evidence": [
			
 
				+    "证据1：从图片/正文/标题中提取的关键证据",
			
 
				+    "证据2：...",
			
 
				+    "证据3：..."
			
 
				+  ],
			
 
				+  "issues": [
			
 
				+    "问题1：存在的不足",
			
 
				+    "问题2：..."
			
 
				+  ],
			
 
				+  "summary": "总结陈述（5-6句话说明判定结果和核心理由，明确指出为何属于/不属于内容知识）"
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 判断原则
			
 
				+1. **图片主导原则**: 图片占70%权重，是核心判断依据；标题和正文为辅助
			
 
				+2. **创作领域限定**: 必须属于创作/制作/设计领域，其他领域知识不属于内容知识
			
 
				+3. **方法优先原则**: 重点评估是否提供了可操作的创作方法，而非纯作品展示
			
 
				+4. **通用性要求**: 优先考虑方法的可复用性和可迁移性
			
 
				+5. **严格性原则**: 宁可误判为"非内容知识"，也不放过纯展示型内容
			
 
				+6. **证据性原则**: 评分需基于明确的视觉和文本证据，可量化衡量
			
 
				+"""
			
 
				+
			
 
				+USER_TEMPLATE2_CONTENT_KNOWLEDGE = """请评估以下帖子是否属于内容知识：
			
 
				+
			
 
				+**标题**: {title}
			
 
				+**正文**: {body_text}
			
 
				+**图片**: {num_images}张（图片内容见下方）
			
 
				+"""
			
 
				+
			
 
				+# ============================================================================
			
 
				+# Prompt3: 目的性匹配评估 - 拆分为System和User
			
 
				+# ============================================================================
			
 
				+
			
 
				+SYSTEM_PROMPT3_PURPOSE_MATCH = """
			
 
				+
			
 
				+# Prompt 1: 多模态内容目的动机匹配评估
			
 
				+
			
 
				+## 角色定义
			
 
				+你是一位专业的多模态内容评估专家，擅长分析社交媒体UGC平台帖子的**目的动机匹配度**，能够精准判断帖子是否满足用户的核心意图。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 任务说明
			
 
				+你将收到一个**原始搜索需求**和一条**多模态帖子**（包含图片、标题、正文）
			
 
				+请**仅评估目的动机维度**的匹配度，输出0-100分的量化得分。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 输入格式
			
 
				+
			
 
				+**原始搜索需求：**
			
 
				+[用户的搜索查询词/需求描述]
			
 
				+
			
 
				+**多模态帖子内容：**
			
 
				+- **图片：** [图片内容描述或实际图片]
			
 
				+- **标题：** [帖子标题]
			
 
				+- **正文：** [帖子正文内容]
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 评估维度：目的动机匹配
			
 
				+
			
 
				+### 核心评估逻辑
			
 
				+
			
 
				+**目的动机 = 用户想做什么 = 核心动词/意图**
			
 
				+
			
 
				+常见动机类型：
			
 
				+- **获取型**：寻找、下载、收藏、获取
			
 
				+- **学习型**：教程、学习、了解、掌握
			
 
				+- **决策型**：推荐、对比、评测、选择
			
 
				+- **创作型**：拍摄、制作、设计、生成
			
 
				+- **分享型**：晒单、记录、分享、展示
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 评估流程
			
 
				+
			
 
				+### 第一步：识别原始需求的核心动机
			
 
				+- 提取**核心动词**（如果是纯名词短语，识别隐含意图）
			
 
				+- 判断用户的**最终目的**是什么
			
 
				+
			
 
				+### 第二步：分析帖子提供的价值（重点看图片）
			
 
				+
			
 
				+**图片分析（权重70%）：**
			
 
				+- 图片展示的是什么类型的内容？
			
 
				+- 图片是否直接解答了需求的目的？
			
 
				+- 图片的信息完整度和实用性如何？
			
 
				+
			
 
				+**标题分析（权重15%）：**
			
 
				+- 标题是否明确了内容的目的？
			
 
				+
			
 
				+**正文分析（权重15%）：**
			
 
				+- 正文是否提供了实质性的解答内容？
			
 
				+
			
 
				+### 第三步：判断目的匹配度
			
 
				+- 帖子是否**实质性地满足**了需求的动机？
			
 
				+- 内容是否**实用、完整、可执行**？
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 评分标准（0-100分）
			
 
				+
			
 
				+### 高度匹配区间
			
 
				+
			
 
				+**90-100分：完全满足动机，内容实用完整**
			
 
				+- 图片直接展示解决方案/教程步骤/对比结果
			
 
				+- 内容完整、清晰、可直接使用
			
 
				+- 例：需求"如何拍摄夜景" vs 图片展示完整的夜景拍摄参数设置和效果对比
			
 
				+
			
 
				+**75-89分：基本满足动机，信息较全面**
			
 
				+- 图片提供了核心解答内容
			
 
				+- 信息相对完整但深度略有不足
			
 
				+- 例：需求"推荐旅行路线" vs 图片展示了路线图但缺少详细说明
			
 
				+
			
 
				+**60-74分：部分满足动机，有参考价值**
			
 
				+- 图片提供了相关内容但不够直接
			
 
				+- 需要结合文字才能理解完整意图
			
 
				+
			
 
				+### 中度相关区间
			
 
				+
			
 
				+**40-59分：弱相关，核心目的未充分满足**
			
 
				+- 图片内容与动机有关联但不是直接解答
			
 
				+- 实用性较低
			
 
				+- 例：需求"如何拍摄" vs 图片只展示成品照片，无教程内容
			
 
				+
			
 
				+
			
 
				+### 不相关/负向区间
			
 
				+
			
 
				+**20-39分：微弱关联，基本未解答**
			
 
				+- 图片仅有外围相关性
			
 
				+- 对满足需求帮助极小
			
 
				+
			
 
				+**1-19分：几乎无关**
			
 
				+- 图片与需求动机关联极弱
			
 
				+
			
 
				+**0分：完全不相关**
			
 
				+- 图片与需求动机无任何关联
			
 
				+
			
 
				+**负分不使用**（目的动机维度不设负分）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 输出格式（JSON）
			
 
				+```json
			
 
				+{
			
 
				+  "purpose_score": 0-100的整数,
			
 
				+  "core_motivation": "识别出的用户意图（一句话）",
			
 
				+  "image_value": "图片展示了什么，如何满足动机",
			
 
				+  "title_intention": "标题说明了什么",
			
 
				+  "text_content": "正文是否有实质解答",
			
 
				+  "match_level": "完全匹配/高度匹配/基本匹配/弱匹配/不匹配",
			
 
				+  "core_basis": "为什么给这个分数（100字以内）"
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 评估原则
			
 
				+
			
 
				+1. **图片优先**：图片权重70%，是判断的主要依据
			
 
				+2. **实用导向**：不看表面相关，看实际解答程度
			
 
				+3. **严格标准**：宁可低估，避免虚高
			
 
				+4. **客观量化**：基于可观察的内容特征打分
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 特别注意
			
 
				+
			
 
				+- 本评估**只关注目的动机维度**，不考虑品类是否匹配
			
 
				+- 输出的分数必须是**0-100的整数**
			
 
				+- 不要自行计算综合分数，只输出目的动机分数
			
 
				+- 评分依据要具体、可验证
			
 
				+
			
 
				+"""
			
 
				+
			
 
				+USER_TEMPLATE3_PURPOSE_MATCH = """请评估以下帖子与用户需求的目的性匹配度：
			
 
				+
			
 
				+**原始搜索词**: {original_query}
			
 
				+**帖子标题**: {title}
			
 
				+**帖子正文**: {body_text}
			
 
				+**图片**: {num_images}张（图片内容见下方）
			
 
				+"""
			
 
				+
			
 
				+# ============================================================================
			
 
				+# Prompt4: 品类匹配评估 - 拆分为System和User  
			
 
				+# ============================================================================
			
 
				+
			
 
				+SYSTEM_PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
			
 
				+
			
 
				+## 角色定义
			
 
				+你是一位专业的多模态内容评估专家，擅长分析社交媒体UGC平台帖子的**品类匹配度**
			
 
				+能够精准判断帖子的内容主体是否与用户需求一致。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 任务说明
			
 
				+你将收到一个**原始搜索需求**和一条**多模态帖子**（包含图片、标题、正文），请**仅评估品类维度**的匹配度，输出0-100分的量化得分。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 输入格式
			
 
				+
			
 
				+**原始搜索需求：**
			
 
				+[用户的搜索查询词/需求描述]
			
 
				+
			
 
				+**多模态帖子内容：**
			
 
				+- **图片：** [图片内容描述或实际图片]
			
 
				+- **标题：** [帖子标题]
			
 
				+- **正文：** [帖子正文内容]
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 评估维度：品类匹配
			
 
				+
			
 
				+### 核心评估逻辑
			
 
				+
			
 
				+**品类 = 核心主体（名词）+ 限定词**
			
 
				+
			
 
				+- **核心主体**：具体的内容对象（风光摄影、旅行攻略、美食推荐）
			
 
				+- **限定词**：
			
 
				+  - 地域：川西、成都、日本
			
 
				+  - 时间：秋季、夏天、2024
			
 
				+  - 类型：免费、高清、入门级
			
 
				+  - 风格：小清新、复古、简约
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 评估流程
			
 
				+
			
 
				+### 第一步：提取原始需求的品类信息
			
 
				+- 识别**核心主体名词**
			
 
				+- 识别**关键限定词**（地域/时间/类型/风格等）
			
 
				+
			
 
				+### 第二步：从帖子中提取品类信息（重点看图片）
			
 
				+
			
 
				+**图片识别（权重70%）：**
			
 
				+- 图片展示的核心主体是什么？
			
 
				+- 图片中可识别的限定特征（地域标志、季节特征、类型属性、风格特点）
			
 
				+
			
 
				+**标题提取（权重15%）：**
			
 
				+- 标题明确的品类名词和限定词
			
 
				+
			
 
				+**正文提取（权重15%）：**
			
 
				+- 正文描述的品类信息
			
 
				+
			
 
				+### 第三步：对比匹配度
			
 
				+- 核心主体是否一致？
			
 
				+- 限定词匹配了几个？
			
 
				+- 是否存在泛化或偏移？
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 评分标准（0-100分）
			
 
				+
			
 
				+### 高度匹配区间
			
 
				+
			
 
				+**90-100分：核心主体+关键限定词完全匹配**
			
 
				+- 图片展示的主体与需求精准一致
			
 
				+- 关键限定词全部匹配（地域、时间、类型等）
			
 
				+- 例：需求"川西秋季风光" vs 图片展示川西秋季风景
			
 
				+
			
 
				+**75-89分：核心主体匹配，限定词匹配度百分之80**
			
 
				+- 图片主体一致
			
 
				+- 存在1-2个限定词缺失但不影响核心匹配
			
 
				+- 例：需求"川西秋季风光" vs 图片展示川西风光（缺秋季）
			
 
				+
			
 
				+**60-74分：核心主体匹配，限定词匹配度百分之60**
			
 
				+- 图片主体在同一大类
			
 
				+- 限定词部分匹配或有合理上下位关系
			
 
				+- 例：需求"川西秋季风光" vs 图片展示四川风光
			
 
				+
			
 
				+### 中度相关区间
			
 
				+
			
 
				+**40-59分：核心主体匹配，限定词完全不匹配**
			
 
				+- 图片主体相同但上下文不同
			
 
				+- 限定词严重缺失或不匹配
			
 
				+- 例：需求"猫咪表情包梗图" vs 女孩表情包
			
 
				+
			
 
				+### 不相关/负向区间
			
 
				+
			
 
				+**20-39分：主体过度泛化**
			
 
				+- 图片主体是通用概念，需求是特定概念
			
 
				+- 仅有抽象类别相似
			
 
				+- 例：需求"川西旅行攻略" vs 图片展示普通旅行场景
			
 
				+
			
 
				+**1-19分：品类关联极弱**
			
 
				+- 图片主体与需求差异明显
			
 
				+
			
 
				+**0分：品类完全不同**
			
 
				+- 图片主体类别完全不同
			
 
				+- 例：需求"风光摄影" vs 图片展示美食
			
 
				+
			
 
				+**负分不使用**（品类维度不设负分）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 输出格式（JSON）
			
 
				+```json
			
 
				+{
			
 
				+  "category_score": 0-100的整数,
			
 
				+  "original_category_analysis": {
			
 
				+    "核心主体": "提取的主体名词",
			
 
				+    "关键限定词": ["限定词1", "限定词2"]
			
 
				+  },
			
 
				+  "actual_category": {
			
 
				+    "图片主体": "图片展示的核心主体",
			
 
				+    "图片限定特征": ["从图片识别的限定词"],
			
 
				+    "标题品类": "标题提及的品类",
			
 
				+    "正文品类": "正文描述的品类"
			
 
				+  },
			
 
				+  "match_level": "完全匹配/高度匹配/基本匹配/弱匹配/不匹配",
			
 
				+  "category_match_analysis": {
			
 
				+    "主体匹配情况": "主体是否一致",
			
 
				+    "限定词匹配情况": "哪些限定词匹配/缺失"
			
 
				+  },
			
 
				+  "core_basis": "为什么给这个分数（100字以内）"
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 评估原则
			
 
				+
			
 
				+1. **图片优先**：图片权重70%，是判断的主要依据
			
 
				+2. **表面匹配**：只看实际展示的内容，禁止推测联想
			
 
				+3. **通用≠特定**：通用概念不等于特定概念，需明确区分
			
 
				+4. **严格标准**：宁可低估，避免虚高
			
 
				+5. **客观量化**：基于可观察的视觉特征和文字信息打分
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 特别注意
			
 
				+
			
 
				+- 本评估**只关注品类维度**，不考虑目的是否匹配
			
 
				+- 输出的分数必须是**0-100的整数**
			
 
				+- 不要自行计算综合分数，只输出品类分数
			
 
				+- 禁止因为"可能相关"就给分，必须有明确视觉证据
			
 
				+
			
 
				+---
			
 
				+"""
			
 
				+
			
 
				+USER_TEMPLATE4_CATEGORY_MATCH = """请评估以下帖子与用户需求的品类匹配度：
			
 
				+
			
 
				+**原始搜索词**: {original_query}
			
 
				+**帖子标题**: {title}
			
 
				+**帖子正文**: {body_text}
			
 
				+**图片**: {num_images}张（图片内容见下方）
			
 
				+"""
			
 
				+
			
 
				+
			
 
				+# 为了向后兼容，保留原始导入
			
 
				+from post_evaluator_v3 import (
			
 
				+    PROMPT1_IS_KNOWLEDGE,
			
 
				+    PROMPT2_IS_CONTENT_KNOWLEDGE,
			
 
				+    PROMPT3_PURPOSE_MATCH,
			
 
				+    PROMPT4_CATEGORY_MATCH
			
 
				+)
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# Gemini Client
			
 
				+# ============================================================================
			
 
				+
			
 
				+class GeminiClient:
			
 
				+    """Gemini API客户端 - 使用LangChain ChatGoogleGenerativeAI"""
			
 
				+
			
 
				+    def __init__(self, api_key: str = GEMINI_API_KEY, model_name: str = GEMINI_MODEL_NAME):
			
 
				+        self.api_key = api_key
			
 
				+        self.model_name = model_name
			
 
				+
			
 
				+    def create_model(self) -> ChatGoogleGenerativeAI:
			
 
				+        """创建Gemini模型实例(LangChain)"""
			
 
				+        return ChatGoogleGenerativeAI(
			
 
				+            model=self.model_name,
			
 
				+            google_api_key=self.api_key,
			
 
				+            temperature=0.1,
			
 
				+            # 配置返回JSON格式
			
 
				+            model_kwargs={
			
 
				+                "response_mime_type": "application/json"
			
 
				+            }
			
 
				+        )
			
 
				+
			
 
				+    async def generate_content(
			
 
				+        self,
			
 
				+        prompt_text: str = None,
			
 
				+        media_files: Optional[List[Any]] = None,
			
 
				+        max_retries: int = MAX_RETRIES,
			
 
				+        system_prompt: str = None,
			
 
				+        user_prompt: str = None
			
 
				+    ) -> dict:
			
 
				+        """
			
 
				+        调用Gemini API生成内容 (支持SystemMessage + HumanMessage)
			
 
				+
			
 
				+        Args:
			
 
				+            prompt_text: Prompt文本(旧格式,向后兼容)
			
 
				+            media_files: 媒体文件列表 (base64 data URL字典或视频File对象)
			
 
				+            max_retries: 最大重试次数
			
 
				+            system_prompt: System Prompt(新格式 - 评估规则)
			
 
				+            user_prompt: User Prompt(新格式 - 帖子数据)
			
 
				+
			
 
				+        Returns:
			
 
				+            解析后的JSON响应
			
 
				+        """
			
 
				+        # 构建messages列表
			
 
				+        messages = []
			
 
				+
			
 
				+        # 如果提供了system_prompt和user_prompt,使用新格式
			
 
				+        if system_prompt and user_prompt:
			
 
				+            # System Message
			
 
				+            messages.append(SystemMessage(content=system_prompt))
			
 
				+
			
 
				+            # Human Message (用户内容 + 图片)
			
 
				+            human_content = [{"type": "text", "text": user_prompt}]
			
 
				+            if media_files:
			
 
				+                human_content.extend(media_files)
			
 
				+            messages.append(HumanMessage(content=human_content))
			
 
				+
			
 
				+        # 否则使用旧格式(向后兼容)
			
 
				+        else:
			
 
				+            content = []
			
 
				+            # 添加文本
			
 
				+            content.append({"type": "text", "text": prompt_text or ""})
			
 
				+
			
 
				+            # 添加媒体文件
			
 
				+            if media_files:
			
 
				+                content.extend(media_files)
			
 
				+            messages.append(HumanMessage(content=content))
			
 
				+
			
 
				+        # 打印调试信息
			
 
				+        if media_files:
			
 
				+            print(f"      🔍 传递给Gemini: {len(media_files)}个媒体文件")
			
 
				+            for i, media in enumerate(media_files[:3]):
			
 
				+                if isinstance(media, dict) and media.get("type") == "image_url":
			
 
				+                    data_url = media.get("image_url", {}).get("url", "")
			
 
				+                    print(f"         📸 图片[{i}]: Base64 data URL ({len(data_url)}字符)")
			
 
				+                else:
			
 
				+                    print(f"         🎥 视频[{i}]: {type(media).__name__}")
			
 
				+        else:
			
 
				+            print(f"      ⚠️  无媒体文件传递给Gemini（仅文本）")
			
 
				+
			
 
				+        print(f"      💬 Messages: {len(messages)} ({['System' if 'SystemMessage' in str(type(m)) else 'Human' for m in messages]})")
			
 
				+
			
 
				+        # 创建模型
			
 
				+        model = self.create_model()
			
 
				+
			
 
				+        for attempt in range(max_retries + 1):
			
 
				+            try:
			
 
				+                # 调用模型
			
 
				+                loop = asyncio.get_event_loop()
			
 
				+                response = await loop.run_in_executor(
			
 
				+                    None,
			
 
				+                    lambda: model.invoke(messages)
			
 
				+                )
			
 
				+
			
 
				+                # 解析JSON响应
			
 
				+                response_text = response.content.strip()
			
 
				+                response_text = self._clean_json_response(response_text)
			
 
				+                return json.loads(response_text)
			
 
				+
			
 
				+            except Exception as e:
			
 
				+                error_msg = str(e)
			
 
				+                print(f"      ❌ Gemini API错误详情: {error_msg[:200]}")
			
 
				+                if "image" in error_msg.lower() or "media" in error_msg.lower():
			
 
				+                    print(f"      ⚠️  可能是图片/媒体访问问题")
			
 
				+
			
 
				+                if attempt < max_retries:
			
 
				+                    wait_time = RETRY_WAIT_SECONDS * (attempt + 1)
			
 
				+                    print(f"      ⏳ {wait_time}秒后重试 (第{attempt + 1}/{max_retries}次)")
			
 
				+                    await asyncio.sleep(wait_time)
			
 
				+                else:
			
 
				+                    raise Exception(f"Gemini API调用失败: {error_msg}")
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _clean_json_response(text: str) -> str:
			
 
				+        """清理JSON响应"""
			
 
				+        text = text.strip()
			
 
				+        if text.startswith("```json"):
			
 
				+            text = text[7:]
			
 
				+        elif text.startswith("```"):
			
 
				+            text = text[3:]
			
 
				+        if text.endswith("```"):
			
 
				+            text = text[:-3]
			
 
				+        return text.strip()
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# Video Uploader
			
 
				+# ============================================================================
			
 
				+
			
 
				+class VideoUploader:
			
 
				+    """视频上传处理器"""
			
 
				+
			
 
				+    @staticmethod
			
 
				+    async def upload_video(video_url: str) -> tuple[Optional[Any], Optional[str], Optional[str]]:
			
 
				+        """
			
 
				+        上传视频到Gemini
			
 
				+
			
 
				+        Args:
			
 
				+            video_url: 视频URL
			
 
				+
			
 
				+        Returns:
			
 
				+            (video_file, video_uri, temp_path)
			
 
				+        """
			
 
				+        import requests
			
 
				+
			
 
				+        # 下载视频到临时文件
			
 
				+        temp_fd, temp_path = tempfile.mkstemp(suffix=".mp4", prefix="eval_video_")
			
 
				+        os.close(temp_fd)
			
 
				+
			
 
				+        try:
			
 
				+            print(f"      📥 下载视频: {video_url[:60]}...")
			
 
				+
			
 
				+            # 下载
			
 
				+            loop = asyncio.get_event_loop()
			
 
				+            response = await loop.run_in_executor(
			
 
				+                None,
			
 
				+                lambda: requests.get(video_url, timeout=120, stream=True)
			
 
				+            )
			
 
				+            response.raise_for_status()
			
 
				+
			
 
				+            with open(temp_path, 'wb') as f:
			
 
				+                for chunk in response.iter_content(chunk_size=8192):
			
 
				+                    if chunk:
			
 
				+                        f.write(chunk)
			
 
				+
			
 
				+            file_size_mb = os.path.getsize(temp_path) / (1024 * 1024)
			
 
				+            print(f"      📦 视频下载完成,大小: {file_size_mb:.2f}MB")
			
 
				+
			
 
				+            # 上传到Gemini
			
 
				+            print(f"      ☁️  上传到Gemini...")
			
 
				+            # 暂时禁用视频上传功能(genai版本冲突)
			
 
				+            raise NotImplementedError("视频上传暂时禁用,等待修复版本冲突")
			
 
				+            # uploaded_file = await loop.run_in_executor(
			
 
				+            #     None,
			
 
				+            #     lambda: genai.upload_file(temp_path)
			
 
				+            # )
			
 
				+
			
 
				+            # 等待处理
			
 
				+            processed_file = await VideoUploader._wait_for_processing(uploaded_file)
			
 
				+            if not processed_file:
			
 
				+                return None, None, temp_path
			
 
				+
			
 
				+            print(f"      ✅ 视频上传成功: {processed_file.uri}")
			
 
				+            return processed_file, processed_file.uri, temp_path
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"      ❌ 视频上传失败: {str(e)[:100]}")
			
 
				+            return None, None, temp_path
			
 
				+
			
 
				+    @staticmethod
			
 
				+    async def _wait_for_processing(uploaded_file: Any) -> Optional[Any]:
			
 
				+        """等待Gemini处理视频文件"""
			
 
				+        start_time = time.time()
			
 
				+        current_file = uploaded_file
			
 
				+
			
 
				+        loop = asyncio.get_event_loop()
			
 
				+
			
 
				+        while current_file.state.name == "PROCESSING":
			
 
				+            elapsed = time.time() - start_time
			
 
				+            if elapsed > FILE_PROCESS_TIMEOUT:
			
 
				+                print(f"      ❌ 视频处理超时: {current_file.name}")
			
 
				+                return None
			
 
				+
			
 
				+            print(f"      ⏳ 等待Gemini处理视频...{elapsed:.0f}s")
			
 
				+            await asyncio.sleep(RETRY_WAIT_SECONDS)
			
 
				+
			
 
				+            current_file = await loop.run_in_executor(
			
 
				+                None,
			
 
				+                lambda: genai.get_file(current_file.name)
			
 
				+            )
			
 
				+
			
 
				+        if current_file.state.name == "FAILED":
			
 
				+            print(f"      ❌ 视频处理失败: {current_file.state}")
			
 
				+            return None
			
 
				+
			
 
				+        return current_file
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# Image Uploader
			
 
				+# ============================================================================
			
 
				+
			
 
				+class ImageUploader:
			
 
				+    """图片加载器 - 下载图片并转为base64 data URL(参考demo)"""
			
 
				+
			
 
				+    @staticmethod
			
 
				+    async def upload_images(image_urls: List[str]) -> tuple[List[Dict], List[str]]:
			
 
				+        """
			
 
				+        批量下载图片并转为base64 data URL格式
			
 
				+
			
 
				+        Args:
			
 
				+            image_urls: 图片URL列表
			
 
				+
			
 
				+        Returns:
			
 
				+            (image_contents, []) - 图片content字典列表和空列表(保持接口兼容)
			
 
				+            格式: {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
			
 
				+        """
			
 
				+        if not image_urls:
			
 
				+            return [], []
			
 
				+
			
 
				+        print(f"      📥 准备加载 {len(image_urls)} 张图片(Base64方式)...")
			
 
				+
			
 
				+        # 并发下载所有图片
			
 
				+        tasks = [ImageUploader._load_single_image(url, idx) for idx, url in enumerate(image_urls)]
			
 
				+        results = await asyncio.gather(*tasks, return_exceptions=True)
			
 
				+
			
 
				+        # 分离成功和失败的结果
			
 
				+        image_contents = []
			
 
				+
			
 
				+        for idx, result in enumerate(results):
			
 
				+            if isinstance(result, Exception):
			
 
				+                print(f"      ⚠️  图片{idx}加载失败: {str(result)[:50]}")
			
 
				+            elif result is not None:
			
 
				+                image_contents.append(result)
			
 
				+
			
 
				+        print(f"      ✅ 成功加载 {len(image_contents)}/{len(image_urls)} 张图片")
			
 
				+        return image_contents, []  # 返回空列表作为temp_paths,因为不需要清理
			
 
				+
			
 
				+    @staticmethod
			
 
				+    async def _load_single_image(image_url: str, idx: int) -> Optional[Dict]:
			
 
				+        """
			
 
				+        下载单张图片并转为base64 data URL格式
			
 
				+
			
 
				+        Args:
			
 
				+            image_url: 图片URL
			
 
				+            idx: 图片索引(用于日志)
			
 
				+
			
 
				+        Returns:
			
 
				+            图片content字典: {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
			
 
				+        """
			
 
				+        try:
			
 
				+            # 下载图片到内存
			
 
				+            loop = asyncio.get_event_loop()
			
 
				+            response = await loop.run_in_executor(
			
 
				+                None,
			
 
				+                lambda: requests.get(image_url, timeout=30)
			
 
				+            )
			
 
				+            response.raise_for_status()
			
 
				+
			
 
				+            # 转换为PIL Image对象
			
 
				+            image = Image.open(io.BytesIO(response.content))
			
 
				+
			
 
				+            # 转换为RGB模式(Gemini推荐)
			
 
				+            if image.mode != 'RGB':
			
 
				+                image = image.convert('RGB')
			
 
				+
			
 
				+            # 转换为PNG格式的BytesIO
			
 
				+            buffer = io.BytesIO()
			
 
				+            image.save(buffer, format="PNG")
			
 
				+            image_bytes = buffer.getvalue()
			
 
				+
			
 
				+            # Base64编码
			
 
				+            base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
			
 
				+            data_url = f"data:image/png;base64,{base64_encoded}"
			
 
				+
			
 
				+            file_size_kb = len(image_bytes) / 1024
			
 
				+            print(f"      ✓ 图片{idx}加载成功 ({file_size_kb:.1f}KB, {image.size[0]}x{image.size[1]})")
			
 
				+
			
 
				+            # 返回格式与demo一致
			
 
				+            return {
			
 
				+                "type": "image_url",
			
 
				+                "image_url": {"url": data_url}
			
 
				+            }
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"      ✗ 图片{idx}加载失败: {str(e)[:60]}")
			
 
				+            return None
			
 
				+
			
 
				+
			
 
				+
			
 
				+class PromptAdapter:
			
 
				+    """Prompt适配器 - 根据媒体类型调整Prompt"""
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def adapt_prompt(prompt_template: str, post: Any, **kwargs) -> str:
			
 
				+        """
			
 
				+        适配Prompt
			
 
				+
			
 
				+        Args:
			
 
				+            prompt_template: Prompt模板
			
 
				+            post: Post对象
			
 
				+            **kwargs: 其他参数 (如original_query)
			
 
				+
			
 
				+        Returns:
			
 
				+            适配后的Prompt
			
 
				+        """
			
 
				+        # 准备替换参数
			
 
				+        params = {
			
 
				+            "title": post.title or "",
			
 
				+            "body_text": post.body_text or "",
			
 
				+        }
			
 
				+
			
 
				+        # 媒体描述
			
 
				+        if post.type == "video":
			
 
				+            params["num_images"] = "1个视频"
			
 
				+        else:
			
 
				+            num_images = len(post.images) if post.images else 0
			
 
				+            params["num_images"] = f"{num_images}张"
			
 
				+
			
 
				+        # 添加其他参数
			
 
				+        params.update(kwargs)
			
 
				+
			
 
				+        return prompt_template.format(**params)
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# 缓存函数 (复用V3逻辑)
			
 
				+# ============================================================================
			
 
				+
			
 
				+def _get_cache_key(note_id: str) -> str:
			
 
				+    """生成缓存key"""
			
 
				+    return f"{note_id}_v4.0.json"
			
 
				+
			
 
				+
			
 
				+def _load_from_cache(note_id: str) -> Optional[tuple]:
			
 
				+    """从缓存加载评估结果"""
			
 
				+    if not ENABLE_CACHE:
			
 
				+        return None
			
 
				+
			
 
				+    cache_file = os.path.join(CACHE_DIR, _get_cache_key(note_id))
			
 
				+
			
 
				+    if not os.path.exists(cache_file):
			
 
				+        return None
			
 
				+
			
 
				+    try:
			
 
				+        with open(cache_file, 'r', encoding='utf-8') as f:
			
 
				+            data = json.load(f)
			
 
				+
			
 
				+        # 重建评估对象
			
 
				+        knowledge_eval = None
			
 
				+        if data.get("knowledge_eval"):
			
 
				+            knowledge_eval = KnowledgeEvaluation(**data["knowledge_eval"])
			
 
				+
			
 
				+        content_eval = None
			
 
				+        if data.get("content_eval"):
			
 
				+            content_eval = ContentKnowledgeEvaluation(**data["content_eval"])
			
 
				+
			
 
				+        purpose_eval = None
			
 
				+        if data.get("purpose_eval"):
			
 
				+            purpose_eval = PurposeEvaluation(**data["purpose_eval"])
			
 
				+
			
 
				+        category_eval = None
			
 
				+        if data.get("category_eval"):
			
 
				+            category_eval = CategoryEvaluation(**data["category_eval"])
			
 
				+
			
 
				+        final_score = data.get("final_score")
			
 
				+        match_level = data.get("match_level")
			
 
				+
			
 
				+        return (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"      ⚠️  缓存读取失败: {note_id} - {str(e)[:50]}")
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+def _save_to_cache(note_id: str, eval_results: tuple):
			
 
				+    """保存评估结果到缓存"""
			
 
				+    if not ENABLE_CACHE:
			
 
				+        return
			
 
				+
			
 
				+    knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level = eval_results
			
 
				+
			
 
				+    os.makedirs(CACHE_DIR, exist_ok=True)
			
 
				+
			
 
				+    cache_data = {
			
 
				+        "knowledge_eval": knowledge_eval.model_dump() if knowledge_eval else None,
			
 
				+        "content_eval": content_eval.model_dump() if content_eval else None,
			
 
				+        "purpose_eval": purpose_eval.model_dump() if purpose_eval else None,
			
 
				+        "category_eval": category_eval.model_dump() if category_eval else None,
			
 
				+        "final_score": final_score,
			
 
				+        "match_level": match_level,
			
 
				+        "cache_time": datetime.now().isoformat(),
			
 
				+        "evaluator_version": "v4.0"
			
 
				+    }
			
 
				+
			
 
				+    cache_file = os.path.join(CACHE_DIR, _get_cache_key(note_id))
			
 
				+
			
 
				+    try:
			
 
				+        with open(cache_file, 'w', encoding='utf-8') as f:
			
 
				+            json.dump(cache_data, f, ensure_ascii=False, indent=2)
			
 
				+    except Exception as e:
			
 
				+        print(f"      ⚠️  缓存保存失败: {note_id} - {str(e)[:50]}")
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# LangGraph 节点函数
			
 
				+# ============================================================================
			
 
				+
			
 
				+async def knowledge_node(state: EvaluationState) -> EvaluationState:
			
 
				+    """
			
 
				+    Node 1: 知识判断 (Prompt1)
			
 
				+    """
			
 
				+    post = state["post"]
			
 
				+    semaphore = state.get("semaphore")
			
 
				+
			
 
				+    print(f"      📝 Step 1/4: 判断是知识...")
			
 
				+
			
 
				+    try:
			
 
				+        # 准备媒体文件
			
 
				+        media_files = []
			
 
				+        if post.type == "video" and state.get("video_file"):
			
 
				+            media_files = [state["video_file"]]
			
 
				+            print(f"      📹 准备视频文件: {state.get('video_uri', 'N/A')}")
			
 
				+        elif post.images:
			
 
				+            # 图文帖子 - 上传图片到Gemini
			
 
				+            image_urls = post.images[:MAX_IMAGES_PER_POST]
			
 
				+            print(f"      📸 准备上传 {len(image_urls)} 张图片 (总共{len(post.images)}张)")
			
 
				+
			
 
				+            uploaded_files, temp_paths = await ImageUploader.upload_images(image_urls)
			
 
				+            media_files = uploaded_files
			
 
				+
			
 
				+            # 保存临时路径到state中
			
 
				+            if not state.get("temp_image_paths"):
			
 
				+                state["temp_image_paths"] = []
			
 
				+            state["temp_image_paths"].extend(temp_paths)
			
 
				+
			
 
				+            # ✅ 缓存图片数据，避免后续节点重复下载
			
 
				+            state["cached_media_files"] = media_files
			
 
				+        else:
			
 
				+            print(f"      ⚠️  帖子无图片/视频")
			
 
				+
			
 
				+        # 准备System和User Prompt
			
 
				+        user_prompt = PromptAdapter.adapt_prompt(USER_TEMPLATE1_IS_KNOWLEDGE, post)
			
 
				+        system_prompt = SYSTEM_PROMPT1_IS_KNOWLEDGE
			
 
				+
			
 
				+        # 调用Gemini (使用新格式)
			
 
				+        client = GeminiClient()
			
 
				+
			
 
				+        if semaphore:
			
 
				+            async with semaphore:
			
 
				+                data = await client.generate_content(
			
 
				+                    system_prompt=system_prompt,
			
 
				+                    user_prompt=user_prompt,
			
 
				+                    media_files=media_files
			
 
				+                )
			
 
				+        else:
			
 
				+            data = await client.generate_content(
			
 
				+                system_prompt=system_prompt,
			
 
				+                user_prompt=user_prompt,
			
 
				+                media_files=media_files
			
 
				+            )
			
 
				+
			
 
				+        # 调试:打印返回的数据结构
			
 
				+        print(f"      🐛 DEBUG - API返回数据: {json.dumps(data, ensure_ascii=False, indent=2)[:500]}")
			
 
				+        print(f"      🐛 DEBUG - data keys: {list(data.keys())}")
			
 
				+
			
 
				+        # 解析结果
			
 
				+        knowledge_eval = KnowledgeEvaluation(
			
 
				+            is_knowledge=data.get("is_knowledge", False),
			
 
				+            quick_exclude=data.get("quick_exclude", {}),
			
 
				+            title_layer=data.get("title_layer", {}),
			
 
				+            image_layer=data.get("image_layer", {}),
			
 
				+            text_layer=data.get("text_layer", {}),
			
 
				+            judgment_logic=data.get("judgment_logic", ""),
			
 
				+            core_evidence=data.get("core_evidence", []),
			
 
				+            issues=data.get("issues", []),
			
 
				+            conclusion=data.get("conclusion", "")
			
 
				+        )
			
 
				+
			
 
				+        state["knowledge_eval"] = knowledge_eval
			
 
				+
			
 
				+        # 判断是否继续
			
 
				+        if not knowledge_eval.is_knowledge:
			
 
				+            print(f"      ⊗ 非知识内容,停止后续评估")
			
 
				+            state["should_continue"] = False
			
 
				+        else:
			
 
				+            print(f"      ✅ Step 1: 是知识内容")
			
 
				+            state["should_continue"] = True
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"      ❌ Prompt1评估失败: {str(e)[:100]}")
			
 
				+        state["error"] = str(e)
			
 
				+        state["should_continue"] = False
			
 
				+
			
 
				+    return state
			
 
				+
			
 
				+
			
 
				+async def content_knowledge_node(state: EvaluationState) -> EvaluationState:
			
 
				+    """
			
 
				+    Node 2: 内容知识判断 (Prompt2)
			
 
				+    """
			
 
				+    post = state["post"]
			
 
				+    semaphore = state.get("semaphore")
			
 
				+
			
 
				+    print(f"      📝 Step 2/4: 判断是否是内容知识...")
			
 
				+
			
 
				+    try:
			
 
				+        # 准备媒体文件
			
 
				+        media_files = []
			
 
				+        if post.type == "video" and state.get("video_file"):
			
 
				+            media_files = [state["video_file"]]
			
 
				+            print(f"      📹 准备视频文件")
			
 
				+        elif post.images:
			
 
				+            # ✅ 优先使用缓存的图片，避免重复下载
			
 
				+            if state.get("cached_media_files"):
			
 
				+                media_files = state["cached_media_files"]
			
 
				+                print(f"      ♻️  使用缓存图片 ({len(media_files)}张)")
			
 
				+            else:
			
 
				+                # 缓存不存在才下载
			
 
				+                image_urls = post.images[:MAX_IMAGES_PER_POST]
			
 
				+                print(f"      📸 准备上传 {len(image_urls)} 张图片 (用于内容知识评估)")
			
 
				+
			
 
				+                uploaded_files, temp_paths = await ImageUploader.upload_images(image_urls)
			
 
				+                media_files = uploaded_files
			
 
				+
			
 
				+                # 保存临时路径到state中
			
 
				+                if not state.get("temp_image_paths"):
			
 
				+                    state["temp_image_paths"] = []
			
 
				+                state["temp_image_paths"].extend(temp_paths)
			
 
				+        else:
			
 
				+            print(f"      ⚠️  无媒体文件")
			
 
				+
			
 
				+        # 准备System和User Prompt
			
 
				+        user_prompt = PromptAdapter.adapt_prompt(USER_TEMPLATE2_CONTENT_KNOWLEDGE, post)
			
 
				+        system_prompt = SYSTEM_PROMPT2_CONTENT_KNOWLEDGE
			
 
				+
			
 
				+        # 调用Gemini (使用新格式)
			
 
				+        client = GeminiClient()
			
 
				+
			
 
				+        if semaphore:
			
 
				+            async with semaphore:
			
 
				+                data = await client.generate_content(
			
 
				+                    system_prompt=system_prompt,
			
 
				+                    user_prompt=user_prompt,
			
 
				+                    media_files=media_files
			
 
				+                )
			
 
				+        else:
			
 
				+            data = await client.generate_content(
			
 
				+                system_prompt=system_prompt,
			
 
				+                user_prompt=user_prompt,
			
 
				+                media_files=media_files
			
 
				+            )
			
 
				+
			
 
				+        # 解析结果
			
 
				+        final_score = data.get("final_score", 0)
			
 
				+        is_content_knowledge = final_score >= 55
			
 
				+
			
 
				+        content_eval = ContentKnowledgeEvaluation(
			
 
				+            is_content_knowledge=is_content_knowledge,
			
 
				+            final_score=final_score,
			
 
				+            level=data.get("level", ""),
			
 
				+            quick_exclude=data.get("quick_exclude", {}),
			
 
				+            dimension_scores=data.get("dimension_scores", {}),
			
 
				+            core_evidence=data.get("core_evidence", []),
			
 
				+            issues=data.get("issues", []),
			
 
				+            summary=data.get("summary", "")
			
 
				+        )
			
 
				+
			
 
				+        state["content_eval"] = content_eval
			
 
				+
			
 
				+        # 判断是否继续
			
 
				+        if not is_content_knowledge:
			
 
				+            print(f"      ⊗ 非内容知识,停止后续评估 (得分: {final_score})")
			
 
				+            state["should_continue"] = False
			
 
				+        else:
			
 
				+            print(f"      ✅ Step 2: 是内容知识 (得分: {final_score})")
			
 
				+            state["should_continue"] = True
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"      ❌ Prompt2评估失败: {str(e)[:100]}")
			
 
				+        state["error"] = str(e)
			
 
				+        state["should_continue"] = False
			
 
				+
			
 
				+    return state
			
 
				+
			
 
				+
			
 
				+async def parallel_match_node(state: EvaluationState) -> EvaluationState:
			
 
				+    """
			
 
				+    Node 3: 并行目的性和品类匹配 (Prompt3 & Prompt4)
			
 
				+    """
			
 
				+    post = state["post"]
			
 
				+    original_query = state["original_query"]
			
 
				+    semaphore = state.get("semaphore")
			
 
				+
			
 
				+    print(f"      📝 Step 3&4/4: 并行执行目的性和品类匹配...")
			
 
				+
			
 
				+    try:
			
 
				+        # 准备媒体文件
			
 
				+        media_files = []
			
 
				+        if post.type == "video" and state.get("video_file"):
			
 
				+            media_files = [state["video_file"]]
			
 
				+            print(f"      📹 准备视频文件")
			
 
				+        elif post.images:
			
 
				+            # ✅ 优先使用缓存的图片，避免重复下载
			
 
				+            if state.get("cached_media_files"):
			
 
				+                media_files = state["cached_media_files"]
			
 
				+                print(f"      ♻️  使用缓存图片 ({len(media_files)}张)")
			
 
				+            else:
			
 
				+                # 缓存不存在才下载
			
 
				+                image_urls = post.images[:MAX_IMAGES_PER_POST]
			
 
				+                print(f"      📸 准备上传 {len(image_urls)} 张图片 (用于目的性和品类评估)")
			
 
				+
			
 
				+                uploaded_files, temp_paths = await ImageUploader.upload_images(image_urls)
			
 
				+                media_files = uploaded_files
			
 
				+
			
 
				+                # 保存临时路径到state中
			
 
				+                if not state.get("temp_image_paths"):
			
 
				+                    state["temp_image_paths"] = []
			
 
				+                state["temp_image_paths"].extend(temp_paths)
			
 
				+        else:
			
 
				+            print(f"      ⚠️  无媒体文件")
			
 
				+
			
 
				+        client = GeminiClient()
			
 
				+
			
 
				+        # 并行执行Prompt3和Prompt4
			
 
				+        async def eval_purpose():
			
 
				+            user_prompt = PromptAdapter.adapt_prompt(
			
 
				+                USER_TEMPLATE3_PURPOSE_MATCH, post, original_query=original_query
			
 
				+            )
			
 
				+            system_prompt = SYSTEM_PROMPT3_PURPOSE_MATCH
			
 
				+            
			
 
				+            if semaphore:
			
 
				+                async with semaphore:
			
 
				+                    return await client.generate_content(
			
 
				+                        system_prompt=system_prompt,
			
 
				+                        user_prompt=user_prompt,
			
 
				+                        media_files=media_files
			
 
				+                    )
			
 
				+            else:
			
 
				+                return await client.generate_content(
			
 
				+                    system_prompt=system_prompt,
			
 
				+                    user_prompt=user_prompt,
			
 
				+                    media_files=media_files
			
 
				+                )
			
 
				+
			
 
				+        async def eval_category():
			
 
				+            user_prompt = PromptAdapter.adapt_prompt(
			
 
				+                USER_TEMPLATE4_CATEGORY_MATCH, post, original_query=original_query
			
 
				+            )
			
 
				+            system_prompt = SYSTEM_PROMPT4_CATEGORY_MATCH
			
 
				+            
			
 
				+            if semaphore:
			
 
				+                async with semaphore:
			
 
				+                    return await client.generate_content(
			
 
				+                        system_prompt=system_prompt,
			
 
				+                        user_prompt=user_prompt,
			
 
				+                        media_files=media_files
			
 
				+                    )
			
 
				+            else:
			
 
				+                return await client.generate_content(
			
 
				+                    system_prompt=system_prompt,
			
 
				+                    user_prompt=user_prompt,
			
 
				+                    media_files=media_files
			
 
				+                )
			
 
				+
			
 
				+        purpose_data, category_data = await asyncio.gather(eval_purpose(), eval_category())
			
 
				+
			
 
				+        # 🔍 调试日志 - 查看API返回的实际结构
			
 
				+        print(f"\n      🐛 DEBUG - purpose_data keys: {list(purpose_data.keys())}")
			
 
				+        print(f"      🐛 DEBUG - purpose_data 内容: {purpose_data}")
			
 
				+        print(f"\n      🐛 DEBUG - category_data keys: {list(category_data.keys())}")
			
 
				+        print(f"      🐛 DEBUG - category_data 内容: {category_data}\n")
			
 
				+
			
 
				+        # 解析Prompt3结果（直接使用英文字段名）
			
 
				+        purpose_eval = PurposeEvaluation(
			
 
				+            purpose_score=purpose_data.get("purpose_score", 0),
			
 
				+            core_motivation=purpose_data.get("core_motivation", ""),
			
 
				+            image_value=purpose_data.get("image_value", ""),
			
 
				+            title_intention=purpose_data.get("title_intention", ""),
			
 
				+            text_content=purpose_data.get("text_content", ""),
			
 
				+            match_level=purpose_data.get("match_level", ""),
			
 
				+            core_basis=purpose_data.get("core_basis", "")
			
 
				+        )
			
 
				+
			
 
				+        # 解析Prompt4结果（直接使用英文字段名）
			
 
				+        category_eval = CategoryEvaluation(
			
 
				+            category_score=category_data.get("category_score", 0),
			
 
				+            original_category_analysis=category_data.get("original_category_analysis", {}),
			
 
				+            actual_category=category_data.get("actual_category", {}),
			
 
				+            match_level=category_data.get("match_level", ""),
			
 
				+            category_match_analysis=category_data.get("category_match_analysis", {}),
			
 
				+            core_basis=category_data.get("core_basis", "")
			
 
				+        )
			
 
				+
			
 
				+        state["purpose_eval"] = purpose_eval
			
 
				+        state["category_eval"] = category_eval
			
 
				+        state["should_continue"] = True
			
 
				+
			
 
				+        print(f"      ✅ Step 3: 目的性得分 = {purpose_eval.purpose_score}")
			
 
				+        print(f"      ✅ Step 4: 品类得分 = {category_eval.category_score}")
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"      ❌ Prompt3或4评估失败: {str(e)[:100]}")
			
 
				+        state["error"] = str(e)
			
 
				+        state["should_continue"] = False
			
 
				+
			
 
				+    return state
			
 
				+
			
 
				+
			
 
				+async def score_node(state: EvaluationState) -> EvaluationState:
			
 
				+    """
			
 
				+    Node 4: 计算综合得分
			
 
				+    """
			
 
				+    print(f"      📊 Step 5/5: 计算综合得分...")
			
 
				+
			
 
				+    try:
			
 
				+        purpose_eval = state["purpose_eval"]
			
 
				+        category_eval = state["category_eval"]
			
 
				+
			
 
				+        if not purpose_eval or not category_eval:
			
 
				+            raise Exception("缺少目的性或品类评估结果")
			
 
				+
			
 
				+        # 计算综合得分: 目的性50% + 品类50%
			
 
				+        final_score = round(
			
 
				+            purpose_eval.purpose_score * 0.5 + category_eval.category_score * 0.5,
			
 
				+            2
			
 
				+        )
			
 
				+
			
 
				+        # 判定匹配等级
			
 
				+        if final_score >= 85:
			
 
				+            match_level = "高度匹配"
			
 
				+        elif final_score >= 70:
			
 
				+            match_level = "基本匹配"
			
 
				+        elif final_score >= 50:
			
 
				+            match_level = "部分匹配"
			
 
				+        elif final_score >= 30:
			
 
				+            match_level = "弱匹配"
			
 
				+        else:
			
 
				+            match_level = "不匹配"
			
 
				+
			
 
				+        state["final_score"] = final_score
			
 
				+        state["match_level"] = match_level
			
 
				+
			
 
				+        print(f"      ✅ 综合得分: {final_score} ({match_level})")
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"      ❌ 综合评分失败: {str(e)[:100]}")
			
 
				+        state["error"] = str(e)
			
 
				+
			
 
				+    return state
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# LangGraph 图定义
			
 
				+# ============================================================================
			
 
				+
			
 
				+def create_evaluation_graph() -> StateGraph:
			
 
				+    """创建评估流程图"""
			
 
				+
			
 
				+    # 定义条件判断
			
 
				+    def should_continue_to_content(state: EvaluationState) -> str:
			
 
				+        """判断是否继续到内容知识评估"""
			
 
				+        if not state.get("should_continue", False):
			
 
				+            return END
			
 
				+        return "content_knowledge_node"
			
 
				+
			
 
				+    def should_continue_to_match(state: EvaluationState) -> str:
			
 
				+        """判断是否继续到匹配评估"""
			
 
				+        if not state.get("should_continue", False):
			
 
				+            return END
			
 
				+        return "parallel_match_node"
			
 
				+
			
 
				+    def should_continue_to_score(state: EvaluationState) -> str:
			
 
				+        """判断是否继续到评分"""
			
 
				+        if not state.get("should_continue", False):
			
 
				+            return END
			
 
				+        return "score_node"
			
 
				+
			
 
				+    # 创建StateGraph
			
 
				+    workflow = StateGraph(EvaluationState)
			
 
				+
			
 
				+    # 添加节点
			
 
				+    workflow.add_node("knowledge_node", knowledge_node)
			
 
				+    workflow.add_node("content_knowledge_node", content_knowledge_node)
			
 
				+    workflow.add_node("parallel_match_node", parallel_match_node)
			
 
				+    workflow.add_node("score_node", score_node)
			
 
				+
			
 
				+    # 设置入口点
			
 
				+    workflow.set_entry_point("knowledge_node")
			
 
				+
			
 
				+    # 添加条件边
			
 
				+    workflow.add_conditional_edges(
			
 
				+        "knowledge_node",
			
 
				+        should_continue_to_content,
			
 
				+        {
			
 
				+            "content_knowledge_node": "content_knowledge_node",
			
 
				+            END: END
			
 
				+        }
			
 
				+    )
			
 
				+
			
 
				+    workflow.add_conditional_edges(
			
 
				+        "content_knowledge_node",
			
 
				+        should_continue_to_match,
			
 
				+        {
			
 
				+            "parallel_match_node": "parallel_match_node",
			
 
				+            END: END
			
 
				+        }
			
 
				+    )
			
 
				+
			
 
				+    workflow.add_conditional_edges(
			
 
				+        "parallel_match_node",
			
 
				+        should_continue_to_score,
			
 
				+        {
			
 
				+            "score_node": "score_node",
			
 
				+            END: END
			
 
				+        }
			
 
				+    )
			
 
				+
			
 
				+    # score_node结束后直接到END
			
 
				+    workflow.add_edge("score_node", END)
			
 
				+
			
 
				+    return workflow.compile()
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# 主评估函数
			
 
				+# ============================================================================
			
 
				+
			
 
				+async def evaluate_post_v4(
			
 
				+    post,
			
 
				+    original_query: str,
			
 
				+    semaphore: Optional[asyncio.Semaphore] = None
			
 
				+) -> tuple:
			
 
				+    """
			
 
				+    V4评估主函数 (LangGraph版本)
			
 
				+
			
 
				+    Args:
			
 
				+        post: Post对象
			
 
				+        original_query: 原始搜索query
			
 
				+        semaphore: 并发控制信号量
			
 
				+
			
 
				+    Returns:
			
 
				+        (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
			
 
				+    """
			
 
				+    # 检查缓存
			
 
				+    if ENABLE_CACHE:
			
 
				+        cached_result = _load_from_cache(post.note_id)
			
 
				+        if cached_result is not None:
			
 
				+            print(f"      ♻️  使用缓存结果: {post.note_id}")
			
 
				+            return cached_result
			
 
				+
			
 
				+    print(f"      🔍 开始V4评估 (LangGraph): {post.note_id}")
			
 
				+
			
 
				+    # 初始化状态
			
 
				+    initial_state: EvaluationState = {
			
 
				+        "post": post,
			
 
				+        "original_query": original_query,
			
 
				+        "video_file": None,
			
 
				+        "video_uri": None,
			
 
				+        "temp_video_path": None,
			
 
				+        "temp_image_paths": None,
			
 
				+        "knowledge_eval": None,
			
 
				+        "content_eval": None,
			
 
				+        "purpose_eval": None,
			
 
				+        "category_eval": None,
			
 
				+        "final_score": None,
			
 
				+        "match_level": None,
			
 
				+        "should_continue": True,
			
 
				+        "error": None,
			
 
				+        "semaphore": semaphore
			
 
				+    }
			
 
				+
			
 
				+    # 处理视频
			
 
				+    if post.type == "video" and post.images and len(post.images) > 0:
			
 
				+        video_url = post.images[0]  # 视频URL通常在images[0]
			
 
				+        video_file, video_uri, temp_path = await VideoUploader.upload_video(video_url)
			
 
				+        initial_state["video_file"] = video_file
			
 
				+        initial_state["video_uri"] = video_uri
			
 
				+        initial_state["temp_video_path"] = temp_path
			
 
				+
			
 
				+        if not video_file:
			
 
				+            print(f"      ❌ 视频上传失败,停止评估")
			
 
				+            return (None, None, None, None, None, None)
			
 
				+
			
 
				+    try:
			
 
				+        # 创建并运行图
			
 
				+        graph = create_evaluation_graph()
			
 
				+        final_state = await graph.ainvoke(initial_state)
			
 
				+
			
 
				+        # 提取结果
			
 
				+        knowledge_eval = final_state.get("knowledge_eval")
			
 
				+        content_eval = final_state.get("content_eval")
			
 
				+        purpose_eval = final_state.get("purpose_eval")
			
 
				+        category_eval = final_state.get("category_eval")
			
 
				+        final_score = final_state.get("final_score")
			
 
				+        match_level = final_state.get("match_level")
			
 
				+
			
 
				+        # 保存到缓存
			
 
				+        if ENABLE_CACHE and knowledge_eval:
			
 
				+            _save_to_cache(
			
 
				+                post.note_id,
			
 
				+                (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
			
 
				+            )
			
 
				+
			
 
				+        return (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
			
 
				+
			
 
				+    finally:
			
 
				+        # 清理临时视频文件
			
 
				+        if initial_state.get("temp_video_path"):
			
 
				+            try:
			
 
				+                os.remove(initial_state["temp_video_path"])
			
 
				+                print(f"      🗑️  清理临时视频文件")
			
 
				+            except:
			
 
				+                pass
			
 
				+
			
 
				+        # 清理临时图片文件
			
 
				+        temp_image_paths = final_state.get("temp_image_paths") if 'final_state' in locals() else initial_state.get("temp_image_paths")
			
 
				+        if temp_image_paths:
			
 
				+            cleaned_count = 0
			
 
				+            for temp_path in temp_image_paths:
			
 
				+                try:
			
 
				+                    os.remove(temp_path)
			
 
				+                    cleaned_count += 1
			
 
				+                except:
			
 
				+                    pass
			
 
				+            if cleaned_count > 0:
			
 
				+                print(f"      🗑️  清理 {cleaned_count}/{len(temp_image_paths)} 个临时图片文件")
			
 
				+
			
 
				+
			
 
				+def apply_evaluation_v4_to_post(
			
 
				+    post,
			
 
				+    knowledge_eval: Optional[KnowledgeEvaluation],
			
 
				+    content_eval: Optional[ContentKnowledgeEvaluation],
			
 
				+    purpose_eval: Optional[PurposeEvaluation],
			
 
				+    category_eval: Optional[CategoryEvaluation],
			
 
				+    final_score: Optional[float],
			
 
				+    match_level: Optional[str]
			
 
				+):
			
 
				+    """
			
 
				+    将V4评估结果应用到Post对象
			
 
				+
			
 
				+    Args:
			
 
				+        post: Post对象
			
 
				+        knowledge_eval: Prompt1结果
			
 
				+        content_eval: Prompt2结果
			
 
				+        purpose_eval: Prompt3结果
			
 
				+        category_eval: Prompt4结果
			
 
				+        final_score: 综合得分
			
 
				+        match_level: 匹配等级
			
 
				+    """
			
 
				+    # Prompt1: 判断是知识
			
 
				+    if knowledge_eval:
			
 
				+        post.is_knowledge = knowledge_eval.is_knowledge
			
 
				+        post.knowledge_evaluation = {
			
 
				+            "quick_exclude": knowledge_eval.quick_exclude,
			
 
				+            "title_layer": knowledge_eval.title_layer,
			
 
				+            "image_layer": knowledge_eval.image_layer,
			
 
				+            "text_layer": knowledge_eval.text_layer,
			
 
				+            "judgment_logic": knowledge_eval.judgment_logic,
			
 
				+            "core_evidence": knowledge_eval.core_evidence,
			
 
				+            "issues": knowledge_eval.issues,
			
 
				+            "conclusion": knowledge_eval.conclusion
			
 
				+        }
			
 
				+
			
 
				+    # Prompt2: 判断是否是内容知识
			
 
				+    if content_eval:
			
 
				+        post.is_content_knowledge = content_eval.is_content_knowledge
			
 
				+        post.knowledge_score = float(content_eval.final_score)
			
 
				+        post.content_knowledge_evaluation = {
			
 
				+            "is_content_knowledge": content_eval.is_content_knowledge,
			
 
				+            "final_score": content_eval.final_score,
			
 
				+            "level": content_eval.level,
			
 
				+            "quick_exclude": content_eval.quick_exclude,
			
 
				+            "dimension_scores": content_eval.dimension_scores,
			
 
				+            "core_evidence": content_eval.core_evidence,
			
 
				+            "issues": content_eval.issues,
			
 
				+            "summary": content_eval.summary
			
 
				+        }
			
 
				+
			
 
				+    # Prompt3: 目的性匹配
			
 
				+    if purpose_eval:
			
 
				+        post.purpose_score = purpose_eval.purpose_score
			
 
				+        post.purpose_evaluation = {
			
 
				+            "purpose_score": purpose_eval.purpose_score,
			
 
				+            "core_motivation": purpose_eval.core_motivation,
			
 
				+            "image_value": purpose_eval.image_value,
			
 
				+            "title_intention": purpose_eval.title_intention,
			
 
				+            "text_content": purpose_eval.text_content,
			
 
				+            "match_level": purpose_eval.match_level,
			
 
				+            "core_basis": purpose_eval.core_basis
			
 
				+        }
			
 
				+
			
 
				+    # Prompt4: 品类匹配
			
 
				+    if category_eval:
			
 
				+        post.category_score = category_eval.category_score
			
 
				+        post.category_evaluation = {
			
 
				+            "category_score": category_eval.category_score,
			
 
				+            "original_category_analysis": category_eval.original_category_analysis,
			
 
				+            "actual_category": category_eval.actual_category,
			
 
				+            "match_level": category_eval.match_level,
			
 
				+            "category_match_analysis": category_eval.category_match_analysis,
			
 
				+            "core_basis": category_eval.core_basis
			
 
				+        }
			
 
				+
			
 
				+    # 综合得分
			
 
				+    if final_score is not None and match_level is not None:
			
 
				+        post.final_score = final_score
			
 
				+        post.match_level = match_level
			
 
				+
			
 
				+    # 设置评估时间和版本
			
 
				+    post.evaluation_time = datetime.now().isoformat()
			
 
				+    post.evaluator_version = "v4.0_langgraph"
			
 
				+
			
 
				+
			
 
				+async def batch_evaluate_posts_v4(
			
 
				+    posts: list,
			
 
				+    original_query: str,
			
 
				+    max_concurrent: int = MAX_CONCURRENT_EVALUATIONS
			
 
				+) -> int:
			
 
				+    """
			
 
				+    批量评估多个帖子 (V4版本)
			
 
				+
			
 
				+    Args:
			
 
				+        posts: Post对象列表
			
 
				+        original_query: 原始搜索query
			
 
				+        max_concurrent: 最大并发数
			
 
				+
			
 
				+    Returns:
			
 
				+        成功评估的帖子数量
			
 
				+    """
			
 
				+    semaphore = asyncio.Semaphore(max_concurrent)
			
 
				+
			
 
				+    print(f"\n📊 开始批量评估 {len(posts)} 个帖子 (LangGraph + Gemini,并发限制: {max_concurrent})...")
			
 
				+
			
 
				+    tasks = [evaluate_post_v4(post, original_query, semaphore) for post in posts]
			
 
				+    results = await asyncio.gather(*tasks)
			
 
				+
			
 
				+    success_count = 0
			
 
				+    for i, result in enumerate(results):
			
 
				+        knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level = result
			
 
				+
			
 
				+        if knowledge_eval:
			
 
				+            apply_evaluation_v4_to_post(
			
 
				+                posts[i],
			
 
				+                knowledge_eval,
			
 
				+                content_eval,
			
 
				+                purpose_eval,
			
 
				+                category_eval,
			
 
				+                final_score,
			
 
				+                match_level
			
 
				+            )
			
 
				+            success_count += 1
			
 
				+
			
 
				+    print(f"✅ 批量评估完成: {success_count}/{len(posts)} 帖子已评估")
			
 
				+
			
 
				+    return success_count
			
--- a/script/search/enrichment_helper.py
+++ b/script/search/enrichment_helper.py
@@ -0,0 +1,261 @@
 
				+#!/usr/bin/env python3
			
 
				+"""
			
 
				+帖子详情补充工具
			
 
				+用于将搜索结果与详情API结果合并，补充高清图片、视频URL、作者信息等
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+from typing import Dict, Any, List
			
 
				+from datetime import datetime
			
 
				+
			
 
				+
			
 
				+def parse_detail_result(detail_response: Dict[str, Any]) -> Dict[str, Any] | None:
			
 
				+    """
			
 
				+    解析详情API返回的结果
			
 
				+
			
 
				+    Args:
			
 
				+        detail_response: 详情API的完整响应
			
 
				+
			
 
				+    Returns:
			
 
				+        解析后的数据字典，失败返回None
			
 
				+    """
			
 
				+    try:
			
 
				+        # 检查success字段
			
 
				+        if not detail_response.get("success"):
			
 
				+            print(f"    ⚠️  详情API返回失败")
			
 
				+            return None
			
 
				+
			
 
				+        # 解析result字段（可能是JSON字符串）
			
 
				+        result = detail_response.get("result", "")
			
 
				+        if isinstance(result, str):
			
 
				+            result = json.loads(result)
			
 
				+
			
 
				+        # 提取data
			
 
				+        if isinstance(result, list) and len(result) > 0:
			
 
				+            return result[0].get("data", {})
			
 
				+        elif isinstance(result, dict):
			
 
				+            return result.get("data", {})
			
 
				+
			
 
				+        return None
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"    ✗ 解析详情结果失败: {e}")
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+def enrich_post_with_detail(post: Any, detail_response: Dict[str, Any]) -> bool:
			
 
				+    """
			
 
				+    使用详情API的数据补充Post对象
			
 
				+
			
 
				+    Args:
			
 
				+        post: Post对象（会被直接修改）
			
 
				+        detail_response: 详情API的完整响应
			
 
				+
			
 
				+    Returns:
			
 
				+        是否成功补充
			
 
				+    """
			
 
				+    # 解析详情数据
			
 
				+    detail_data = parse_detail_result(detail_response)
			
 
				+    if not detail_data:
			
 
				+        return False
			
 
				+
			
 
				+    try:
			
 
				+        # 1. 正文内容 - 使用详情API的完整正文覆盖
			
 
				+        body_text = detail_data.get("body_text", "")
			
 
				+        if body_text:
			
 
				+            post.body_text = body_text
			
 
				+
			
 
				+        # 2. 作者信息
			
 
				+        post.author_name = detail_data.get("channel_account_name", "")
			
 
				+        post.author_id = detail_data.get("channel_account_id", "")
			
 
				+
			
 
				+        # 3. 发布时间
			
 
				+        post.publish_time = detail_data.get("publish_timestamp", 0)
			
 
				+
			
 
				+        # 4. 互动信息 - 使用详情API的精确数据更新
			
 
				+        post.interact_info.update({
			
 
				+            "like_count": detail_data.get("like_count", 0),       # 详情API字段
			
 
				+            "collect_count": detail_data.get("collect_count", 0), # 详情API字段
			
 
				+        })
			
 
				+
			
 
				+        # 5. 根据类型处理图片/视频
			
 
				+        if post.type == "video":
			
 
				+            # 视频帖：补充视频URL（images保持不变）
			
 
				+            video_url = detail_data.get("video", "")
			
 
				+            if video_url:
			
 
				+                post.video = video_url
			
 
				+
			
 
				+        else:
			
 
				+            # 图文帖：仅保存CDN图片到cdn_images字段，不覆盖images
			
 
				+            images_data = detail_data.get("images", [])
			
 
				+            if images_data:
			
 
				+                # 提取CDN URL
			
 
				+                cdn_urls = []
			
 
				+                for img in images_data:
			
 
				+                    if isinstance(img, dict):
			
 
				+                        cdn_url = img.get("cdn_url", "")
			
 
				+                        if cdn_url:
			
 
				+                            cdn_urls.append(cdn_url)
			
 
				+                    elif isinstance(img, str):
			
 
				+                        cdn_urls.append(img)
			
 
				+
			
 
				+                # 仅保存CDN图片列表，不覆盖images
			
 
				+                post.cdn_images = cdn_urls
			
 
				+
			
 
				+        # 6. 标记已获取详情
			
 
				+        post.detail_fetched = True
			
 
				+
			
 
				+        return True
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"    ✗ 补充详情失败: {e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def enrich_posts_batch(
			
 
				+    posts: List[Any],
			
 
				+    detail_client: Any,
			
 
				+    show_progress: bool = True,
			
 
				+    delay: int = 1
			
 
				+) -> tuple[int, int]:
			
 
				+    """
			
 
				+    批量补充帖子详情
			
 
				+
			
 
				+    Args:
			
 
				+        posts: Post对象列表（会被直接修改）
			
 
				+        detail_client: XiaohongshuDetail实例
			
 
				+        show_progress: 是否显示进度
			
 
				+        delay: 请求间隔（秒）
			
 
				+
			
 
				+    Returns:
			
 
				+        (成功数量, 失败数量)
			
 
				+    """
			
 
				+    success_count = 0
			
 
				+    fail_count = 0
			
 
				+    total = len(posts)
			
 
				+
			
 
				+    for idx, post in enumerate(posts, 1):
			
 
				+        if show_progress:
			
 
				+            print(f"补充详情 ({idx}/{total}): {post.note_id}")
			
 
				+
			
 
				+        try:
			
 
				+            # 调用详情API
			
 
				+            detail_response = detail_client.get_detail(post.note_id)
			
 
				+
			
 
				+            # 合并数据
			
 
				+            if enrich_post_with_detail(post, detail_response):
			
 
				+                success_count += 1
			
 
				+                if show_progress:
			
 
				+                    print(f"  ✓ 成功补充")
			
 
				+            else:
			
 
				+                fail_count += 1
			
 
				+                if show_progress:
			
 
				+                    print(f"  ✗ 补充失败")
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            fail_count += 1
			
 
				+            if show_progress:
			
 
				+                print(f"  ✗ 请求失败: {e}")
			
 
				+
			
 
				+        # 避免请求过快（最后一个不需要延迟）
			
 
				+        if idx < total and delay > 0:
			
 
				+            import time
			
 
				+            time.sleep(delay)
			
 
				+
			
 
				+    return success_count, fail_count
			
 
				+
			
 
				+
			
 
				+def create_enriched_summary(post: Any) -> Dict[str, Any]:
			
 
				+    """
			
 
				+    创建包含详情的帖子摘要（用于保存）
			
 
				+
			
 
				+    Args:
			
 
				+        post: Post对象
			
 
				+
			
 
				+    Returns:
			
 
				+        摘要字典
			
 
				+    """
			
 
				+    summary = {
			
 
				+        # 基础信息
			
 
				+        "note_id": post.note_id,
			
 
				+        "note_url": post.note_url,
			
 
				+        "title": post.title,
			
 
				+        "body_text": post.body_text,
			
 
				+        "type": post.type,
			
 
				+
			
 
				+        # 媒体信息
			
 
				+        "images": post.images,
			
 
				+        "cdn_images": post.cdn_images,
			
 
				+        "video": post.video,
			
 
				+
			
 
				+        # 作者信息（详情补充）
			
 
				+        "author": {
			
 
				+            "name": post.author_name,
			
 
				+            "id": post.author_id
			
 
				+        } if post.detail_fetched else {},
			
 
				+
			
 
				+        # 互动信息
			
 
				+        "interact_info": post.interact_info,
			
 
				+
			
 
				+        # 时间信息
			
 
				+        "publish_time": post.publish_time,
			
 
				+        "publish_time_readable": datetime.fromtimestamp(
			
 
				+            post.publish_time / 1000
			
 
				+        ).strftime("%Y-%m-%d %H:%M:%S") if post.publish_time > 0 else "",
			
 
				+
			
 
				+        # 元数据
			
 
				+        "detail_fetched": post.detail_fetched
			
 
				+    }
			
 
				+
			
 
				+    return summary
			
 
				+
			
 
				+
			
 
				+def print_enrichment_stats(posts: List[Any]) -> None:
			
 
				+    """
			
 
				+    打印详情补充统计信息
			
 
				+
			
 
				+    Args:
			
 
				+        posts: Post对象列表
			
 
				+    """
			
 
				+    total = len(posts)
			
 
				+    enriched = sum(1 for p in posts if p.detail_fetched)
			
 
				+
			
 
				+    video_count = sum(1 for p in posts if p.type == "video")
			
 
				+    image_count = total - video_count
			
 
				+
			
 
				+    print("\n" + "=" * 60)
			
 
				+    print("详情补充统计")
			
 
				+    print("=" * 60)
			
 
				+    print(f"总帖子数: {total}")
			
 
				+    print(f"  - 图文帖: {image_count}")
			
 
				+    print(f"  - 视频帖: {video_count}")
			
 
				+    print(f"\n已补充详情: {enriched}/{total} ({enriched*100//total if total > 0 else 0}%)")
			
 
				+    print(f"未补充详情: {total - enriched}")
			
 
				+
			
 
				+    if enriched > 0:
			
 
				+        print("\n详情字段统计:")
			
 
				+        has_author = sum(1 for p in posts if p.author_name)
			
 
				+        has_publish_time = sum(1 for p in posts if p.publish_time > 0)
			
 
				+        has_cdn_images = sum(1 for p in posts if p.cdn_images)
			
 
				+        has_video_url = sum(1 for p in posts if p.video and p.type == "video")
			
 
				+
			
 
				+        print(f"  - 作者信息: {has_author}/{enriched}")
			
 
				+        print(f"  - 发布时间: {has_publish_time}/{enriched}")
			
 
				+        print(f"  - 高清图片: {has_cdn_images}/{image_count} (图文帖)")
			
 
				+        print(f"  - 视频URL: {has_video_url}/{video_count} (视频帖)")
			
 
				+
			
 
				+    print("=" * 60 + "\n")
			
 
				+
			
 
				+
			
 
				+# ============================================================================
			
 
				+# 使用示例
			
 
				+# ============================================================================
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    print("这是一个辅助模块，请通过 search_with_detail.py 使用")
			
 
				+    print("\n主要功能：")
			
 
				+    print("1. parse_detail_result() - 解析详情API响应")
			
 
				+    print("2. enrich_post_with_detail() - 补充单个帖子详情")
			
 
				+    print("3. enrich_posts_batch() - 批量补充详情")
			
 
				+    print("4. create_enriched_summary() - 创建详情摘要")
			
 
				+    print("5. print_enrichment_stats() - 打印统计信息")
			
--- a/script/search/search_with_detail.py
+++ b/script/search/search_with_detail.py
@@ -0,0 +1,292 @@
 
				+#!/usr/bin/env python3
			
 
				+"""
			
 
				+小红书搜索 + 详情补充 - 端到端工具
			
 
				+先调用搜索API获取笔记列表，再批量调用详情API补充完整信息
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+import os
			
 
				+import argparse
			
 
				+import sys
			
 
				+from datetime import datetime
			
 
				+from typing import List, Dict, Any
			
 
				+
			
 
				+# 添加项目根目录到路径
			
 
				+script_dir = os.path.dirname(os.path.abspath(__file__))
			
 
				+project_root = os.path.dirname(os.path.dirname(script_dir))
			
 
				+sys.path.insert(0, project_root)
			
 
				+
			
 
				+from script.search.xiaohongshu_search import XiaohongshuSearch
			
 
				+from script.search.xiaohongshu_detail import XiaohongshuDetail
			
 
				+from script.search.enrichment_helper import (
			
 
				+    enrich_posts_batch,
			
 
				+    create_enriched_summary,
			
 
				+    print_enrichment_stats
			
 
				+)
			
 
				+from knowledge_search_traverse import Post, process_note_data
			
 
				+
			
 
				+
			
 
				+def search_and_enrich(
			
 
				+    keyword: str,
			
 
				+    content_type: str = "不限",
			
 
				+    sort_type: str = "综合",
			
 
				+    publish_time: str = "不限",
			
 
				+    cursor: str = "",
			
 
				+    enable_detail: bool = True,
			
 
				+    detail_delay: int = 1,
			
 
				+    results_dir: str = None
			
 
				+) -> tuple[List[Post], str]:
			
 
				+    """
			
 
				+    搜索并补充详情的主流程
			
 
				+
			
 
				+    Args:
			
 
				+        keyword: 搜索关键词
			
 
				+        content_type: 内容类型
			
 
				+        sort_type: 排序方式
			
 
				+        publish_time: 发布时间筛选
			
 
				+        cursor: 翻页游标
			
 
				+        enable_detail: 是否启用详情补充
			
 
				+        detail_delay: 详情请求间隔（秒）
			
 
				+        results_dir: 结果输出目录
			
 
				+
			
 
				+    Returns:
			
 
				+        (Post对象列表, 保存的文件路径)
			
 
				+    """
			
 
				+    print("\n" + "=" * 80)
			
 
				+    print(f"小红书搜索 + 详情补充工具")
			
 
				+    print("=" * 80)
			
 
				+    print(f"关键词: {keyword}")
			
 
				+    print(f"内容类型: {content_type}")
			
 
				+    print(f"排序方式: {sort_type}")
			
 
				+    print(f"发布时间: {publish_time}")
			
 
				+    print(f"详情补充: {'启用' if enable_detail else '禁用'}")
			
 
				+    print("=" * 80 + "\n")
			
 
				+
			
 
				+    # 1. 执行搜索
			
 
				+    print("步骤 1/3: 执行搜索...")
			
 
				+    print("-" * 80)
			
 
				+
			
 
				+    search_client = XiaohongshuSearch(results_dir=results_dir)
			
 
				+    search_result = search_client.search(
			
 
				+        keyword=keyword,
			
 
				+        content_type=content_type,
			
 
				+        sort_type=sort_type,
			
 
				+        publish_time=publish_time,
			
 
				+        cursor=cursor
			
 
				+    )
			
 
				+
			
 
				+    # 解析搜索结果
			
 
				+    notes_data = search_result.get("data", {}).get("data", [])
			
 
				+    print(f"✓ 搜索完成，获得 {len(notes_data)} 条结果\n")
			
 
				+
			
 
				+    if not notes_data:
			
 
				+        print("未找到任何结果")
			
 
				+        return [], ""
			
 
				+
			
 
				+    # 2. 转换为Post对象
			
 
				+    print("步骤 2/3: 解析搜索结果...")
			
 
				+    print("-" * 80)
			
 
				+
			
 
				+    posts: List[Post] = []
			
 
				+    for note in notes_data:
			
 
				+        try:
			
 
				+            post = process_note_data(note)
			
 
				+            posts.append(post)
			
 
				+        except Exception as e:
			
 
				+            print(f"  ✗ 解析失败: {e}")
			
 
				+
			
 
				+    print(f"✓ 成功解析 {len(posts)}/{len(notes_data)} 条结果\n")
			
 
				+
			
 
				+    # 3. 补充详情（如果启用）
			
 
				+    if enable_detail and posts:
			
 
				+        print("步骤 3/3: 补充详情信息...")
			
 
				+        print("-" * 80)
			
 
				+
			
 
				+        detail_client = XiaohongshuDetail(results_dir=results_dir)
			
 
				+        success, fail = enrich_posts_batch(
			
 
				+            posts,
			
 
				+            detail_client,
			
 
				+            show_progress=True,
			
 
				+            delay=detail_delay
			
 
				+        )
			
 
				+
			
 
				+        print(f"\n✓ 详情补充完成: 成功 {success}/{len(posts)}, 失败 {fail}")
			
 
				+        print_enrichment_stats(posts)
			
 
				+    else:
			
 
				+        print("步骤 3/3: 跳过详情补充\n")
			
 
				+
			
 
				+    # 4. 保存结果
			
 
				+    filepath = save_enriched_results(keyword, posts, search_result, results_dir)
			
 
				+
			
 
				+    return posts, filepath
			
 
				+
			
 
				+
			
 
				+def save_enriched_results(
			
 
				+    keyword: str,
			
 
				+    posts: List[Post],
			
 
				+    search_result: Dict[str, Any],
			
 
				+    results_dir: str = None
			
 
				+) -> str:
			
 
				+    """
			
 
				+    保存增强后的结果
			
 
				+
			
 
				+    Args:
			
 
				+        keyword: 搜索关键词
			
 
				+        posts: Post对象列表
			
 
				+        search_result: 原始搜索结果
			
 
				+        results_dir: 结果输出目录
			
 
				+
			
 
				+    Returns:
			
 
				+        保存的文件路径
			
 
				+    """
			
 
				+    # 确定输出目录
			
 
				+    if results_dir:
			
 
				+        base_dir = results_dir
			
 
				+    else:
			
 
				+        script_dir = os.path.dirname(os.path.abspath(__file__))
			
 
				+        project_root = os.path.dirname(os.path.dirname(script_dir))
			
 
				+        base_dir = os.path.join(project_root, "data", "search")
			
 
				+
			
 
				+    # 创建目录
			
 
				+    result_dir = os.path.join(base_dir, "enriched", keyword)
			
 
				+    os.makedirs(result_dir, exist_ok=True)
			
 
				+
			
 
				+    # 构建结果数据
			
 
				+    enriched_data = {
			
 
				+        "metadata": {
			
 
				+            "keyword": keyword,
			
 
				+            "search_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
			
 
				+            "total_posts": len(posts),
			
 
				+            "enriched_posts": sum(1 for p in posts if p.detail_fetched),
			
 
				+            "video_posts": sum(1 for p in posts if p.type == "video"),
			
 
				+            "image_posts": sum(1 for p in posts if p.type != "video"),
			
 
				+        },
			
 
				+        "posts": [create_enriched_summary(p) for p in posts],
			
 
				+        "original_search_result": search_result  # 保留原始搜索结果供参考
			
 
				+    }
			
 
				+
			
 
				+    # 保存文件
			
 
				+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
			
 
				+    filename = f"{timestamp}_enriched.json"
			
 
				+    filepath = os.path.join(result_dir, filename)
			
 
				+
			
 
				+    with open(filepath, 'w', encoding='utf-8') as f:
			
 
				+        json.dump(enriched_data, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    print(f"\n✓ 结果已保存: {filepath}\n")
			
 
				+    return filepath
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """命令行入口"""
			
 
				+    parser = argparse.ArgumentParser(
			
 
				+        description='小红书搜索 + 详情补充工具',
			
 
				+        formatter_class=argparse.RawDescriptionHelpFormatter,
			
 
				+        epilog="""
			
 
				+使用示例:
			
 
				+  # 基础搜索并补充详情
			
 
				+  python3 search_with_detail.py --keyword "健身教程"
			
 
				+
			
 
				+  # 搜索视频内容
			
 
				+  python3 search_with_detail.py --keyword "化妆教程" --content-type "视频"
			
 
				+
			
 
				+  # 仅搜索不补充详情
			
 
				+  python3 search_with_detail.py --keyword "美食" --no-detail
			
 
				+
			
 
				+  # 自定义输出目录
			
 
				+  python3 search_with_detail.py --keyword "旅游" --results-dir "custom/output"
			
 
				+        """
			
 
				+    )
			
 
				+
			
 
				+    # 搜索参数
			
 
				+    parser.add_argument(
			
 
				+        '--keyword',
			
 
				+        type=str,
			
 
				+        required=True,
			
 
				+        help='搜索关键词（必填）'
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        '--content-type',
			
 
				+        type=str,
			
 
				+        default='不限',
			
 
				+        choices=['不限', '视频', '图文'],
			
 
				+        help='内容类型（默认: 不限）'
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        '--sort-type',
			
 
				+        type=str,
			
 
				+        default='综合',
			
 
				+        choices=['综合', '最新', '最多点赞', '最多评论'],
			
 
				+        help='排序方式（默认: 综合）'
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        '--publish-time',
			
 
				+        type=str,
			
 
				+        default='不限',
			
 
				+        choices=['不限', '一天内', '一周内', '半年内'],
			
 
				+        help='发布时间筛选（默认: 不限）'
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        '--cursor',
			
 
				+        type=str,
			
 
				+        default='',
			
 
				+        help='翻页游标（默认为空，即第一页）'
			
 
				+    )
			
 
				+
			
 
				+    # 详情补充参数
			
 
				+    parser.add_argument(
			
 
				+        '--no-detail',
			
 
				+        action='store_true',
			
 
				+        help='禁用详情补充（仅搜索）'
			
 
				+    )
			
 
				+    parser.add_argument(
			
 
				+        '--detail-delay',
			
 
				+        type=int,
			
 
				+        default=1,
			
 
				+        help='详情请求间隔时间（秒），默认1秒'
			
 
				+    )
			
 
				+
			
 
				+    # 输出参数
			
 
				+    parser.add_argument(
			
 
				+        '--results-dir',
			
 
				+        type=str,
			
 
				+        default=None,
			
 
				+        help='结果输出目录（默认: data/search）'
			
 
				+    )
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    # 执行搜索和补充
			
 
				+    try:
			
 
				+        posts, filepath = search_and_enrich(
			
 
				+            keyword=args.keyword,
			
 
				+            content_type=args.content_type,
			
 
				+            sort_type=args.sort_type,
			
 
				+            publish_time=args.publish_time,
			
 
				+            cursor=args.cursor,
			
 
				+            enable_detail=not args.no_detail,
			
 
				+            detail_delay=args.detail_delay,
			
 
				+            results_dir=args.results_dir
			
 
				+        )
			
 
				+
			
 
				+        # 打印摘要
			
 
				+        print("=" * 80)
			
 
				+        print("执行完成")
			
 
				+        print("=" * 80)
			
 
				+        print(f"关键词: {args.keyword}")
			
 
				+        print(f"获得帖子: {len(posts)} 条")
			
 
				+        if not args.no_detail:
			
 
				+            enriched = sum(1 for p in posts if p.detail_fetched)
			
 
				+            print(f"详情补充: {enriched}/{len(posts)} 条")
			
 
				+        print(f"结果文件: {filepath}")
			
 
				+        print("=" * 80)
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"\n✗ 执行失败: {e}", file=sys.stderr)
			
 
				+        import traceback
			
 
				+        traceback.print_exc()
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/script/search/xiaohongshu_detail.py
+++ b/script/search/xiaohongshu_detail.py
@@ -0,0 +1,282 @@
 
				+#!/usr/bin/env python3
			
 
				+"""
			
 
				+小红书笔记详情获取工具
			
 
				+根据笔记ID获取小红书笔记的详细信息
			
 
				+"""
			
 
				+
			
 
				+import requests
			
 
				+import json
			
 
				+import os
			
 
				+import argparse
			
 
				+import time
			
 
				+from datetime import datetime
			
 
				+from typing import Dict, Any, List
			
 
				+
			
 
				+
			
 
				+class XiaohongshuDetail:
			
 
				+    """小红书笔记详情API封装类"""
			
 
				+
			
 
				+    BASE_URL = "http://47.84.182.56:8001"
			
 
				+    TOOL_NAME = "get_xhs_detail_by_note_id"
			
 
				+    PLATFORM = "xiaohongshu"
			
 
				+
			
 
				+    def __init__(self, results_dir: str = None):
			
 
				+        """
			
 
				+        初始化API客户端
			
 
				+
			
 
				+        Args:
			
 
				+            results_dir: 结果输出目录，默认为项目根目录下的 data/detail 文件夹
			
 
				+        """
			
 
				+        self.api_url = f"{self.BASE_URL}/tools/call/{self.TOOL_NAME}"
			
 
				+
			
 
				+        # 设置结果输出目录
			
 
				+        if results_dir:
			
 
				+            self.results_base_dir = results_dir
			
 
				+        else:
			
 
				+            # 默认使用项目根目录的 data/detail 文件夹
			
 
				+            script_dir = os.path.dirname(os.path.abspath(__file__))
			
 
				+            project_root = os.path.dirname(os.path.dirname(script_dir))
			
 
				+            self.results_base_dir = os.path.join(project_root, "data", "detail")
			
 
				+
			
 
				+    def get_detail(
			
 
				+        self,
			
 
				+        note_id: str,
			
 
				+        timeout: int = 30,
			
 
				+        max_retries: int = 3,
			
 
				+        retry_delay: int = 2
			
 
				+    ) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        获取小红书笔记详情（带重试机制）
			
 
				+
			
 
				+        Args:
			
 
				+            note_id: 笔记ID
			
 
				+            timeout: 请求超时时间（秒），默认30秒
			
 
				+            max_retries: 最大重试次数，默认3次
			
 
				+            retry_delay: 重试间隔时间（秒），默认2秒
			
 
				+
			
 
				+        Returns:
			
 
				+            API响应的JSON数据
			
 
				+
			
 
				+        Raises:
			
 
				+            requests.exceptions.RequestException: 所有重试都失败时抛出异常
			
 
				+        """
			
 
				+        payload = {
			
 
				+            "note_id": note_id
			
 
				+        }
			
 
				+
			
 
				+        last_exception = None
			
 
				+
			
 
				+        # 重试循环：最多尝试 max_retries 次
			
 
				+        for attempt in range(1, max_retries + 1):
			
 
				+            try:
			
 
				+                if attempt > 1:
			
 
				+                    print(f"    重试第 {attempt - 1}/{max_retries - 1} 次: {note_id}")
			
 
				+
			
 
				+                response = requests.post(
			
 
				+                    self.api_url,
			
 
				+                    json=payload,
			
 
				+                    timeout=timeout,
			
 
				+                    headers={"Content-Type": "application/json"}
			
 
				+                )
			
 
				+                response.raise_for_status()
			
 
				+                result = response.json()
			
 
				+
			
 
				+                if attempt > 1:
			
 
				+                    print(f"    ✓ 重试成功")
			
 
				+
			
 
				+                return result
			
 
				+
			
 
				+            except requests.exceptions.RequestException as e:
			
 
				+                last_exception = e
			
 
				+
			
 
				+                if attempt < max_retries:
			
 
				+                    # 还有重试机会，等待后继续
			
 
				+                    print(f"    ✗ 请求失败 (第{attempt}次尝试): {e}")
			
 
				+                    print(f"    等待 {retry_delay} 秒后重试...")
			
 
				+                    time.sleep(retry_delay)
			
 
				+                else:
			
 
				+                    # 已达最大重试次数，抛出异常
			
 
				+                    print(f"    ✗ 请求失败 (已达最大重试次数 {max_retries}): {e}")
			
 
				+
			
 
				+        # 所有重试都失败，抛出最后一次的异常
			
 
				+        raise last_exception
			
 
				+
			
 
				+    def get_details_batch(
			
 
				+        self,
			
 
				+        note_ids: List[str],
			
 
				+        timeout: int = 30,
			
 
				+        max_retries: int = 3,
			
 
				+        retry_delay: int = 2,
			
 
				+        batch_delay: int = 1
			
 
				+    ) -> List[Dict[str, Any]]:
			
 
				+        """
			
 
				+        批量获取多个笔记的详情
			
 
				+
			
 
				+        Args:
			
 
				+            note_ids: 笔记ID列表
			
 
				+            timeout: 请求超时时间（秒），默认30秒
			
 
				+            max_retries: 最大重试次数，默认3次
			
 
				+            retry_delay: 重试间隔时间（秒），默认2秒
			
 
				+            batch_delay: 批量请求间隔时间（秒），默认1秒
			
 
				+
			
 
				+        Returns:
			
 
				+            包含所有笔记详情的列表
			
 
				+        """
			
 
				+        results = []
			
 
				+        total = len(note_ids)
			
 
				+
			
 
				+        for idx, note_id in enumerate(note_ids, 1):
			
 
				+            print(f"正在获取笔记详情 ({idx}/{total}): {note_id}")
			
 
				+            try:
			
 
				+                result = self.get_detail(note_id, timeout, max_retries, retry_delay)
			
 
				+                results.append({
			
 
				+                    "note_id": note_id,
			
 
				+                    "success": True,
			
 
				+                    "data": result
			
 
				+                })
			
 
				+                print(f"  ✓ 成功获取")
			
 
				+            except Exception as e:
			
 
				+                print(f"  ✗ 获取失败: {e}")
			
 
				+                results.append({
			
 
				+                    "note_id": note_id,
			
 
				+                    "success": False,
			
 
				+                    "error": str(e)
			
 
				+                })
			
 
				+
			
 
				+            # 避免请求过快，添加延迟（最后一个不需要延迟）
			
 
				+            if idx < total:
			
 
				+                time.sleep(batch_delay)
			
 
				+
			
 
				+        return results
			
 
				+
			
 
				+    def save_result(self, note_id: str, result: Dict[str, Any]) -> str:
			
 
				+        """
			
 
				+        保存单个笔记详情到文件
			
 
				+        目录结构: results/xiaohongshu_detail/note_id/时间戳.json
			
 
				+
			
 
				+        Args:
			
 
				+            note_id: 笔记ID
			
 
				+            result: API返回的结果
			
 
				+
			
 
				+        Returns:
			
 
				+            保存的文件路径
			
 
				+        """
			
 
				+        # 创建目录结构: results/xiaohongshu_detail/note_id/
			
 
				+        result_dir = os.path.join(self.results_base_dir, "xiaohongshu_detail", note_id)
			
 
				+        os.makedirs(result_dir, exist_ok=True)
			
 
				+
			
 
				+        # 文件名使用时间戳
			
 
				+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
			
 
				+        filename = f"{timestamp}.json"
			
 
				+        filepath = os.path.join(result_dir, filename)
			
 
				+
			
 
				+        # 保存结果
			
 
				+        with open(filepath, 'w', encoding='utf-8') as f:
			
 
				+            json.dump(result, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+        return filepath
			
 
				+
			
 
				+    def save_batch_results(self, results: List[Dict[str, Any]], batch_name: str = None) -> str:
			
 
				+        """
			
 
				+        保存批量获取的结果到单个文件
			
 
				+        目录结构: results/xiaohongshu_detail/batch/时间戳_批次名.json
			
 
				+
			
 
				+        Args:
			
 
				+            results: 批量获取的结果列表
			
 
				+            batch_name: 批次名称（可选）
			
 
				+
			
 
				+        Returns:
			
 
				+            保存的文件路径
			
 
				+        """
			
 
				+        # 创建目录结构
			
 
				+        result_dir = os.path.join(self.results_base_dir, "xiaohongshu_detail", "batch")
			
 
				+        os.makedirs(result_dir, exist_ok=True)
			
 
				+
			
 
				+        # 文件名使用时间戳和批次名
			
 
				+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
			
 
				+        if batch_name:
			
 
				+            filename = f"{timestamp}_{batch_name}.json"
			
 
				+        else:
			
 
				+            filename = f"{timestamp}.json"
			
 
				+        filepath = os.path.join(result_dir, filename)
			
 
				+
			
 
				+        # 保存结果
			
 
				+        with open(filepath, 'w', encoding='utf-8') as f:
			
 
				+            json.dump(results, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+        return filepath
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """示例使用"""
			
 
				+    # 解析命令行参数
			
 
				+    parser = argparse.ArgumentParser(description='小红书笔记详情获取工具')
			
 
				+    parser.add_argument(
			
 
				+        '--results-dir',
			
 
				+        type=str,
			
 
				+        default='data/detail',
			
 
				+        help='结果输出目录 (默认: data/detail)'
			
 
				+    )
			
 
				+
			
 
				+    # 创建互斥参数组：单个笔记ID 或 批量笔记ID
			
 
				+    group = parser.add_mutually_exclusive_group(required=True)
			
 
				+    group.add_argument(
			
 
				+        '--note-id',
			
 
				+        type=str,
			
 
				+        help='单个笔记ID'
			
 
				+    )
			
 
				+    group.add_argument(
			
 
				+        '--note-ids',
			
 
				+        type=str,
			
 
				+        help='多个笔记ID，用逗号分隔，例如: id1,id2,id3'
			
 
				+    )
			
 
				+    group.add_argument(
			
 
				+        '--note-ids-file',
			
 
				+        type=str,
			
 
				+        help='包含笔记ID的文件路径，每行一个ID'
			
 
				+    )
			
 
				+
			
 
				+    parser.add_argument(
			
 
				+        '--batch-name',
			
 
				+        type=str,
			
 
				+        help='批量获取时的批次名称（可选）'
			
 
				+    )
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    # 创建API客户端实例
			
 
				+    client = XiaohongshuDetail(results_dir=args.results_dir)
			
 
				+
			
 
				+    try:
			
 
				+        # 单个笔记ID
			
 
				+        if args.note_id:
			
 
				+            result = client.get_detail(args.note_id)
			
 
				+            filepath = client.save_result(args.note_id, result)
			
 
				+            print(f"Output: {filepath}")
			
 
				+
			
 
				+        # 多个笔记ID（命令行逗号分隔）
			
 
				+        elif args.note_ids:
			
 
				+            note_ids = [nid.strip() for nid in args.note_ids.split(',') if nid.strip()]
			
 
				+            results = client.get_details_batch(note_ids)
			
 
				+            filepath = client.save_batch_results(results, args.batch_name)
			
 
				+            print(f"\n批量获取完成")
			
 
				+            print(f"成功: {sum(1 for r in results if r['success'])}/{len(results)}")
			
 
				+            print(f"Output: {filepath}")
			
 
				+
			
 
				+        # 从文件读取笔记ID
			
 
				+        elif args.note_ids_file:
			
 
				+            with open(args.note_ids_file, 'r', encoding='utf-8') as f:
			
 
				+                note_ids = [line.strip() for line in f if line.strip()]
			
 
				+            results = client.get_details_batch(note_ids)
			
 
				+            filepath = client.save_batch_results(results, args.batch_name)
			
 
				+            print(f"\n批量获取完成")
			
 
				+            print(f"成功: {sum(1 for r in results if r['success'])}/{len(results)}")
			
 
				+            print(f"Output: {filepath}")
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"Error: {e}", file=__import__('sys').stderr)
			
 
				+        raise
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/script/search/xiaohongshu_search.py
+++ b/script/search/xiaohongshu_search.py
@@ -41,7 +41,7 @@ class XiaohongshuSearch:
 
				     def search(
			
 
				         self,
			
 
				         keyword: str,
			
 
				-        content_type: str = "图文",
			
 
				+        content_type: str = "不限",
			
 
				         sort_type: str = "综合",
			
 
				         publish_time: str = "不限",
			
 
				         cursor: str = "",
			
@@ -91,7 +91,14 @@ class XiaohongshuSearch:
 
				                     headers={"Content-Type": "application/json"}
			
 
				                 )
			
 
				                 response.raise_for_status()
			
 
				-                result = response.json()
			
 
				+                api_response = response.json()
			
 
				+
			
 
				+                # 解析API返回的result字段（是JSON字符串）
			
 
				+                if not api_response.get("success"):
			
 
				+                    raise Exception(f"API返回失败: {api_response}")
			
 
				+
			
 
				+                result_str = api_response.get("result", "{}")
			
 
				+                result = json.loads(result_str)
			
 
				 
			
 
				                 # 预处理返回数据：提取 image_list 中的 URL 字符串
			
 
				                 self._preprocess_response(result)
			
@@ -125,7 +132,6 @@ class XiaohongshuSearch:
 
				         """
			
 
				         # 获取帖子列表
			
 
				         notes = result.get("data", {}).get("data", [])
			
 
				-
			
 
				         for note in notes:
			
 
				             note_card = note.get("note_card", {})
			
 
				             image_list_raw = note_card.get("image_list", [])