7 місяців тому · 8b15fcf950
--- a/knowledge_search_traverse.py
+++ b/knowledge_search_traverse.py
@@ -17,6 +17,8 @@ MODEL_NAME = "google/gemini-2.5-flash"
 
															 REQUIRED_SCORE_GAIN = 0.02
														
 
															 from script.search_recommendations.xiaohongshu_search_recommendations import XiaohongshuSearchRecommendations
														
 
															 from script.search.xiaohongshu_search import XiaohongshuSearch
														
 
															+from script.search.xiaohongshu_detail import XiaohongshuDetail
														
 
															+from script.search.enrichment_helper import enrich_post_with_detail
														
 
															 # from multimodal_extractor import extract_post_images  # 内容提取流程已断开
														
 
															 from post_evaluator_v3 import evaluate_post_v3, apply_evaluation_v3_to_post
														
@@ -146,6 +148,13 @@ class Post(BaseModel):
 
															     note_id: str = ""
														
 
															     note_url: str = ""
														
 
															+    # 详情补充字段（来自详情API）
														
 
															+    author_name: str = ""  # 作者名称
														
 
															+    author_id: str = ""  # 作者ID
														
 
															+    publish_time: int = 0  # 发布时间戳
														
 
															+    cdn_images: list[str] = Field(default_factory=list)  # 高清CDN图片列表(详情API补充)
														
 
															+    detail_fetched: bool = False  # 是否已获取详情的标记
														
 
															+
														
 
															     # V3评估字段（顶层 - 快速访问）
														
 
															     is_knowledge: bool | None = None  # Prompt1: 是否是知识内容
														
 
															     is_content_knowledge: bool | None = None  # Prompt2: 是否是内容知识
														
@@ -2014,46 +2023,49 @@ def process_note_data(note: dict) -> Post:
 
															     user_info = note_card.get("user", {})
														
 
															     # ========== 调试日志 START ==========
														
 
															-    note_id = note.get("id", "")
														
 
															-
														
 
															-    # 1. 打印完整的 note 结构
														
 
															-    print(f"\n[DEBUG] ===== 处理帖子 {note_id} =====")
														
 
															-    print(f"[DEBUG] note 的所有键: {list(note.keys())}")
														
 
															-    print(f"[DEBUG] note 完整数据 (前2000字符):")
														
 
															-    print(json.dumps(note, ensure_ascii=False, indent=2)[:2000])
														
 
															-
														
 
															-    # 2. 打印 note_card 信息
														
 
															-    print(f"\n[DEBUG] note_card 的所有键: {list(note_card.keys())}")
														
 
															-
														
 
															-    # 3. 检查 desc 字段
														
 
															-    raw_desc = note_card.get("desc")
														
 
															-    print(f"\n[DEBUG] desc 字段:")
														
 
															-    print(f"  - 类型: {type(raw_desc).__name__}")
														
 
															-    print(f"  - 长度: {len(raw_desc) if raw_desc else 0}")
														
 
															-    print(f"  - 完整内容: {repr(raw_desc)}")
														
 
															-
														
 
															-    # 4. 检查是否有其他可能包含完整内容的字段
														
 
															-    print(f"\n[DEBUG] 检查其他可能的内容字段:")
														
 
															-    for potential_field in ["full_desc", "content", "full_content", "note_text", "body", "full_body", "title", "display_title"]:
														
 
															-        if potential_field in note_card:
														
 
															-            value = note_card.get(potential_field)
														
 
															-            print(f"  - 发现字段 '{potential_field}': 长度={len(str(value))}, 值={repr(str(value)[:200])}")
														
 
															-
														
 
															-    # 5. 检查顶层 note 对象中是否有详细内容
														
 
															-    print(f"\n[DEBUG] 检查 note 顶层字段:")
														
 
															-    for top_field in ["note_info", "detail", "content", "desc"]:
														
 
															-        if top_field in note:
														
 
															-            value = note.get(top_field)
														
 
															-            print(f"  - 发现顶层字段 '{top_field}': 类型={type(value).__name__}, 内容={repr(str(value)[:200])}")
														
 
															-
														
 
															-    print(f"[DEBUG] ===== 数据检查完成 =====\n")
														
 
															+    # note_id = note.get("id", "")
														
 
															+    #
														
 
															+    # # 1. 打印完整的 note 结构
														
 
															+    # print(f"\n[DEBUG] ===== 处理帖子 {note_id} =====")
														
 
															+    # print(f"[DEBUG] note 的所有键: {list(note.keys())}")
														
 
															+    # print(f"[DEBUG] note 完整数据 (前2000字符):")
														
 
															+    # print(json.dumps(note, ensure_ascii=False, indent=2)[:2000])
														
 
															+    #
														
 
															+    # # 2. 打印 note_card 信息
														
 
															+    # print(f"\n[DEBUG] note_card 的所有键: {list(note_card.keys())}")
														
 
															+    #
														
 
															+    # # 3. 检查 desc 字段
														
 
															+    # raw_desc = note_card.get("desc")
														
 
															+    # print(f"\n[DEBUG] desc 字段:")
														
 
															+    # print(f"  - 类型: {type(raw_desc).__name__}")
														
 
															+    # print(f"  - 长度: {len(raw_desc) if raw_desc else 0}")
														
 
															+    # print(f"  - 完整内容: {repr(raw_desc)}")
														
 
															+    #
														
 
															+    # # 4. 检查是否有其他可能包含完整内容的字段
														
 
															+    # print(f"\n[DEBUG] 检查其他可能的内容字段:")
														
 
															+    # for potential_field in ["full_desc", "content", "full_content", "note_text", "body", "full_body", "title", "display_title"]:
														
 
															+    #     if potential_field in note_card:
														
 
															+    #         value = note_card.get(potential_field)
														
 
															+    #         print(f"  - 发现字段 '{potential_field}': 长度={len(str(value))}, 值={repr(str(value)[:200])}")
														
 
															+    #
														
 
															+    # # 5. 检查顶层 note 对象中是否有详细内容
														
 
															+    # print(f"\n[DEBUG] 检查 note 顶层字段:")
														
 
															+    # for top_field in ["note_info", "detail", "content", "desc"]:
														
 
															+    #     if top_field in note:
														
 
															+    #         value = note.get(top_field)
														
 
															+    #         print(f"  - 发现顶层字段 '{top_field}': 类型={type(value).__name__}, 内容={repr(str(value)[:200])}")
														
 
															+    #
														
 
															+    # print(f"[DEBUG] ===== 数据检查完成 =====\n")
														
 
															     # ========== 调试日志 END ==========
														
 
															-    # 提取图片URL - 使用新的字段名 image_url
														
 
															+    # 提取图片URL - 支持字符串和字典两种格式
														
 
															     images = []
														
 
															     for img in image_list:
														
 
															-        if isinstance(img, dict):
														
 
															-            # 尝试新字段名 image_url，如果不存在则尝试旧字段名 url_default
														
 
															+        if isinstance(img, str):
														
 
															+            # 预处理后的字符串格式（来自xiaohongshu_search.py的_preprocess_response）
														
 
															+            images.append(img)
														
 
															+        elif isinstance(img, dict):
														
 
															+            # 原始字典格式 - 尝试新字段名 image_url，如果不存在则尝试旧字段名 url_default
														
 
															             img_url = img.get("image_url") or img.get("url_default")
														
 
															             if img_url:
														
 
															                 images.append(img_url)
														
@@ -2084,16 +2096,16 @@ def process_note_data(note: dict) -> Post:
 
															         note_url=f"https://www.xiaohongshu.com/explore/{note.get('id', '')}"
														
 
															     )
														
 
															-    # 打印最终构造的 Post 对象
														
 
															-    print(f"\n[DEBUG] ===== 构造的 Post 对象 =====")
														
 
															-    print(f"[DEBUG] - note_id: {post.note_id}")
														
 
															-    print(f"[DEBUG] - title: {post.title}")
														
 
															-    print(f"[DEBUG] - body_text 长度: {len(post.body_text)}")
														
 
															-    print(f"[DEBUG] - body_text 完整内容: {repr(post.body_text)}")
														
 
															-    print(f"[DEBUG] - type: {post.type}")
														
 
															-    print(f"[DEBUG] - images 数量: {len(post.images)}")
														
 
															-    print(f"[DEBUG] - interact_info: {post.interact_info}")
														
 
															-    print(f"[DEBUG] ===== Post 对象构造完成 =====\n")
														
 
															+    # # 打印最终构造的 Post 对象
														
 
															+    # print(f"\n[DEBUG] ===== 构造的 Post 对象 =====")
														
 
															+    # print(f"[DEBUG] - note_id: {post.note_id}")
														
 
															+    # print(f"[DEBUG] - title: {post.title}")
														
 
															+    # print(f"[DEBUG] - body_text 长度: {len(post.body_text)}")
														
 
															+    # print(f"[DEBUG] - body_text 完整内容: {repr(post.body_text)}")
														
 
															+    # print(f"[DEBUG] - type: {post.type}")
														
 
															+    # print(f"[DEBUG] - images 数量: {len(post.images)}")
														
 
															+    # print(f"[DEBUG] - interact_info: {post.interact_info}")
														
 
															+    # print(f"[DEBUG] ===== Post 对象构造完成 =====\n")
														
 
															     return post
														
@@ -2709,6 +2721,7 @@ async def run_round(
 
															     context: RunContext,
														
 
															     xiaohongshu_api: XiaohongshuSearchRecommendations,
														
 
															     xiaohongshu_search: XiaohongshuSearch,
														
 
															+    xiaohongshu_detail: XiaohongshuDetail,
														
 
															     sug_threshold: float = 0.7,
														
 
															     enable_evaluation: bool = False
														
 
															 ) -> tuple[list[Q], list[Seed], list[Search]]:
														
@@ -2847,17 +2860,26 @@ async def run_round(
 
															             print(f"    搜索: {sug.text}")
														
 
															             try:
														
 
															                 search_result = xiaohongshu_search.search(keyword=sug.text)
														
 
															-                result_str = search_result.get("result", "{}")
														
 
															-                if isinstance(result_str, str):
														
 
															-                    result_data = json.loads(result_str)
														
 
															-                else:
														
 
															-                    result_data = result_str
														
 
															-
														
 
															-                notes = result_data.get("data", {}).get("data", [])
														
 
															+                # xiaohongshu_search.search() 已经返回解析后的数据
														
 
															+                notes = search_result.get("data", {}).get("data", [])
														
 
															                 post_list = []
														
 
															                 for note in notes[:10]:  # 只取前10个
														
 
															-                    post = process_note_data(note)
														
 
															-                    post_list.append(post)
														
 
															+                    try:
														
 
															+                        post = process_note_data(note)
														
 
															+                        post_list.append(post)
														
 
															+                    except Exception as e:
														
 
															+                        print(f"      ⚠️  解析帖子失败 {note.get('id', 'unknown')}: {str(e)[:50]}")
														
 
															+
														
 
															+                # 补充详情信息（仅视频类型需要补充视频URL）
														
 
															+                video_posts = [p for p in post_list if p.type == "video"]
														
 
															+                if video_posts:
														
 
															+                    print(f"      补充详情（{len(video_posts)}个视频）...")
														
 
															+                    for post in video_posts:
														
 
															+                        try:
														
 
															+                            detail_response = xiaohongshu_detail.get_detail(post.note_id)
														
 
															+                            enrich_post_with_detail(post, detail_response)
														
 
															+                        except Exception as e:
														
 
															+                            print(f"        ⚠️  详情补充失败 {post.note_id}: {str(e)[:50]}")
														
 
															                 print(f"      → 找到 {len(post_list)} 个帖子")
														
@@ -3132,17 +3154,7 @@ async def run_round(
 
															         search_results_data.append({
														
 
															             "text": search.text,
														
 
															             "score_with_o": search.score_with_o,
														
 
															-            "post_list": [
														
 
															-                {
														
 
															-                    "note_id": post.note_id,
														
 
															-                    "note_url": post.note_url,
														
 
															-                    "title": post.title,
														
 
															-                    "body_text": post.body_text,
														
 
															-                    "images": post.images,
														
 
															-                    "interact_info": post.interact_info
														
 
															-                }
														
 
															-                for post in search.post_list
														
 
															-            ]
														
 
															+            "post_list": [post.model_dump() for post in search.post_list]
														
 
															         })
														
 
															     # 记录本轮数据
														
@@ -3196,6 +3208,7 @@ async def iterative_loop(
 
															     # API实例
														
 
															     xiaohongshu_api = XiaohongshuSearchRecommendations()
														
 
															     xiaohongshu_search = XiaohongshuSearch()
														
 
															+    xiaohongshu_detail = XiaohongshuDetail()  # 详情API客户端
														
 
															     # 保存初始化数据
														
 
															     context.rounds.append({
														
@@ -3387,6 +3400,7 @@ async def run_round_v2(
 
															     context: RunContext,
														
 
															     xiaohongshu_api: XiaohongshuSearchRecommendations,
														
 
															     xiaohongshu_search: XiaohongshuSearch,
														
 
															+    xiaohongshu_detail: XiaohongshuDetail,
														
 
															     sug_threshold: float = 0.7,
														
 
															     enable_evaluation: bool = False
														
 
															 ) -> tuple[list[Q], list[Search], dict]:
														
@@ -3490,24 +3504,33 @@ async def run_round_v2(
 
															             try:
														
 
															                 search_result = xiaohongshu_search.search(keyword=sug.text)
														
 
															-                result_str = search_result.get("result", "{}")
														
 
															-                if isinstance(result_str, str):
														
 
															-                    result_data = json.loads(result_str)
														
 
															-                else:
														
 
															-                    result_data = result_str
														
 
															-
														
 
															-                notes = result_data.get("data", {}).get("data", [])
														
 
															+                # xiaohongshu_search.search() 已经返回解析后的数据
														
 
															+                notes = search_result.get("data", {}).get("data", [])
														
 
															                 post_list = []
														
 
															                 for note in notes[:10]:
														
 
															-                    post = process_note_data(note)
														
 
															-
														
 
															-                    # # 🆕 多模态提取（搜索后立即处理） - 内容提取流程已断开
														
 
															-                    # if post.type == "normal" and len(post.images) > 0:
														
 
															-                    #     extraction = await extract_post_images(post)
														
 
															-                    #     if extraction:
														
 
															-                    #         post_extractions[post.note_id] = extraction
														
 
															-
														
 
															-                    post_list.append(post)
														
 
															+                    try:
														
 
															+                        post = process_note_data(note)
														
 
															+
														
 
															+                        # # 🆕 多模态提取（搜索后立即处理） - 内容提取流程已断开
														
 
															+                        # if post.type == "normal" and len(post.images) > 0:
														
 
															+                        #     extraction = await extract_post_images(post)
														
 
															+                        #     if extraction:
														
 
															+                        #         post_extractions[post.note_id] = extraction
														
 
															+
														
 
															+                        post_list.append(post)
														
 
															+                    except Exception as e:
														
 
															+                        print(f"      ⚠️  解析帖子失败 {note.get('id', 'unknown')}: {str(e)[:50]}")
														
 
															+
														
 
															+                # 补充详情信息（仅视频类型需要补充视频URL）
														
 
															+                video_posts = [p for p in post_list if p.type == "video"]
														
 
															+                if video_posts:
														
 
															+                    print(f"      补充详情（{len(video_posts)}个视频）...")
														
 
															+                    for post in video_posts:
														
 
															+                        try:
														
 
															+                            detail_response = xiaohongshu_detail.get_detail(post.note_id)
														
 
															+                            enrich_post_with_detail(post, detail_response)
														
 
															+                        except Exception as e:
														
 
															+                            print(f"        ⚠️  详情补充失败 {post.note_id}: {str(e)[:50]}")
														
 
															                 print(f"      → 找到 {len(post_list)} 个帖子")
														
@@ -3695,17 +3718,7 @@ async def run_round_v2(
 
															         search_results_data.append({
														
 
															             "text": search.text,
														
 
															             "score_with_o": search.score_with_o,
														
 
															-            "post_list": [
														
 
															-                {
														
 
															-                    "note_id": post.note_id,
														
 
															-                    "note_url": post.note_url,
														
 
															-                    "title": post.title,
														
 
															-                    "body_text": post.body_text,
														
 
															-                    "images": post.images,
														
 
															-                    "interact_info": post.interact_info
														
 
															-                }
														
 
															-                for post in search.post_list
														
 
															-            ]
														
 
															+            "post_list": [post.model_dump() for post in search.post_list]
														
 
															         })
														
 
															     round_data.update({
														
@@ -3800,6 +3813,7 @@ async def iterative_loop_v2(
 
															     # API实例
														
 
															     xiaohongshu_api = XiaohongshuSearchRecommendations()
														
 
															     xiaohongshu_search = XiaohongshuSearch()
														
 
															+    xiaohongshu_detail = XiaohongshuDetail()  # 详情API客户端
														
 
															     # 收集所有搜索结果
														
 
															     all_search_list = []
														
@@ -3823,6 +3837,7 @@ async def iterative_loop_v2(
 
															             context=context,
														
 
															             xiaohongshu_api=xiaohongshu_api,
														
 
															             xiaohongshu_search=xiaohongshu_search,
														
 
															+            xiaohongshu_detail=xiaohongshu_detail,
														
 
															             sug_threshold=sug_threshold,
														
 
															             enable_evaluation=enable_evaluation
														
 
															         )
														
--- a/post_evaluator_v3.py
+++ b/post_evaluator_v3.py
@@ -22,6 +22,10 @@ MAX_IMAGES_PER_POST = 10
 
															 MAX_CONCURRENT_EVALUATIONS = 5
														
 
															 API_TIMEOUT = 120
														
 
															+# 缓存配置
														
 
															+ENABLE_CACHE = True  # 是否启用评估结果缓存
														
 
															+CACHE_DIR = ".evaluation_cache"  # 缓存目录
														
 
															+
														
 
															 # ============================================================================
														
 
															 # 数据模型
														
 
															 # ============================================================================
														
@@ -775,7 +779,8 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
															 ---
														
 
															 ## 任务说明
														
 
															-你将收到一个**原始搜索需求**和一条**多模态帖子**（包含图片、标题、正文），请**仅评估品类维度**的匹配度，输出0-100分的量化得分。忽略目的和动机维度因素，只评估品类维度。
														
 
															+你将收到一个**原始搜索需求**和一条**多模态帖子**（包含图片、标题、正文）
														
 
															+请**仅评估品类维度**的匹配度，输出0-100分的量化得分。
														
 
															 ---
														
@@ -792,44 +797,93 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
															 ---
														
 
															 ## 评估维度：品类匹配
														
 
															-###品类定义：
														
 
															-**品类 = 核心主体（名词）+ 限定词**
														
 
															-- **核心主体**：具体的内容对象（风光摄影、旅行攻略、美食推荐）
														
 
															-- **限定词**：限定词不包含具体的目的和动作
														
 
															-  - 地域：川西、成都、日本
														
 
															-  - 时间：秋季、夏天、2024
														
 
															-  - 类型：免费、高清、入门级
														
 
															-  - 风格：小清新、复古、简约
														
 
															+## 评估维度
														
 
															+本评估系统围绕 **品类维度** 进行：
														
 
															+
														
 
															+#  维度独立性警告
														
 
															+【严格约束】本评估**只评估品类维度**，必须遵守以下规则：
														
 
															+1. **只看名词和限定词**：评估时只考虑主体、限定词的匹配度
														
 
															+2. **完全忽略动词**：动作意图、目的等动机信息对本维度评分无影响
														
 
															+3. **只看词条表面，禁止联想推演**
														
 
															+4. **通用概念 ≠ 特定概念**
														
 
															+
														
 
															 ### 核心评估逻辑
														
 
															+**品类 = 核心内容主体（实体名词）+ 场景/地域限定**
														
 
															+
														
 
															+### 品类识别规则
														
 
															+
														
 
															+#### 第一步：剥离动作词，识别核心主体
														
 
															+
														
 
															+**必须剥离的动作词（属于目的动机，不是品类）：**
														
 
															+- 如何、怎么、制作、拍摄、寻找、推荐、学习、了解等
														
 
															+
														
 
															+**示例：**
														
 
															+- "如何制作猫咪表情包" → 品类主体是**猫咪**，不是"表情包制作"
														
 
															+- "川西风光摄影教程" → 品类主体是**川西风光**，不是"摄影教程"
														
 
															+- "推荐日本旅行景点" → 品类主体是**日本旅行/景点**，不是"推荐"
														
 
															+#### 第二步：识别核心主体类别
														
 
															+
														
 
															+**核心主体（实体名词）：**
														
 
															+- **生物类**：猫咪、狗狗、植物、人物（具体指儿童、女孩、老人等）
														
 
															+- **地理类**：川西、成都、日本、景点名称
														
 
															+- **物品类**：美食、服装、电子产品、家具
														
 
															+- **场景类**：风光、建筑、室内、户外
														
 
															+- **活动类**：旅行、运动、工作、学习场景
														
 
															+
														
 
															+**关键原则：品类主体必须是具体的内容对象，不是动作或形式**
														
 
															+
														
 
															+#### 第三步：识别场景/地域等限定词（可选）
														
 
															+
														
 
															+**场景/地域限定：**
														
 
															+- **地域限定**：川西、成都、日本、欧洲
														
 
															+- **时间限定**：秋季、夏天、2024
														
 
															+- **场景限定**：户外、室内、职场、家居
														
 
															+
														
 
															+**注意：**
														
 
															+- "表情包"、"梗图"、"照片"、"视频"等是**内容形式/载体**，不是品类主体
														
 
															+- "教程"、"攻略"、"指南"等是**内容类型**，属于目的动机，不是品类
														
 
															 ---
														
 
															 ## 评估流程
														
 
															 ### 第一步：提取原始需求的品类信息
														
 
															-- 识别**核心主体名词**
														
 
															-- 识别**关键限定词**（地域/时间/类型/风格等）
														
 
															+
														
 
															+1. **剥离所有动作词和内容形式词**
														
 
															+2. **识别核心主体名词**（生物、地理、物品、场景等）
														
 
															+3. **识别场景/地域限定**（如果有）
														
 
															+
														
 
															+**示例分析：**
														
 
															+- "如何制作猫咪表情包梗图"
														
 
															+  - 剥离动作：如何、制作
														
 
															+  - 剥离形式：表情包、梗图
														
 
															+  - **核心品类主体：猫咪**
														
 
															+  - 场景限定：无
														
 
															+
														
 
															 ### 第二步：从帖子中提取品类信息（重点看图片）
														
 
															 **图片识别（权重70%）：**
														
 
															-- 图片展示的核心主体是什么？
														
 
															-- 图片中可识别的限定特征（地域标志、季节特征、类型属性、风格特点）
														
 
															+- 图片的**核心主体**是什么？（是猫、是人、是风景、是物品？）
														
 
															+- 图片的**场景/地域特征**是什么？
														
 
															 **标题提取（权重15%）：**
														
 
															-- 标题明确的品类名词和限定词
														
 
															+- 标题明确的品类主体名词
														
 
															 **正文提取（权重15%）：**
														
 
															-- 正文描述的品类信息
														
 
															+- 正文描述的品类主体
														
 
															-### 第三步：对比匹配度
														
 
															-- 核心主体是否一致？
														
 
															-- 限定词匹配了几个？
														
 
															-- 是否存在泛化或偏移？
														
 
															+### 第三步：对比品类匹配度
														
 
															+
														
 
															+**核心判断：主体是否一致？**
														
 
															+- 猫咪 ≠ 女孩 → 品类完全不同 → 0-10分
														
 
															+- 猫咪 = 猫咪 → 品类一致 → 进一步看场景限定
														
 
															+- 川西风光 ≠ 日本风光 → 地域不同 → 30-50分
														
 
															+- 川西风光 = 四川风光 → 地域相近 → 70-85分
														
 
															 ---
														
@@ -837,43 +891,43 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
															 ### 高度匹配区间
														
 
															-**90-100分：核心主体+关键限定词完全匹配**
														
 
															-- 图片展示的主体与需求精准一致
														
 
															-- 关键限定词全部匹配（地域、时间、类型等）
														
 
															+**90-100分：核心主体完全一致 + 场景/地域等限定词完全匹配**
														
 
															+- 图片主体与需求完全一致
														
 
															+- 关键限定词全部匹配（场景、地域、时间等）
														
 
															 - 例：需求"川西秋季风光" vs 图片展示川西秋季风景
														
 
															-**75-89分：核心主体匹配，限定词匹配度百分之80**
														
 
															+**75-89分：核心主体完全一致 + 场景/地域等限定词部分匹配**
														
 
															 - 图片主体一致
														
 
															 - 存在1-2个限定词缺失但不影响核心匹配
														
 
															 - 例：需求"川西秋季风光" vs 图片展示川西风光（缺秋季）
														
 
															-**60-74分：核心主体匹配，限定词匹配度百分之60**
														
 
															+**60-74分：核心主体匹配，限定词大量缺失**
														
 
															 - 图片主体在同一大类
														
 
															-- 限定词部分匹配或有合理上下位关系
														
 
															-- 例：需求"川西秋季风光" vs 图片展示四川风光
														
 
															+- 场景/地域等限定词大部分缺失
														
 
															+- 例：需求"川西秋季风光" vs 图片展示风光
														
 
															 ### 中度相关区间
														
 
															-**40-59分：核心主体匹配，限定词完全不匹配**
														
 
															+**40-59分：核心主体同大类但具体不同**
														
 
															 - 图片主体相同但上下文不同
														
 
															 - 限定词严重缺失或不匹配
														
 
															-- 例：需求"猫咪表情包梗图" vs 女孩表情包
														
 
															+- 例：需求"川西风光摄影" vs 图片展示风光照但无地域特征
														
 
															 ### 不相关/负向区间
														
 
															-**20-39分：主体过度泛化**
														
 
															+**20-39分：核心主体相关但类别差异明显**
														
 
															 - 图片主体是通用概念，需求是特定概念
														
 
															 - 仅有抽象类别相似
														
 
															 - 例：需求"川西旅行攻略" vs 图片展示普通旅行场景
														
 
															-**1-19分：品类关联极弱**
														
 
															+**1-19分：核心主体几乎不相关**
														
 
															 - 图片主体与需求差异明显
														
 
															-**0分：品类完全不同**
														
 
															+**0分：核心主体完全不同**
														
 
															 - 图片主体类别完全不同
														
 
															 - 例：需求"风光摄影" vs 图片展示美食
														
 
															-**负分不使用**（品类维度不设负分）
														
 
															+**关键原则：品类主体不同 = 品类不匹配 = 0分或极低分**
														
 
															 ---
														
@@ -881,21 +935,27 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
															 ```json
														
 
															 {{
														
 
															   "品类评估": {{
														
 
															-    "原始需求品类": {{
														
 
															-      "核心主体": "提取的主体名词",
														
 
															-      "关键限定词": ["限定词1", "限定词2"]
														
 
															+    "原始需求品类分析": {{
														
 
															+      "完整需求": "用户的原始搜索词",
														
 
															+      "剥离动作词": "识别并剥离的动作词",
														
 
															+      "剥离形式词": "识别并剥离的内容形式词",
														
 
															+      "核心主体": "提取的核心品类主体",
														
 
															+      "场景地域限定": ["限定词1", "限定词2"]
														
 
															     }},
														
 
															     "帖子实际品类": {{
														
 
															-      "图片主体": "图片展示的核心主体",
														
 
															-      "图片限定特征": ["从图片识别的限定词"],
														
 
															-      "标题品类": "标题提及的品类",
														
 
															-      "正文品类": "正文描述的品类"
														
 
															+      "图片主体": "图片展示的核心主体（权重70%）",
														
 
															+      "图片场景特征": "图片的场景/地域特征",
														
 
															+      "标题主体": "标题提及的主体",
														
 
															+      "正文主体": "正文描述的主体"
														
 
															+    }},
														
 
															+    "品类匹配分析": {{
														
 
															+      "主体对比": "需求主体 vs 帖子主体",
														
 
															+      "主体是否一致": "一致/同大类不同/完全不同",
														
 
															+      "场景限定匹配情况": "哪些匹配/哪些缺失"
														
 
															     }},
														
 
															     "品类匹配得分": 0-100的整数,
														
 
															     "匹配度等级": "完全匹配/高度匹配/基本匹配/弱匹配/不匹配",
														
 
															-    "主体匹配情况": "主体是否一致",
														
 
															-    "限定词匹配情况": "哪些限定词匹配/缺失",
														
 
															-    "核心依据": "为什么给这个分数（100字以内）"
														
 
															+    "核心依据": "为什么给这个分数（必须说明主体是否一致）"
														
 
															   }}
														
 
															 }}
														
 
															 ```
														
@@ -915,9 +975,11 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
															 ## 特别注意
														
 
															 - 本评估**只关注品类维度**，不考虑目的是否匹配
														
 
															+- 严格标准一致性：对所有用例使用相同的评估标准，避免评分飘移
														
 
															 - 输出的分数必须是**0-100的整数**
														
 
															 - 不要自行计算综合分数，只输出品类分数
														
 
															-- 禁止因为"可能相关"就给分，必须有明确视觉证据
														
 
															+- 禁止因为"可能相关"就给分，必须有明确视觉证据，不得以"可能相关"作为评估依据
														
 
															+---
														
 
															 """
														
@@ -925,6 +987,106 @@ PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
 
															 # 辅助函数
														
 
															 # ============================================================================
														
 
															+def _get_cache_key(note_id: str) -> str:
														
 
															+    """
														
 
															+    生成缓存key
														
 
															+
														
 
															+    Args:
														
 
															+        note_id: 帖子ID
														
 
															+
														
 
															+    Returns:
														
 
															+        缓存文件名（不含目录）
														
 
															+    """
														
 
															+    return f"{note_id}_v3.0.json"
														
 
															+
														
 
															+
														
 
															+def _load_from_cache(note_id: str) -> Optional[tuple]:
														
 
															+    """
														
 
															+    从缓存加载评估结果
														
 
															+
														
 
															+    Args:
														
 
															+        note_id: 帖子ID
														
 
															+
														
 
															+    Returns:
														
 
															+        缓存的评估结果元组 (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
														
 
															+        如果缓存不存在或读取失败，返回None
														
 
															+    """
														
 
															+    if not ENABLE_CACHE:
														
 
															+        return None
														
 
															+
														
 
															+    cache_file = os.path.join(CACHE_DIR, _get_cache_key(note_id))
														
 
															+
														
 
															+    if not os.path.exists(cache_file):
														
 
															+        return None
														
 
															+
														
 
															+    try:
														
 
															+        with open(cache_file, 'r', encoding='utf-8') as f:
														
 
															+            data = json.load(f)
														
 
															+
														
 
															+        # 重建评估对象
														
 
															+        knowledge_eval = None
														
 
															+        if data.get("knowledge_eval"):
														
 
															+            knowledge_eval = KnowledgeEvaluation(**data["knowledge_eval"])
														
 
															+
														
 
															+        content_eval = None
														
 
															+        if data.get("content_eval"):
														
 
															+            content_eval = ContentKnowledgeEvaluation(**data["content_eval"])
														
 
															+
														
 
															+        purpose_eval = None
														
 
															+        if data.get("purpose_eval"):
														
 
															+            purpose_eval = PurposeEvaluation(**data["purpose_eval"])
														
 
															+
														
 
															+        category_eval = None
														
 
															+        if data.get("category_eval"):
														
 
															+            category_eval = CategoryEvaluation(**data["category_eval"])
														
 
															+
														
 
															+        final_score = data.get("final_score")
														
 
															+        match_level = data.get("match_level")
														
 
															+
														
 
															+        return (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"      ⚠️  缓存读取失败: {note_id} - {str(e)[:50]}")
														
 
															+        return None
														
 
															+
														
 
															+
														
 
															+def _save_to_cache(note_id: str, eval_results: tuple):
														
 
															+    """
														
 
															+    保存评估结果到缓存
														
 
															+
														
 
															+    Args:
														
 
															+        note_id: 帖子ID
														
 
															+        eval_results: 评估结果元组 (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
														
 
															+    """
														
 
															+    if not ENABLE_CACHE:
														
 
															+        return
														
 
															+
														
 
															+    knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level = eval_results
														
 
															+
														
 
															+    # 确保缓存目录存在
														
 
															+    os.makedirs(CACHE_DIR, exist_ok=True)
														
 
															+
														
 
															+    # 转换为可序列化的dict
														
 
															+    cache_data = {
														
 
															+        "knowledge_eval": knowledge_eval.model_dump() if knowledge_eval else None,
														
 
															+        "content_eval": content_eval.model_dump() if content_eval else None,
														
 
															+        "purpose_eval": purpose_eval.model_dump() if purpose_eval else None,
														
 
															+        "category_eval": category_eval.model_dump() if category_eval else None,
														
 
															+        "final_score": final_score,
														
 
															+        "match_level": match_level,
														
 
															+        "cache_time": datetime.now().isoformat(),
														
 
															+        "evaluator_version": "v3.0"
														
 
															+    }
														
 
															+
														
 
															+    cache_file = os.path.join(CACHE_DIR, _get_cache_key(note_id))
														
 
															+
														
 
															+    try:
														
 
															+        with open(cache_file, 'w', encoding='utf-8') as f:
														
 
															+            json.dump(cache_data, f, ensure_ascii=False, indent=2)
														
 
															+    except Exception as e:
														
 
															+        print(f"      ⚠️  缓存保存失败: {note_id} - {str(e)[:50]}")
														
 
															+
														
 
															+
														
 
															 def _clean_json_response(content_text: str) -> str:
														
 
															     """清理API返回的JSON内容"""
														
 
															     content_text = content_text.strip()
														
@@ -1258,6 +1420,13 @@ async def evaluate_post_v3(
 
															         print(f"      ⊗ 跳过视频帖子: {post.note_id}")
														
 
															         return (None, None, None, None, None, None)
														
 
															+    # 检查缓存
														
 
															+    if ENABLE_CACHE:
														
 
															+        cached_result = _load_from_cache(post.note_id)
														
 
															+        if cached_result is not None:
														
 
															+            print(f"      ♻️  使用缓存结果: {post.note_id}")
														
 
															+            return cached_result
														
 
															+
														
 
															     print(f"      🔍 开始V3评估: {post.note_id}")
														
 
															     # Step 1: 判断是知识
														
@@ -1310,6 +1479,10 @@ async def evaluate_post_v3(
 
															     print(f"      ✅ 综合得分: {final_score} ({match_level})")
														
 
															+    # 保存到缓存
														
 
															+    if ENABLE_CACHE:
														
 
															+        _save_to_cache(post.note_id, (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level))
														
 
															+
														
 
															     return (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
														
--- a/post_evaluator_v4_image_loader.py
+++ b/post_evaluator_v4_image_loader.py
@@ -0,0 +1,82 @@
 
															+"""
														
 
															+ImageUploader 新实现 - 使用PIL Image对象
														
 
															+参考demo,直接下载图片到内存并转为PIL Image,不上传文件
														
 
															+"""
														
 
															+import asyncio
														
 
															+import requests
														
 
															+from PIL import Image
														
 
															+import io
														
 
															+from typing import Optional, List, Any
														
 
															+
														
 
															+
														
 
															+class ImageUploader:
														
 
															+    """图片加载器 - 下载图片并转为PIL Image对象(参考demo,使用内联数据方式)"""
														
 
															+
														
 
															+    @staticmethod
														
 
															+    async def upload_images(image_urls: List[str]) -> tuple[List[Any], List[str]]:
														
 
															+        """
														
 
															+        批量下载图片并转为PIL Image对象
														
 
															+
														
 
															+        Args:
														
 
															+            image_urls: 图片URL列表
														
 
															+
														
 
															+        Returns:
														
 
															+            (image_objects, []) - PIL Image对象列表和空列表(保持接口兼容)
														
 
															+        """
														
 
															+        if not image_urls:
														
 
															+            return [], []
														
 
															+
														
 
															+        print(f"      📥 准备加载 {len(image_urls)} 张图片(PIL Image方式)...")
														
 
															+
														
 
															+        # 并发下载所有图片
														
 
															+        tasks = [ImageUploader._load_single_image(url, idx) for idx, url in enumerate(image_urls)]
														
 
															+        results = await asyncio.gather(*tasks, return_exceptions=True)
														
 
															+
														
 
															+        # 分离成功和失败的结果
														
 
															+        image_objects = []
														
 
															+
														
 
															+        for idx, result in enumerate(results):
														
 
															+            if isinstance(result, Exception):
														
 
															+                print(f"      ⚠️  图片{idx}加载失败: {str(result)[:50]}")
														
 
															+            elif result is not None:
														
 
															+                image_objects.append(result)
														
 
															+
														
 
															+        print(f"      ✅ 成功加载 {len(image_objects)}/{len(image_urls)} 张图片")
														
 
															+        return image_objects, []  # 返回空列表作为temp_paths,因为不需要清理
														
 
															+
														
 
															+    @staticmethod
														
 
															+    async def _load_single_image(image_url: str, idx: int) -> Optional[Any]:
														
 
															+        """
														
 
															+        下载单张图片并转为PIL Image对象
														
 
															+
														
 
															+        Args:
														
 
															+            image_url: 图片URL
														
 
															+            idx: 图片索引(用于日志)
														
 
															+
														
 
															+        Returns:
														
 
															+            PIL Image对象
														
 
															+        """
														
 
															+        try:
														
 
															+            # 下载图片到内存
														
 
															+            loop = asyncio.get_event_loop()
														
 
															+            response = await loop.run_in_executor(
														
 
															+                None,
														
 
															+                lambda: requests.get(image_url, timeout=30)
														
 
															+            )
														
 
															+            response.raise_for_status()
														
 
															+
														
 
															+            # 转换为PIL Image对象
														
 
															+            image = Image.open(io.BytesIO(response.content))
														
 
															+
														
 
															+            # 转换为RGB模式(Gemini推荐)
														
 
															+            if image.mode != 'RGB':
														
 
															+                image = image.convert('RGB')
														
 
															+
														
 
															+            file_size_kb = len(response.content) / 1024
														
 
															+            print(f"      ✓ 图片{idx}加载成功 ({file_size_kb:.1f}KB, {image.size[0]}x{image.size[1]})")
														
 
															+
														
 
															+            return image
														
 
															+
														
 
															+        except Exception as e:
														
 
															+            print(f"      ✗ 图片{idx}加载失败: {str(e)[:60]}")
														
 
															+            return None
														
--- a/post_evaluator_v4_langgraph.py
+++ b/post_evaluator_v4_langgraph.py
@@ -0,0 +1,2184 @@
 
															+"""
														
 
															+帖子评估模块 V4 - LangGraph版本 + Gemini API
														
 
															+
														
 
															+改进:
														
 
															+1. 框架: 使用 LangGraph 状态机替代传统异步流程
														
 
															+2. API: 切换到 Google Gemini API (google.generativeai)
														
 
															+3. 视频: 支持视频内容评估
														
 
															+4. Prompt: 视频内容自动调整Prompt描述
														
 
															+5. 流程: Prompt1 → Prompt2 → Prompt3&4(并行) → 综合评分
														
 
															+"""
														
 
															+
														
 
															+import asyncio
														
 
															+import json
														
 
															+import os
														
 
															+import time
														
 
															+import tempfile
														
 
															+import io
														
 
															+import base64
														
 
															+import requests
														
 
															+from datetime import datetime
														
 
															+from typing import Optional, TypedDict, List, Dict, Any
														
 
															+from pydantic import BaseModel, Field
														
 
															+from PIL import Image
														
 
															+from langchain_google_genai import ChatGoogleGenerativeAI
														
 
															+from langchain_core.messages import HumanMessage, SystemMessage
														
 
															+from langgraph.graph import StateGraph, END
														
 
															+# import google.generativeai as genai  # 暂时禁用,版本冲突
														
 
															+
														
 
															+# ============================================================================
														
 
															+# 常量配置
														
 
															+# ============================================================================
														
 
															+
														
 
															+# Gemini配置
														
 
															+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyBgt9h74LvdWJ4Ivy_mh17Yyy2JH2WJICE")
														
 
															+GEMINI_MODEL_NAME = "gemini-2.5-flash"
														
 
															+MAX_IMAGES_PER_POST = 10
														
 
															+
														
 
															+# 并发&重试配置
														
 
															+MAX_CONCURRENT_EVALUATIONS = 5
														
 
															+API_TIMEOUT = 180
														
 
															+MAX_RETRIES = 2
														
 
															+RETRY_WAIT_SECONDS = 3
														
 
															+FILE_PROCESS_TIMEOUT = 180
														
 
															+
														
 
															+# 缓存配置
														
 
															+ENABLE_CACHE = False
														
 
															+CACHE_DIR = ".evaluation_cache"
														
 
															+
														
 
															+# ============================================================================
														
 
															+# 数据模型 (复用V3)
														
 
															+# ============================================================================
														
 
															+
														
 
															+class KnowledgeEvaluation(BaseModel):
														
 
															+    """Prompt1: 判断是知识 - 评估结果"""
														
 
															+    is_knowledge: bool = Field(..., description="是否是知识内容")
														
 
															+    quick_exclude: dict = Field(default_factory=dict, description="快速排除判定")
														
 
															+    title_layer: dict = Field(default_factory=dict, description="标题层判断")
														
 
															+    image_layer: dict = Field(default_factory=dict, description="图片层判断(核心)")
														
 
															+    text_layer: dict = Field(default_factory=dict, description="正文层判断(辅助)")
														
 
															+    judgment_logic: str = Field(..., description="综合判定逻辑")
														
 
															+    core_evidence: list[str] = Field(default_factory=list, description="核心证据")
														
 
															+    issues: list[str] = Field(default_factory=list, description="不足或疑虑")
														
 
															+    conclusion: str = Field(..., description="结论陈述")
														
 
															+
														
 
															+
														
 
															+class ContentKnowledgeEvaluation(BaseModel):
														
 
															+    """Prompt2: 判断是否是内容知识 - 评估结果"""
														
 
															+    is_content_knowledge: bool = Field(..., description="是否属于内容知识")
														
 
															+    final_score: int = Field(..., description="最终得分(0-100)")
														
 
															+    level: str = Field(..., description="判定等级")
														
 
															+    quick_exclude: dict = Field(default_factory=dict, description="快速排除判定")
														
 
															+    dimension_scores: dict = Field(default_factory=dict, description="分层评分详情")
														
 
															+    core_evidence: list[str] = Field(default_factory=list, description="核心证据")
														
 
															+    issues: list[str] = Field(default_factory=list, description="不足之处")
														
 
															+    summary: str = Field(..., description="总结陈述")
														
 
															+
														
 
															+
														
 
															+class PurposeEvaluation(BaseModel):
														
 
															+    """Prompt3: 目的性匹配 - 评估结果"""
														
 
															+    purpose_score: int = Field(..., description="目的动机得分(0-100整数)")
														
 
															+    core_motivation: str = Field(..., description="原始需求核心动机")
														
 
															+    image_value: str = Field(..., description="图片提供的价值")
														
 
															+    title_intention: str = Field(..., description="标题体现的意图")
														
 
															+    text_content: str = Field(..., description="正文补充的内容")
														
 
															+    match_level: str = Field(..., description="匹配度等级")
														
 
															+    core_basis: str = Field(..., description="核心依据")
														
 
															+
														
 
															+
														
 
															+class CategoryEvaluation(BaseModel):
														
 
															+    """Prompt4: 品类匹配 - 评估结果"""
														
 
															+    category_score: int = Field(..., description="品类匹配得分(0-100整数)")
														
 
															+    original_category_analysis: dict = Field(default_factory=dict, description="原始需求品类分析")
														
 
															+    actual_category: dict = Field(default_factory=dict, description="帖子实际品类")
														
 
															+    match_level: str = Field(..., description="匹配度等级")
														
 
															+    category_match_analysis: dict = Field(default_factory=dict, description="品类匹配分析")
														
 
															+    core_basis: str = Field(..., description="核心依据")
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# LangGraph State定义
														
 
															+# ============================================================================
														
 
															+
														
 
															+class EvaluationState(TypedDict):
														
 
															+    """评估状态"""
														
 
															+    # 输入
														
 
															+    post: Any  # Post对象
														
 
															+    original_query: str
														
 
															+
														
 
															+    # 视频相关
														
 
															+    video_file: Optional[Any]  # genai.File对象
														
 
															+    video_uri: Optional[str]
														
 
															+    temp_video_path: Optional[str]
														
 
															+
														
 
															+    # 图片相关
														
 
															+    temp_image_paths: Optional[List[str]]  # 临时图片文件路径列表
														
 
															+    cached_media_files: Optional[List[Dict]]  # 缓存的图片base64数据，避免重复下载
														
 
															+
														
 
															+    # 评估结果
														
 
															+    knowledge_eval: Optional[KnowledgeEvaluation]
														
 
															+    content_eval: Optional[ContentKnowledgeEvaluation]
														
 
															+    purpose_eval: Optional[PurposeEvaluation]
														
 
															+    category_eval: Optional[CategoryEvaluation]
														
 
															+    final_score: Optional[float]
														
 
															+    match_level: Optional[str]
														
 
															+
														
 
															+    # 控制
														
 
															+    should_continue: bool
														
 
															+    error: Optional[str]
														
 
															+    semaphore: Optional[asyncio.Semaphore]
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# Prompt 定义 (基于V3改写, 在本模块内直接定义)
														
 
															+# ============================================================================
														
 
															+
														
 
															+# 注: Prompt未从v3模块导入, 而是在下方按System/User两部分重新定义
														
 
															+# ============================================================================
														
 
															+# Prompt 定义 - 拆分为System和User两部分
														
 
															+# ============================================================================
														
 
															+
														
 
															+# Prompt1: 知识判定 - System部分(评估规则)
														
 
															+SYSTEM_PROMPT1_IS_KNOWLEDGE = """# 内容知识判定系统 v2.0
														
 
															+
														
 
															+## 角色定义
														
 
															+你是一个多模态内容评估专家，专门判断社交媒体帖子是否属于"内容知识"类别。
														
 
															+
														
 
															+## 前置条件
														
 
															+该帖子已通过知识判定，确认提供了知识。现在需要进一步判断是否属于"内容知识"。
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 内容知识定义
														
 
															+
														
 
															+**内容知识**是指与创作/制作/设计相关的、具有实操性和可迁移性的知识，帮助创作者提升创作能力。
														
 
															+
														
 
															+### 内容知识的范畴
														
 
															+- ✅ **创作原理**: 设计原理、创作逻辑、美学规律、构图法则（通用的，普适的）
														
 
															+- ✅ **制作方法**: 操作流程、技术步骤、工具使用方法
														
 
															+- ✅ **创意技巧**: 灵感方法、创意思路、表现手法、风格技法
														
 
															+- ✅ **体系框架**: 完整的创作体系、方法论、思维框架
														
 
															+- ✅ **案例提炼**: 从多个案例中总结的通用创作规律
														
 
															+
														
 
															+### 非内容知识（严格排除）
														
 
															+- ❌ **单案例展示**: 仅展示单个作品，无方法论提炼
														
 
															+- ❌ **作品集合**: 纯作品展示集合，无创作方法讲解
														
 
															+- ❌ **单点元素**: 只展示配色/字体/素材，无使用方法
														
 
															+- ❌ **单次操作**: 只讲某个项目的特定操作，无通用性
														
 
															+- ❌ **非创作领域**: 健康、财经、生活、科普等非创作制作领域的知识
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输入信息
														
 
															+- **标题**: [帖子标题]
														
 
															+- **正文**: [帖子正文内容]  
														
 
															+- **图片**: [图片描述/内容]
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 判断流程
														
 
															+
														
 
															+### 第一步: 快速排除判断（任一为"是"则判定为非内容知识）
														
 
															+
														
 
															+1. 标题是否为纯展示型？（"我的XX作品"、"今天做了XX"、"作品分享"）
														
 
															+2. 图片是否全为作品展示，无任何方法/原理/步骤说明？
														
 
															+3. 是否只讲单个项目的特定操作，完全无通用性？
														
 
															+4. 是否为纯元素展示，无创作方法？（仅展示配色、字体、素材）
														
 
															+
														
 
															+**排除判定**: □ 是（判定为非内容知识） / □ 否（继续评估）
														
 
															+
														
 
															+---
														
 
															+
														
 
															+### 第二步: 分层打分评估（满分100分）
														
 
															+
														
 
															+## 🖼️ 图片层评估（权重70%，满分70分）
														
 
															+
														
 
															+> **说明**: 社交媒体以图片为主要信息载体，图片层是核心判断依据
														
 
															+
														
 
															+#### 维度1: 创作方法呈现（20分）
														
 
															+**评分依据**: 图片是否清晰展示了具体的创作/制作方法、技巧、技法
														
 
															+
														
 
															+- **20分**: 图片详细展示≥3个具体可操作的创作方法/技巧，有明确的操作指引
														
 
															+- **15分**: 图片展示2个创作方法，方法较为具体
														
 
															+- **10分**: 图片展示1个创作方法，但不够详细
														
 
															+- **5分**: 图片暗示有方法，但未明确展示
														
 
															+- **0分**: 图片无任何方法展示，纯作品呈现
														
 
															+
														
 
															+**得分**: __/20
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度2: 知识体系化程度（15分）
														
 
															+**评分依据**: 多图是否形成完整的知识体系或逻辑链条
														
 
															+
														
 
															+- **15分**: 多图形成完整体系（步骤1→2→3，或原理→方法→案例），逻辑清晰
														
 
															+- **12分**: 多图有知识关联性，形成部分体系
														
 
															+- **8分**: 多图展示多个知识点，但关联性弱
														
 
															+- **4分**: 多图仅为同类案例堆砌，无体系
														
 
															+- **0分**: 单图或多图无逻辑关联
														
 
															+
														
 
															+**得分**: __/15
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度3: 教学性标注与说明（15分）
														
 
															+**评分依据**: 图片是否包含教学性的视觉元素（标注、序号、箭头、文字说明）
														
 
															+
														
 
															+- **15分**: 大量教学标注（序号、箭头、高亮、文字说明、对比标记等），清晰易懂
														
 
															+- **12分**: 有明显的教学标注，但不够完善
														
 
															+- **8分**: 有少量标注或说明
														
 
															+- **4分**: 仅有简单文字，无视觉教学元素
														
 
															+- **0分**: 无任何教学标注，纯视觉展示
														
 
															+
														
 
															+**得分**: __/15
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度4: 方法可复用性（10分）
														
 
															+**评分依据**: 图片展示的方法是否可迁移到其他创作场景/项目
														
 
															+
														
 
															+- **10分**: 明确展示通用方法，可应用于多种场景（配公式/模板/框架）
														
 
															+- **8分**: 方法有一定通用性，可迁移到类似场景
														
 
															+- **5分**: 方法通用性一般，需要改造才能应用
														
 
															+- **2分**: 方法仅适用于特定项目
														
 
															+- **0分**: 无可复用方法
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度5: 原理与案例结合（10分）
														
 
															+**评分依据**: 图片是否将创作原理与实际案例有效结合
														
 
															+
														
 
															+- **10分**: 原理+多案例验证，清晰展示原理如何应用
														
 
															+- **8分**: 原理+案例，有一定结合
														
 
															+- **5分**: 有原理或有案例，但结合不够
														
 
															+- **2分**: 仅有案例，无原理提炼
														
 
															+- **0分**: 纯案例展示或纯理论
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+**🖼️ 图片层总分**: __/70
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 📝 正文层评估（权重20%，满分20分）
														
 
															+
														
 
															+> **说明**: 正文作为辅助判断，补充图片未完整呈现的知识信息
														
 
															+
														
 
															+#### 维度6: 方法/步骤描述（10分）
														
 
															+**评分依据**: 正文是否描述了具体的创作方法或操作步骤
														
 
															+
														
 
															+- **10分**: 有完整的步骤描述（≥3步）或详细的方法说明
														
 
															+- **7分**: 有步骤或方法描述，但不够系统
														
 
															+- **4分**: 有零散的方法提及
														
 
															+- **0分**: 无方法/步骤，纯叙事或展示性文字
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度7: 知识总结与提炼（10分）
														
 
															+**评分依据**: 正文是否对创作经验/规律进行总结提炼
														
 
															+
														
 
															+- **10分**: 有明确的知识总结、归纳、框架化输出
														
 
															+- **7分**: 有一定的经验总结或要点提炼
														
 
															+- **4分**: 有零散的心得，但未成体系
														
 
															+- **0分**: 无任何知识提炼
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+**📝 正文层总分**: __/20
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 🏷️ 标题层评估（权重10%，满分10分）
														
 
															+
														
 
															+> **说明**: 标题作为内容导向，辅助判断内容主题
														
 
															+
														
 
															+#### 维度8: 标题内容指向性（10分）
														
 
															+**评分依据**: 标题是否明确指向创作/制作相关的知识内容
														
 
															+
														
 
															+- **10分**: 标题明确包含方法/教程/技巧/原理类词汇（"XX教程"、"XX技巧"、"如何XX"、"XX方法"）
														
 
															+- **7分**: 标题包含整理型词汇（"合集"、"总结"、"分享XX方法"）
														
 
															+- **4分**: 描述性标题，暗示有创作知识
														
 
															+- **0分**: 纯展示型标题（"我的作品"、"今天做了XX"）或与创作无关
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+**🏷️ 标题层总分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+### 第三步: 综合评分与判定
														
 
															+
														
 
															+**总分计算**:
														
 
															+总分 = 图片层总分(70分) + 正文层总分(20分) + 标题层总分(10分)
														
 
															+
														
 
															+**最终得分**: __/100分
														
 
															+
														
 
															+---
														
 
															+
														
 
															+**判定等级**:
														
 
															+- **85-100分**: ⭐⭐⭐⭐⭐ 优质内容知识 - 强烈符合
														
 
															+- **70-84分**: ⭐⭐⭐⭐ 良好内容知识 - 符合
														
 
															+- **55-69分**: ⭐⭐⭐ 基础内容知识 - 基本符合
														
 
															+- **40-54分**: ⭐⭐ 弱内容知识 - 不太符合
														
 
															+- **0-39分**: ⭐ 非内容知识 - 不符合
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输出格式（JSON）
														
 
															+```json
														
 
															+{
														
 
															+  "is_knowledge": true/false,
														
 
															+  "quick_exclude": {
														
 
															+    "result": "通过/排除",
														
 
															+    "reason": "快速排除判定理由"
														
 
															+  },
														
 
															+  "title_layer": {
														
 
															+    "has_knowledge_direction": true/false,
														
 
															+    "reason": "标题层判断理由"
														
 
															+  },
														
 
															+  "image_layer": {
														
 
															+    "knowledge_presentation": {
														
 
															+      "match": true/false,
														
 
															+      "reason": "图片是否呈现知识"
														
 
															+    },
														
 
															+    "educational_value": {
														
 
															+      "has_value": true/false,
														
 
															+      "reason": "是否有教学价值"
														
 
															+    },
														
 
															+    "structure_level": {
														
 
															+      "structured": true/false,
														
 
															+      "reason": "结构化程度"
														
 
															+    },
														
 
															+    "practicality": {
														
 
															+      "practical": true/false,
														
 
															+      "reason": "实用性评估"
														
 
															+    },
														
 
															+    "information_density": {
														
 
															+      "level": "高/中/低",
														
 
															+      "reason": "信息密度判断"
														
 
															+    },
														
 
															+    "overall": "传递知识/纯展示/其他"
														
 
															+  },
														
 
															+  "text_layer": {
														
 
															+    "information_gain": {
														
 
															+      "has_gain": true/false,
														
 
															+      "reason": "是否有信息增量"
														
 
															+    },
														
 
															+    "verifiability": {
														
 
															+      "verifiable": true/false,
														
 
															+      "reason": "可验证性"
														
 
															+    },
														
 
															+    "knowledge_type": {
														
 
															+      "type": "方法性知识/应用性知识/原理性知识等",
														
 
															+      "reason": "知识类型判断"
														
 
															+    },
														
 
															+    "overall": "有知识支撑/无知识支撑"
														
 
															+  },
														
 
															+  "judgment_logic": "综合判定逻辑说明（2-3句话）",
														
 
															+  "core_evidence": [
														
 
															+    "证据1：从图片/正文/标题中提取的关键证据",
														
 
															+    "证据2：...",
														
 
															+    "证据3：..."
														
 
															+  ],
														
 
															+  "issues": [
														
 
															+    "问题1：存在的不足或疑虑",
														
 
															+    "问题2：..."
														
 
															+  ],
														
 
															+  "conclusion": "结论陈述（2-3句话说明判定结果和核心理由）"
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 判断原则
														
 
															+1. **图片主导原则**: 图片占70%权重，是核心判断依据；标题和正文为辅助
														
 
															+2. **创作领域限定**: 必须属于创作/制作/设计领域，其他领域知识不属于内容知识
														
 
															+3. **方法优先原则**: 重点评估是否提供了可操作的创作方法，而非纯作品展示
														
 
															+4. **通用性要求**: 优先考虑方法的可复用性和可迁移性
														
 
															+5. **严格性原则**: 宁可误判为"非内容知识"，也不放过纯展示型内容
														
 
															+6. **证据性原则**: 评分需基于明确的视觉和文本证据，可量化衡量
														
 
															+"""
														
 
															+
														
 
															+# Prompt1: 知识判定 - User部分(帖子数据)
														
 
															+USER_TEMPLATE1_IS_KNOWLEDGE = """请评估以下帖子是否为知识内容：
														
 
															+
														
 
															+**标题**: {title}
														
 
															+**正文**: {body_text}
														
 
															+**图片**: {num_images}张（图片内容见下方）
														
 
															+"""
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# Prompt2: 内容知识评估 - 拆分为System和User
														
 
															+# ============================================================================
														
 
															+
														
 
															+SYSTEM_PROMPT2_CONTENT_KNOWLEDGE = """## 角色定义
														
 
															+你是一个多模态内容评估专家，专门判断社交媒体帖子是否属于"内容知识"类别。
														
 
															+
														
 
															+## 前置条件
														
 
															+该帖子已通过知识判定，确认提供了知识。现在需要进一步判断是否属于"内容知识"。
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 内容知识的底层定义
														
 
															+
														
 
															+**内容知识**：关于社交媒体内容创作与制作的通识性、原理性知识，帮助创作者策划、生产、优化和传播优质内容。
														
 
															+
														
 
															+### 核心特征
														
 
															+1. **领域特定性**：专注于社交媒体内容本身的创作与制作
														
 
															+2. **通识性**：跨平台、跨领域适用的内容创作原理和方法
														
 
															+3. **原理性**：不仅是操作步骤，更包含背后的逻辑和原理
														
 
															+4. **可迁移性**：方法可应用于不同类型的社交媒体内容创作
														
 
															+
														
 
															+### 内容知识的完整范畴
														
 
															+
														
 
															+#### 1️⃣ 内容策划层
														
 
															+- **选题方法**：如何找选题、选题原理、热点捕捉、用户需求分析
														
 
															+- **内容定位**：账号定位、人设打造、差异化策略
														
 
															+- **结构设计**：内容框架、故事结构、信息组织方式
														
 
															+- **创意方法**：创意思路、脑暴方法、灵感来源
														
 
															+
														
 
															+#### 2️⃣ 内容制作层
														
 
															+- **文案创作**：标题技巧、正文写作、文案公式、钩子设计、情绪调动
														
 
															+- **视觉呈现**：封面设计原理、排版方法、配色技巧（用于内容呈现的）
														
 
															+- **视频制作**：脚本结构、拍摄技巧、镜头语言、剪辑节奏、转场方法
														
 
															+- **多模态组合**：图文配合、视频+文案组合、内容形式选择
														
 
															+
														
 
															+#### 3️⃣ 内容优化层
														
 
															+- **开头/钩子**：前3秒设计、开头公式、吸引注意力的方法
														
 
															+- **节奏控制**：信息密度、节奏把控、留白技巧
														
 
															+- **完播/完读**：提升完播率/完读率的方法和原理
														
 
															+- **互动设计**：评论引导、互动话术、用户参与设计
														
 
															+
														
 
															+#### 4️⃣ 内容方法论
														
 
															+- **创作体系**：完整的内容创作流程和体系
														
 
															+- **底层原理**：为什么这样做有效的原理解释
														
 
															+- **通用框架**：可复用的内容创作框架和模板
														
 
															+- **案例提炼**：从多个案例中总结的通用规律
														
 
															+
														
 
															+---
														
 
															+
														
 
															+### 内容知识 vs 非内容知识
														
 
															+
														
 
															+**✅ 属于内容知识的例子**：
														
 
															+- "小红书爆款标题的5个公式"（文案创作）
														
 
															+- "短视频前3秒如何抓住用户"（开头设计）
														
 
															+- "如何策划一个涨粉选题"（内容策划）
														
 
															+- "视频节奏控制的底层逻辑"（内容优化）
														
 
															+- "图文笔记的排版原理"（视觉呈现）
														
 
															+- "从10个爆款视频总结的脚本结构"（方法论提炼）
														
 
															+
														
 
															+**❌ 不属于内容知识的例子**：
														
 
															+- "摄影构图的三分法则"（专业摄影技能，除非用于讲解社交媒体内容拍摄）
														
 
															+- "PS修图教程"（设计软件技能，除非用于讲解封面/配图制作）
														
 
															+- "我的探店vlog"（单个作品展示，无创作方法）
														
 
															+- "今天涨粉100个好开心"（个人记录，无方法论）
														
 
															+- "健康饮食的10个建议"（其他领域知识）
														
 
															+- "这套配色真好看"（纯元素展示，无创作方法）
														
 
															+
														
 
															+**⚠️ 边界情况判断**：
														
 
															+- **专业技能类**：如果是为社交媒体内容创作服务的，属于内容知识（如"拍摄短视频的灯光布置"）；如果是纯技能教学，不属于（如"专业摄影的灯光理论"）
														
 
															+- **工具使用类**：如果是为内容制作服务的，属于内容知识（如"剪映做转场的3种方法"）；如果是纯软件教程，不属于（如"AE粒子特效教程"）
														
 
															+- **案例分析类**：如果从案例中提炼了内容创作方法，属于内容知识；如果只是案例展示，不属于
														
 
															+
														
 
															+---
														
 
															+
														
 
															+### 判断核心准则
														
 
															+
														
 
															+**问自己三个问题**：
														
 
															+1. **这个知识是关于"如何创作社交媒体内容"的吗？** 
														
 
															+   - 是 → 可能是内容知识
														
 
															+   - 否 → 不是内容知识
														
 
															+
														
 
															+2. **这个方法/原理是通识性的吗？能跨内容类型/平台应用吗？**
														
 
															+   - 是 → 符合内容知识特征
														
 
															+   - 否 → 可能只是单点技巧
														
 
															+
														
 
															+3. **看完后，创作者能用它来改进自己的内容创作吗？**
														
 
															+   - 能 → 是内容知识
														
 
															+   - 不能 → 不是内容知识
														
 
															+
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输入信息
														
 
															+- **标题**: [帖子标题]
														
 
															+- **正文**: [帖子正文内容]  
														
 
															+- **图片**: [图片描述/内容]
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 判断流程
														
 
															+
														
 
															+### 第一步: 领域快速筛查
														
 
															+
														
 
															+**判断：内容是否属于社交媒体内容创作/制作领域？**
														
 
															+
														
 
															+核心判断标准：
														
 
															+- 属于：讲的是如何创作/制作社交媒体内容（选题、文案、拍摄、剪辑、运营等）
														
 
															+- 属于：讲的是内容创作的原理、方法、技巧
														
 
															+- 属于：讲的是平台运营、爆款方法、涨粉策略
														
 
															+- 不属于：讲的是其他专业领域技能（摄影、设计、编程等），与内容创作无关
														
 
															+- 不属于：讲的是其他行业知识（财经、健康、科普等）
														
 
															+
														
 
															+**判定**: □ 属于内容创作领域（继续） / □ 不属于（判定为非内容知识）
														
 
															+
														
 
															+---
														
 
															+
														
 
															+### 第二步: 快速排除判断（任一为"是"则判定为非内容知识）
														
 
															+
														
 
															+1. 标题是否为纯展示型？（"我的XX"、"今天拍了XX"、"作品分享"）
														
 
															+2. 图片是否全为作品展示，无任何内容创作方法说明？
														
 
															+3. 是否只讲单个项目/单次创作的特定操作，完全无通用性？
														
 
															+4. 是否为纯元素/素材展示，无创作方法？（仅展示配色、字体、模板）
														
 
															+5. 是否为其他领域的专业知识，与内容创作无关？
														
 
															+
														
 
															+**排除判定**: □ 是（判定为非内容知识） / □ 否（继续评估）
														
 
															+
														
 
															+---
														
 
															+
														
 
															+### 第三步: 分层打分评估（满分100分）
														
 
															+
														
 
															+## 🖼️ 图片层评估（权重70%，满分70分）
														
 
															+
														
 
															+> **说明**: 社交媒体以图片为主要信息载体，图片层是核心判断依据
														
 
															+
														
 
															+#### 维度1: 内容创作方法呈现（20分）
														
 
															+**评分依据**: 图片是否清晰展示了具体的内容创作/制作方法、技巧
														
 
															+
														
 
															+- **20分**: 图片详细展示≥3个可操作的内容创作方法（如标题公式、脚本结构、拍摄技巧等）
														
 
															+- **15分**: 图片展示2个内容创作方法，方法较为具体
														
 
															+- **10分**: 图片展示1个内容创作方法，但不够详细
														
 
															+- **5分**: 图片暗示有方法，但未明确展示
														
 
															+- **0分**: 图片无任何方法展示，纯作品呈现
														
 
															+
														
 
															+**得分**: __/20
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度2: 内容知识体系化（15分）
														
 
															+**评分依据**: 多图是否形成完整的内容创作知识体系或逻辑链条
														
 
															+
														
 
															+- **15分**: 多图形成完整体系（如选题→文案→制作→优化，或原理→方法→案例），逻辑清晰
														
 
															+- **12分**: 多图有知识关联性，形成部分内容创作体系
														
 
															+- **8分**: 多图展示多个内容创作知识点，但关联性弱
														
 
															+- **4分**: 多图仅为同类案例堆砌，无体系
														
 
															+- **0分**: 单图或多图无逻辑关联
														
 
															+
														
 
															+**得分**: __/15
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度3: 教学性标注与说明（15分）
														
 
															+**评分依据**: 图片是否包含教学性的视觉元素（标注、序号、箭头、文字说明）
														
 
															+
														
 
															+- **15分**: 大量教学标注（序号、箭头、高亮、文字说明、对比标记等），清晰易懂
														
 
															+- **12分**: 有明显的教学标注，但不够完善
														
 
															+- **8分**: 有少量标注或说明
														
 
															+- **4分**: 仅有简单文字，无视觉教学元素
														
 
															+- **0分**: 无任何教学标注，纯视觉展示
														
 
															+
														
 
															+**得分**: __/15
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度4: 方法通识性与可迁移性（10分）
														
 
															+**评分依据**: 图片展示的方法是否具有通识性，可迁移到不同类型的内容创作
														
 
															+
														
 
															+- **10分**: 明确展示通识性方法，可应用于多种内容类型/平台（配公式/框架）
														
 
															+- **8分**: 方法有较强通识性，可迁移到类似内容
														
 
															+- **5分**: 方法通识性一般，适用范围较窄
														
 
															+- **2分**: 方法仅适用于特定单一场景
														
 
															+- **0分**: 无通识性方法
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度5: 原理性深度（10分）
														
 
															+**评分依据**: 图片是否讲解了内容创作背后的原理和逻辑，而非仅操作步骤
														
 
															+
														
 
															+- **10分**: 深入讲解原理（为什么这样做有效），配合方法和案例
														
 
															+- **8分**: 有原理说明，但深度不够
														
 
															+- **5分**: 主要是方法，略有原理提及
														
 
															+- **2分**: 仅有操作步骤，无原理
														
 
															+- **0分**: 纯案例展示，无原理无方法
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+**🖼️ 图片层总分**: __/70
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 📝 正文层评估（权重20%，满分20分）
														
 
															+
														
 
															+> **说明**: 正文作为辅助判断，补充图片未完整呈现的知识信息
														
 
															+
														
 
															+#### 维度6: 方法/步骤描述（10分）
														
 
															+**评分依据**: 正文是否描述了具体的内容创作方法或操作步骤
														
 
															+
														
 
															+- **10分**: 有完整的内容创作步骤（≥3步）或详细的方法说明
														
 
															+- **7分**: 有步骤或方法描述，但不够系统
														
 
															+- **4分**: 有零散的方法提及
														
 
															+- **0分**: 无方法/步骤，纯叙事或展示性文字
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+#### 维度7: 知识总结与提炼（10分）
														
 
															+**评分依据**: 正文是否对内容创作经验/规律进行总结提炼
														
 
															+
														
 
															+- **10分**: 有明确的知识总结、规律归纳、框架化输出
														
 
															+- **7分**: 有一定的经验总结或要点提炼
														
 
															+- **4分**: 有零散的心得，但未成体系
														
 
															+- **0分**: 无任何知识提炼
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+**📝 正文层总分**: __/20
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 🏷️ 标题层评估（权重10%，满分10分）
														
 
															+
														
 
															+> **说明**: 标题作为内容导向，辅助判断内容主题
														
 
															+
														
 
															+#### 维度8: 标题内容指向性（10分）
														
 
															+**评分依据**: 标题是否明确指向内容创作/制作相关的知识
														
 
															+
														
 
															+- **10分**: 标题明确包含内容创作相关词汇（"爆款XX"、"涨粉XX"、"XX文案"、"XX脚本"、"XX选题"、"XX标题"、"如何拍/写/做XX"）
														
 
															+- **7分**: 标题包含整理型词汇（"XX合集"、"XX技巧总结"）
														
 
															+- **4分**: 描述性标题，暗示有内容创作知识
														
 
															+- **0分**: 纯展示型标题（"我的作品"、"今天拍了XX"）或与内容创作无关
														
 
															+
														
 
															+**得分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+**🏷️标题层总分**: __/10
														
 
															+
														
 
															+---
														
 
															+
														
 
															+### 第四步: 综合评分与判定
														
 
															+
														
 
															+**总分计算**:
														
 
															+总分 = 图片层总分(70分) + 正文层总分(20分) + 标题层总分(10分)
														
 
															+
														
 
															+**最终得分**: __/100分
														
 
															+
														
 
															+---
														
 
															+
														
 
															+**判定等级**:
														
 
															+- **85-100分**: ⭐⭐⭐⭐⭐ 优质内容知识 - 强烈符合
														
 
															+- **70-84分**: ⭐⭐⭐⭐ 良好内容知识 - 符合
														
 
															+- **55-69分**: ⭐⭐⭐ 基础内容知识 - 基本符合
														
 
															+- **40-54分**: ⭐⭐ 弱内容知识 - 不符合
														
 
															+- **0-39分**: ⭐ 非内容知识 - 完全不符合
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输出格式（JSON）
														
 
															+```json
														
 
															+{
														
 
															+  "is_content_knowledge": true/false,
														
 
															+  "final_score": 0-100的整数,
														
 
															+  "level": "⭐⭐⭐⭐⭐ 优质内容知识 / ⭐⭐⭐⭐ 良好内容知识 / ⭐⭐⭐ 基础内容知识 / ⭐⭐ 弱内容知识 / ⭐ 非内容知识",
														
 
															+  "quick_exclude": {
														
 
															+    "result": "是/否",
														
 
															+    "reason": "快速排除判定理由"
														
 
															+  },
														
 
															+  "dimension_scores": {
														
 
															+    "image_layer": {
														
 
															+      "creation_method": {
														
 
															+        "score": 0-20的整数,
														
 
															+        "reason": "内容创作方法呈现评分依据"
														
 
															+      },
														
 
															+      "knowledge_system": {
														
 
															+        "score": 0-15的整数,
														
 
															+        "reason": "内容知识体系化评分依据"
														
 
															+      },
														
 
															+      "teaching_annotation": {
														
 
															+        "score": 0-15的整数,
														
 
															+        "reason": "教学性标注评分依据"
														
 
															+      },
														
 
															+      "method_reusability": {
														
 
															+        "score": 0-10的整数,
														
 
															+        "reason": "方法通识性评分依据"
														
 
															+      },
														
 
															+      "principle_case": {
														
 
															+        "score": 0-10的整数,
														
 
															+        "reason": "原理性深度评分依据"
														
 
															+      },
														
 
															+      "subtotal": 0-70的整数
														
 
															+    },
														
 
															+    "text_layer": {
														
 
															+      "method_description": {
														
 
															+        "score": 0-10的整数,
														
 
															+        "reason": "方法/步骤描述评分依据"
														
 
															+      },
														
 
															+      "knowledge_summary": {
														
 
															+        "score": 0-10的整数,
														
 
															+        "reason": "知识总结提炼评分依据"
														
 
															+      },
														
 
															+      "subtotal": 0-20的整数
														
 
															+    },
														
 
															+    "title_layer": {
														
 
															+      "content_direction": {
														
 
															+        "score": 0-10的整数,
														
 
															+        "reason": "标题内容创作指向性评分依据"
														
 
															+      },
														
 
															+      "subtotal": 0-10的整数
														
 
															+    }
														
 
															+  },
														
 
															+  "core_evidence": [
														
 
															+    "证据1：从图片/正文/标题中提取的关键证据",
														
 
															+    "证据2：...",
														
 
															+    "证据3：..."
														
 
															+  ],
														
 
															+  "issues": [
														
 
															+    "问题1：存在的不足",
														
 
															+    "问题2：..."
														
 
															+  ],
														
 
															+  "summary": "总结陈述（5-6句话说明判定结果和核心理由，明确指出为何属于/不属于内容知识）"
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 判断原则
														
 
															+1. **图片主导原则**: 图片占70%权重，是核心判断依据；标题和正文为辅助
														
 
															+2. **创作领域限定**: 必须属于社交媒体内容创作/制作领域，其他领域知识（包括与内容创作无关的摄影、设计等专业技能）不属于内容知识
														
 
															+3. **方法优先原则**: 重点评估是否提供了可操作的创作方法，而非纯作品展示
														
 
															+4. **通用性要求**: 优先考虑方法的可复用性和可迁移性
														
 
															+5. **严格性原则**: 宁可误判为"非内容知识"，也不放过纯展示型内容
														
 
															+6. **证据性原则**: 评分需基于明确的视觉和文本证据，可量化衡量
														
 
															+"""
														
 
															+
														
 
															+USER_TEMPLATE2_CONTENT_KNOWLEDGE = """请评估以下帖子是否属于内容知识：
														
 
															+
														
 
															+**标题**: {title}
														
 
															+**正文**: {body_text}
														
 
															+**图片**: {num_images}张（图片内容见下方）
														
 
															+"""
														
 
															+
														
 
															+# ============================================================================
														
 
															+# Prompt3: 目的性匹配评估 - 拆分为System和User
														
 
															+# ============================================================================
														
 
															+
														
 
															+SYSTEM_PROMPT3_PURPOSE_MATCH = """
														
 
															+
														
 
															+# Prompt 1: 多模态内容目的动机匹配评估
														
 
															+
														
 
															+## 角色定义
														
 
															+你是一位专业的多模态内容评估专家，擅长分析社交媒体UGC平台帖子的**目的动机匹配度**，能够精准判断帖子是否满足用户的核心意图。
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 任务说明
														
 
															+你将收到一个**原始搜索需求**和一条**多模态帖子**（包含图片、标题、正文），
														
 
															+请**仅评估目的动机维度**的匹配度，输出0-100分的量化得分。
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输入格式
														
 
															+
														
 
															+**原始搜索需求：**
														
 
															+[用户的搜索查询词/需求描述]
														
 
															+
														
 
															+**多模态帖子内容：**
														
 
															+- **图片：** [图片内容描述或实际图片]
														
 
															+- **标题：** [帖子标题]
														
 
															+- **正文：** [帖子正文内容]
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 评估维度：目的动机匹配
														
 
															+
														
 
															+### 核心评估逻辑
														
 
															+
														
 
															+**目的动机 = 用户想做什么 = 核心动词/意图**
														
 
															+
														
 
															+常见动机类型：
														
 
															+- **获取型**：寻找、下载、收藏、获取
														
 
															+- **学习型**：教程、学习、了解、掌握
														
 
															+- **决策型**：推荐、对比、评测、选择
														
 
															+- **创作型**：拍摄、制作、设计、生成
														
 
															+- **分享型**：晒单、记录、分享、展示
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 评估流程
														
 
															+
														
 
															+### 第一步：识别原始需求的核心动机
														
 
															+- 提取**核心动词**（如果是纯名词短语，识别隐含意图）
														
 
															+- 判断用户的**最终目的**是什么
														
 
															+
														
 
															+### 第二步：分析帖子提供的价值（重点看图片）
														
 
															+
														
 
															+**图片分析（权重70%）：**
														
 
															+- 图片展示的是什么类型的内容？
														
 
															+- 图片是否直接解答了需求的目的？
														
 
															+- 图片的信息完整度和实用性如何？
														
 
															+
														
 
															+**标题分析（权重15%）：**
														
 
															+- 标题是否明确了内容的目的？
														
 
															+
														
 
															+**正文分析（权重15%）：**
														
 
															+- 正文是否提供了实质性的解答内容？
														
 
															+
														
 
															+### 第三步：判断目的匹配度
														
 
															+- 帖子是否**实质性地满足**了需求的动机？
														
 
															+- 内容是否**实用、完整、可执行**？
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 评分标准（0-100分）
														
 
															+
														
 
															+### 高度匹配区间
														
 
															+
														
 
															+**90-100分：完全满足动机，内容实用完整**
														
 
															+- 图片直接展示解决方案/教程步骤/对比结果
														
 
															+- 内容完整、清晰、可直接使用
														
 
															+- 例：需求"如何拍摄夜景" vs 图片展示完整的夜景拍摄参数设置和效果对比
														
 
															+
														
 
															+**75-89分：基本满足动机，信息较全面**
														
 
															+- 图片提供了核心解答内容
														
 
															+- 信息相对完整但深度略有不足
														
 
															+- 例：需求"推荐旅行路线" vs 图片展示了路线图但缺少详细说明
														
 
															+
														
 
															+**60-74分：部分满足动机，有参考价值**
														
 
															+- 图片提供了相关内容但不够直接
														
 
															+- 需要结合文字才能理解完整意图
														
 
															+
														
 
															+### 中度相关区间
														
 
															+
														
 
															+**40-59分：弱相关，核心目的未充分满足**
														
 
															+- 图片内容与动机有关联但不是直接解答
														
 
															+- 实用性较低
														
 
															+- 例：需求"如何拍摄" vs 图片只展示成品照片，无教程内容
														
 
															+
														
 
															+
														
 
															+### 不相关区间
														
 
															+
														
 
															+**20-39分：微弱关联，基本未解答**
														
 
															+- 图片仅有外围相关性
														
 
															+- 对满足需求帮助极小
														
 
															+
														
 
															+**1-19分：几乎无关**
														
 
															+- 图片与需求动机关联极弱
														
 
															+
														
 
															+**0分：完全不相关**
														
 
															+- 图片与需求动机无任何关联
														
 
															+
														
 
															+**负分不使用**（目的动机维度不设负分）
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输出格式（JSON）
														
 
															+```json
														
 
															+{
														
 
															+  "purpose_score": 0-100的整数,
														
 
															+  "core_motivation": "识别出的用户意图（一句话）",
														
 
															+  "image_value": "图片展示了什么，如何满足动机",
														
 
															+  "title_intention": "标题说明了什么",
														
 
															+  "text_content": "正文是否有实质解答",
														
 
															+  "match_level": "完全匹配/高度匹配/基本匹配/弱匹配/不匹配",
														
 
															+  "core_basis": "为什么给这个分数（100字以内）"
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 评估原则
														
 
															+
														
 
															+1. **图片优先**：图片权重70%，是判断的主要依据
														
 
															+2. **实用导向**：不看表面相关，看实际解答程度
														
 
															+3. **严格标准**：宁可低估，避免虚高
														
 
															+4. **客观量化**：基于可观察的内容特征打分
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 特别注意
														
 
															+
														
 
															+- 本评估**只关注目的动机维度**，不考虑品类是否匹配
														
 
															+- 输出的分数必须是**0-100的整数**
														
 
															+- 不要自行计算综合分数，只输出目的动机分数
														
 
															+- 评分依据要具体、可验证
														
 
															+
														
 
															+"""
														
 
															+
														
 
															+USER_TEMPLATE3_PURPOSE_MATCH = """请评估以下帖子与用户需求的目的性匹配度：
														
 
															+
														
 
															+**原始搜索词**: {original_query}
														
 
															+**帖子标题**: {title}
														
 
															+**帖子正文**: {body_text}
														
 
															+**图片**: {num_images}张（图片内容见下方）
														
 
															+"""
														
 
															+
														
 
															+# ============================================================================
														
 
															+# Prompt4: 品类匹配评估 - 拆分为System和User  
														
 
															+# ============================================================================
														
 
															+
														
 
															+SYSTEM_PROMPT4_CATEGORY_MATCH = """# Prompt 2: 多模态内容品类匹配评估
														
 
															+
														
 
															+## 角色定义
														
 
															+你是一位专业的多模态内容评估专家，擅长分析社交媒体UGC平台帖子的**品类匹配度**，
														
 
															+能够精准判断帖子的内容主体是否与用户需求一致。
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 任务说明
														
 
															+你将收到一个**原始搜索需求**和一条**多模态帖子**（包含图片、标题、正文），请**仅评估品类维度**的匹配度，输出0-100分的量化得分。
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输入格式
														
 
															+
														
 
															+**原始搜索需求：**
														
 
															+[用户的搜索查询词/需求描述]
														
 
															+
														
 
															+**多模态帖子内容：**
														
 
															+- **图片：** [图片内容描述或实际图片]
														
 
															+- **标题：** [帖子标题]
														
 
															+- **正文：** [帖子正文内容]
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 评估维度：品类匹配
														
 
															+
														
 
															+### 核心评估逻辑
														
 
															+
														
 
															+**品类 = 核心主体（名词）+ 限定词**
														
 
															+
														
 
															+- **核心主体**：具体的内容对象（风光摄影、旅行攻略、美食推荐）
														
 
															+- **限定词**：
														
 
															+  - 地域：川西、成都、日本
														
 
															+  - 时间：秋季、夏天、2024
														
 
															+  - 类型：免费、高清、入门级
														
 
															+  - 风格：小清新、复古、简约
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 评估流程
														
 
															+
														
 
															+### 第一步：提取原始需求的品类信息
														
 
															+- 识别**核心主体名词**
														
 
															+- 识别**关键限定词**（地域/时间/类型/风格等）
														
 
															+
														
 
															+### 第二步：从帖子中提取品类信息（重点看图片）
														
 
															+
														
 
															+**图片识别（权重70%）：**
														
 
															+- 图片展示的核心主体是什么？
														
 
															+- 图片中可识别的限定特征（地域标志、季节特征、类型属性、风格特点）
														
 
															+
														
 
															+**标题提取（权重15%）：**
														
 
															+- 标题明确的品类名词和限定词
														
 
															+
														
 
															+**正文提取（权重15%）：**
														
 
															+- 正文描述的品类信息
														
 
															+
														
 
															+### 第三步：对比匹配度
														
 
															+- 核心主体是否一致？
														
 
															+- 限定词匹配了几个？
														
 
															+- 是否存在泛化或偏移？
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 评分标准（0-100分）
														
 
															+
														
 
															+### 高度匹配区间
														
 
															+
														
 
															+**90-100分：核心主体+关键限定词完全匹配**
														
 
															+- 图片展示的主体与需求精准一致
														
 
															+- 关键限定词全部匹配（地域、时间、类型等）
														
 
															+- 例：需求"川西秋季风光" vs 图片展示川西秋季风景
														
 
															+
														
 
															+**75-89分：核心主体匹配，限定词匹配度百分之80**
														
 
															+- 图片主体一致
														
 
															+- 存在1-2个限定词缺失但不影响核心匹配
														
 
															+- 例：需求"川西秋季风光" vs 图片展示川西风光（缺秋季）
														
 
															+
														
 
															+**60-74分：核心主体匹配，限定词匹配度百分之60**
														
 
															+- 图片主体在同一大类
														
 
															+- 限定词部分匹配或有合理上下位关系
														
 
															+- 例：需求"川西秋季风光" vs 图片展示四川风光
														
 
															+
														
 
															+### 中度相关区间
														
 
															+
														
 
															+**40-59分：核心主体匹配，限定词完全不匹配**
														
 
															+- 图片主体相同但上下文不同
														
 
															+- 限定词严重缺失或不匹配
														
 
															+- 例：需求"猫咪表情包梗图" vs 女孩表情包
														
 
															+
														
 
															+### 不相关/负向区间
														
 
															+
														
 
															+**20-39分：主体过度泛化**
														
 
															+- 图片主体是通用概念，需求是特定概念
														
 
															+- 仅有抽象类别相似
														
 
															+- 例：需求"川西旅行攻略" vs 图片展示普通旅行场景
														
 
															+
														
 
															+**1-19分：品类关联极弱**
														
 
															+- 图片主体与需求差异明显
														
 
															+
														
 
															+**0分：品类完全不同**
														
 
															+- 图片主体类别完全不同
														
 
															+- 例：需求"风光摄影" vs 图片展示美食
														
 
															+
														
 
															+**负分不使用**（品类维度不设负分）
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 输出格式（JSON）
														
 
															+```json
														
 
															+{
														
 
															+  "category_score": 0-100的整数,
														
 
															+  "original_category_analysis": {
														
 
															+    "核心主体": "提取的主体名词",
														
 
															+    "关键限定词": ["限定词1", "限定词2"]
														
 
															+  },
														
 
															+  "actual_category": {
														
 
															+    "图片主体": "图片展示的核心主体",
														
 
															+    "图片限定特征": ["从图片识别的限定词"],
														
 
															+    "标题品类": "标题提及的品类",
														
 
															+    "正文品类": "正文描述的品类"
														
 
															+  },
														
 
															+  "match_level": "完全匹配/高度匹配/基本匹配/弱匹配/不匹配",
														
 
															+  "category_match_analysis": {
														
 
															+    "主体匹配情况": "主体是否一致",
														
 
															+    "限定词匹配情况": "哪些限定词匹配/缺失"
														
 
															+  },
														
 
															+  "core_basis": "为什么给这个分数（100字以内）"
														
 
															+}
														
 
															+```
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 评估原则
														
 
															+
														
 
															+1. **图片优先**：图片权重70%，是判断的主要依据
														
 
															+2. **表面匹配**：只看实际展示的内容，禁止推测联想
														
 
															+3. **通用≠特定**：通用概念不等于特定概念，需明确区分
														
 
															+4. **严格标准**：宁可低估，避免虚高
														
 
															+5. **客观量化**：基于可观察的视觉特征和文字信息打分
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 特别注意
														
 
															+
														
 
															+- 本评估**只关注品类维度**，不考虑目的是否匹配
														
 
															+- 输出的分数必须是**0-100的整数**
														
 
															+- 不要自行计算综合分数，只输出品类分数
														
 
															+- 禁止因为"可能相关"就给分，必须有明确视觉证据
														
 
															+
														
 
															+---
														
 
															+"""
														
 
															+
														
 
# User-message template for the category-match evaluation.
# Placeholders: {original_query}, {title}, {body_text}, {num_images}.
# NOTE(review): PromptAdapter.adapt_prompt (same file) already appends the unit
# to num_images ("3张" / "1个视频"); if this template is rendered through it, the
# last line would read "3张张（…）" or "1个视频张（…）" — confirm against the caller.
USER_TEMPLATE4_CATEGORY_MATCH = """请评估以下帖子与用户需求的品类匹配度：

**原始搜索词**: {original_query}
**帖子标题**: {title}
**帖子正文**: {body_text}
**图片**: {num_images}张（图片内容见下方）
"""
														
 
															+
														
 
															+
														
 
															+# 为了向后兼容，保留原始导入
														
 
															+from post_evaluator_v3 import (
														
 
															+    PROMPT1_IS_KNOWLEDGE,
														
 
															+    PROMPT2_IS_CONTENT_KNOWLEDGE,
														
 
															+    PROMPT3_PURPOSE_MATCH,
														
 
															+    PROMPT4_CATEGORY_MATCH
														
 
															+)
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# Gemini Client
														
 
															+# ============================================================================
														
 
															+
														
 
															+class GeminiClient:
														
 
															+    """Gemini API客户端 - 使用LangChain ChatGoogleGenerativeAI"""
														
 
															+
														
 
															+    def __init__(self, api_key: str = GEMINI_API_KEY, model_name: str = GEMINI_MODEL_NAME):
														
 
															+        self.api_key = api_key
														
 
															+        self.model_name = model_name
														
 
															+
														
 
															+    def create_model(self) -> ChatGoogleGenerativeAI:
														
 
															+        """创建Gemini模型实例(LangChain)"""
														
 
															+        return ChatGoogleGenerativeAI(
														
 
															+            model=self.model_name,
														
 
															+            google_api_key=self.api_key,
														
 
															+            temperature=0.1,
														
 
															+            # 配置返回JSON格式
														
 
															+            model_kwargs={
														
 
															+                "response_mime_type": "application/json"
														
 
															+            }
														
 
															+        )
														
 
															+
														
 
															+    async def generate_content(
														
 
															+        self,
														
 
															+        prompt_text: str = None,
														
 
															+        media_files: Optional[List[Any]] = None,
														
 
															+        max_retries: int = MAX_RETRIES,
														
 
															+        system_prompt: str = None,
														
 
															+        user_prompt: str = None
														
 
															+    ) -> dict:
														
 
															+        """
														
 
															+        调用Gemini API生成内容 (支持SystemMessage + HumanMessage)
														
 
															+
														
 
															+        Args:
														
 
															+            prompt_text: Prompt文本(旧格式,向后兼容)
														
 
															+            media_files: 媒体文件列表 (base64 data URL字典或视频File对象)
														
 
															+            max_retries: 最大重试次数
														
 
															+            system_prompt: System Prompt(新格式 - 评估规则)
														
 
															+            user_prompt: User Prompt(新格式 - 帖子数据)
														
 
															+
														
 
															+        Returns:
														
 
															+            解析后的JSON响应
														
 
															+        """
														
 
															+        # 构建messages列表
														
 
															+        messages = []
														
 
															+
														
 
															+        # 如果提供了system_prompt和user_prompt,使用新格式
														
 
															+        if system_prompt and user_prompt:
														
 
															+            # System Message
														
 
															+            messages.append(SystemMessage(content=system_prompt))
														
 
															+
														
 
															+            # Human Message (用户内容 + 图片)
														
 
															+            human_content = [{"type": "text", "text": user_prompt}]
														
 
															+            if media_files:
														
 
															+                human_content.extend(media_files)
														
 
															+            messages.append(HumanMessage(content=human_content))
														
 
															+
														
 
															+        # 否则使用旧格式(向后兼容)
														
 
															+        else:
														
 
															+            content = []
														
 
															+            # 添加文本
														
 
															+            content.append({"type": "text", "text": prompt_text or ""})
														
 
															+
														
 
															+            # 添加媒体文件
														
 
															+            if media_files:
														
 
															+                content.extend(media_files)
														
 
															+            messages.append(HumanMessage(content=content))
														
 
															+
														
 
															+        # 打印调试信息
														
 
															+        if media_files:
														
 
															+            print(f"      🔍 传递给Gemini: {len(media_files)}个媒体文件")
														
 
															+            for i, media in enumerate(media_files[:3]):
														
 
															+                if isinstance(media, dict) and media.get("type") == "image_url":
														
 
															+                    data_url = media.get("image_url", {}).get("url", "")
														
 
															+                    print(f"         📸 图片[{i}]: Base64 data URL ({len(data_url)}字符)")
														
 
															+                else:
														
 
															+                    print(f"         🎥 视频[{i}]: {type(media).__name__}")
														
 
															+        else:
														
 
															+            print(f"      ⚠️  无媒体文件传递给Gemini（仅文本）")
														
 
															+
														
 
															+        print(f"      💬 Messages: {len(messages)} ({['System' if 'SystemMessage' in str(type(m)) else 'Human' for m in messages]})")
														
 
															+
														
 
															+        # 创建模型
														
 
															+        model = self.create_model()
														
 
															+
														
 
															+        for attempt in range(max_retries + 1):
														
 
															+            try:
														
 
															+                # 调用模型
														
 
															+                loop = asyncio.get_event_loop()
														
 
															+                response = await loop.run_in_executor(
														
 
															+                    None,
														
 
															+                    lambda: model.invoke(messages)
														
 
															+                )
														
 
															+
														
 
															+                # 解析JSON响应
														
 
															+                response_text = response.content.strip()
														
 
															+                response_text = self._clean_json_response(response_text)
														
 
															+                return json.loads(response_text)
														
 
															+
														
 
															+            except Exception as e:
														
 
															+                error_msg = str(e)
														
 
															+                print(f"      ❌ Gemini API错误详情: {error_msg[:200]}")
														
 
															+                if "image" in error_msg.lower() or "media" in error_msg.lower():
														
 
															+                    print(f"      ⚠️  可能是图片/媒体访问问题")
														
 
															+
														
 
															+                if attempt < max_retries:
														
 
															+                    wait_time = RETRY_WAIT_SECONDS * (attempt + 1)
														
 
															+                    print(f"      ⏳ {wait_time}秒后重试 (第{attempt + 1}/{max_retries}次)")
														
 
															+                    await asyncio.sleep(wait_time)
														
 
															+                else:
														
 
															+                    raise Exception(f"Gemini API调用失败: {error_msg}")
														
 
															+
														
 
															+    @staticmethod
														
 
															+    def _clean_json_response(text: str) -> str:
														
 
															+        """清理JSON响应"""
														
 
															+        text = text.strip()
														
 
															+        if text.startswith("```json"):
														
 
															+            text = text[7:]
														
 
															+        elif text.startswith("```"):
														
 
															+            text = text[3:]
														
 
															+        if text.endswith("```"):
														
 
															+            text = text[:-3]
														
 
															+        return text.strip()
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# Video Uploader
														
 
															+# ============================================================================
														
 
															+
														
 
															+class VideoUploader:
														
 
															+    """视频上传处理器"""
														
 
															+
														
 
															+    @staticmethod
														
 
															+    async def upload_video(video_url: str) -> tuple[Optional[Any], Optional[str], Optional[str]]:
														
 
															+        """
														
 
															+        上传视频到Gemini
														
 
															+
														
 
															+        Args:
														
 
															+            video_url: 视频URL
														
 
															+
														
 
															+        Returns:
														
 
															+            (video_file, video_uri, temp_path)
														
 
															+        """
														
 
															+        import requests
														
 
															+
														
 
															+        # 下载视频到临时文件
														
 
															+        temp_fd, temp_path = tempfile.mkstemp(suffix=".mp4", prefix="eval_video_")
														
 
															+        os.close(temp_fd)
														
 
															+
														
 
															+        try:
														
 
															+            print(f"      📥 下载视频: {video_url[:60]}...")
														
 
															+
														
 
															+            # 下载
														
 
															+            loop = asyncio.get_event_loop()
														
 
															+            response = await loop.run_in_executor(
														
 
															+                None,
														
 
															+                lambda: requests.get(video_url, timeout=120, stream=True)
														
 
															+            )
														
 
															+            response.raise_for_status()
														
 
															+
														
 
															+            with open(temp_path, 'wb') as f:
														
 
															+                for chunk in response.iter_content(chunk_size=8192):
														
 
															+                    if chunk:
														
 
															+                        f.write(chunk)
														
 
															+
														
 
															+            file_size_mb = os.path.getsize(temp_path) / (1024 * 1024)
														
 
															+            print(f"      📦 视频下载完成,大小: {file_size_mb:.2f}MB")
														
 
															+
														
 
															+            # 上传到Gemini
														
 
															+            print(f"      ☁️  上传到Gemini...")
														
 
															+            # 暂时禁用视频上传功能(genai版本冲突)
														
 
															+            raise NotImplementedError("视频上传暂时禁用,等待修复版本冲突")
														
 
															+            # uploaded_file = await loop.run_in_executor(
														
 
															+            #     None,
														
 
															+            #     lambda: genai.upload_file(temp_path)
														
 
															+            # )
														
 
															+
														
 
															+            # 等待处理
														
 
															+            processed_file = await VideoUploader._wait_for_processing(uploaded_file)
														
 
															+            if not processed_file:
														
 
															+                return None, None, temp_path
														
 
															+
														
 
															+            print(f"      ✅ 视频上传成功: {processed_file.uri}")
														
 
															+            return processed_file, processed_file.uri, temp_path
														
 
															+
														
 
															+        except Exception as e:
														
 
															+            print(f"      ❌ 视频上传失败: {str(e)[:100]}")
														
 
															+            return None, None, temp_path
														
 
															+
														
 
															+    @staticmethod
														
 
															+    async def _wait_for_processing(uploaded_file: Any) -> Optional[Any]:
														
 
															+        """等待Gemini处理视频文件"""
														
 
															+        start_time = time.time()
														
 
															+        current_file = uploaded_file
														
 
															+
														
 
															+        loop = asyncio.get_event_loop()
														
 
															+
														
 
															+        while current_file.state.name == "PROCESSING":
														
 
															+            elapsed = time.time() - start_time
														
 
															+            if elapsed > FILE_PROCESS_TIMEOUT:
														
 
															+                print(f"      ❌ 视频处理超时: {current_file.name}")
														
 
															+                return None
														
 
															+
														
 
															+            print(f"      ⏳ 等待Gemini处理视频...{elapsed:.0f}s")
														
 
															+            await asyncio.sleep(RETRY_WAIT_SECONDS)
														
 
															+
														
 
															+            current_file = await loop.run_in_executor(
														
 
															+                None,
														
 
															+                lambda: genai.get_file(current_file.name)
														
 
															+            )
														
 
															+
														
 
															+        if current_file.state.name == "FAILED":
														
 
															+            print(f"      ❌ 视频处理失败: {current_file.state}")
														
 
															+            return None
														
 
															+
														
 
															+        return current_file
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# Image Uploader
														
 
															+# ============================================================================
														
 
															+
														
 
															+class ImageUploader:
														
 
															+    """图片加载器 - 下载图片并转为base64 data URL(参考demo)"""
														
 
															+
														
 
															+    @staticmethod
														
 
															+    async def upload_images(image_urls: List[str]) -> tuple[List[Dict], List[str]]:
														
 
															+        """
														
 
															+        批量下载图片并转为base64 data URL格式
														
 
															+
														
 
															+        Args:
														
 
															+            image_urls: 图片URL列表
														
 
															+
														
 
															+        Returns:
														
 
															+            (image_contents, []) - 图片content字典列表和空列表(保持接口兼容)
														
 
															+            格式: {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
														
 
															+        """
														
 
															+        if not image_urls:
														
 
															+            return [], []
														
 
															+
														
 
															+        print(f"      📥 准备加载 {len(image_urls)} 张图片(Base64方式)...")
														
 
															+
														
 
															+        # 并发下载所有图片
														
 
															+        tasks = [ImageUploader._load_single_image(url, idx) for idx, url in enumerate(image_urls)]
														
 
															+        results = await asyncio.gather(*tasks, return_exceptions=True)
														
 
															+
														
 
															+        # 分离成功和失败的结果
														
 
															+        image_contents = []
														
 
															+
														
 
															+        for idx, result in enumerate(results):
														
 
															+            if isinstance(result, Exception):
														
 
															+                print(f"      ⚠️  图片{idx}加载失败: {str(result)[:50]}")
														
 
															+            elif result is not None:
														
 
															+                image_contents.append(result)
														
 
															+
														
 
															+        print(f"      ✅ 成功加载 {len(image_contents)}/{len(image_urls)} 张图片")
														
 
															+        return image_contents, []  # 返回空列表作为temp_paths,因为不需要清理
														
 
															+
														
 
															+    @staticmethod
														
 
															+    async def _load_single_image(image_url: str, idx: int) -> Optional[Dict]:
														
 
															+        """
														
 
															+        下载单张图片并转为base64 data URL格式
														
 
															+
														
 
															+        Args:
														
 
															+            image_url: 图片URL
														
 
															+            idx: 图片索引(用于日志)
														
 
															+
														
 
															+        Returns:
														
 
															+            图片content字典: {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
														
 
															+        """
														
 
															+        try:
														
 
															+            # 下载图片到内存
														
 
															+            loop = asyncio.get_event_loop()
														
 
															+            response = await loop.run_in_executor(
														
 
															+                None,
														
 
															+                lambda: requests.get(image_url, timeout=30)
														
 
															+            )
														
 
															+            response.raise_for_status()
														
 
															+
														
 
															+            # 转换为PIL Image对象
														
 
															+            image = Image.open(io.BytesIO(response.content))
														
 
															+
														
 
															+            # 转换为RGB模式(Gemini推荐)
														
 
															+            if image.mode != 'RGB':
														
 
															+                image = image.convert('RGB')
														
 
															+
														
 
															+            # 转换为PNG格式的BytesIO
														
 
															+            buffer = io.BytesIO()
														
 
															+            image.save(buffer, format="PNG")
														
 
															+            image_bytes = buffer.getvalue()
														
 
															+
														
 
															+            # Base64编码
														
 
															+            base64_encoded = base64.b64encode(image_bytes).decode('utf-8')
														
 
															+            data_url = f"data:image/png;base64,{base64_encoded}"
														
 
															+
														
 
															+            file_size_kb = len(image_bytes) / 1024
														
 
															+            print(f"      ✓ 图片{idx}加载成功 ({file_size_kb:.1f}KB, {image.size[0]}x{image.size[1]})")
														
 
															+
														
 
															+            # 返回格式与demo一致
														
 
															+            return {
														
 
															+                "type": "image_url",
														
 
															+                "image_url": {"url": data_url}
														
 
															+            }
														
 
															+
														
 
															+        except Exception as e:
														
 
															+            print(f"      ✗ 图片{idx}加载失败: {str(e)[:60]}")
														
 
															+            return None
														
 
															+
														
 
															+
														
 
															+
														
 
															+class PromptAdapter:
														
 
															+    """Prompt适配器 - 根据媒体类型调整Prompt"""
														
 
															+
														
 
															+    @staticmethod
														
 
															+    def adapt_prompt(prompt_template: str, post: Any, **kwargs) -> str:
														
 
															+        """
														
 
															+        适配Prompt
														
 
															+
														
 
															+        Args:
														
 
															+            prompt_template: Prompt模板
														
 
															+            post: Post对象
														
 
															+            **kwargs: 其他参数 (如original_query)
														
 
															+
														
 
															+        Returns:
														
 
															+            适配后的Prompt
														
 
															+        """
														
 
															+        # 准备替换参数
														
 
															+        params = {
														
 
															+            "title": post.title or "",
														
 
															+            "body_text": post.body_text or "",
														
 
															+        }
														
 
															+
														
 
															+        # 媒体描述
														
 
															+        if post.type == "video":
														
 
															+            params["num_images"] = "1个视频"
														
 
															+        else:
														
 
															+            num_images = len(post.images) if post.images else 0
														
 
															+            params["num_images"] = f"{num_images}张"
														
 
															+
														
 
															+        # 添加其他参数
														
 
															+        params.update(kwargs)
														
 
															+
														
 
															+        return prompt_template.format(**params)
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# 缓存函数 (复用V3逻辑)
														
 
															+# ============================================================================
														
 
															+
														
 
															+def _get_cache_key(note_id: str) -> str:
														
 
															+    """生成缓存key"""
														
 
															+    return f"{note_id}_v4.0.json"
														
 
															+
														
 
															+
														
 
															+def _load_from_cache(note_id: str) -> Optional[tuple]:
														
 
															+    """从缓存加载评估结果"""
														
 
															+    if not ENABLE_CACHE:
														
 
															+        return None
														
 
															+
														
 
															+    cache_file = os.path.join(CACHE_DIR, _get_cache_key(note_id))
														
 
															+
														
 
															+    if not os.path.exists(cache_file):
														
 
															+        return None
														
 
															+
														
 
															+    try:
														
 
															+        with open(cache_file, 'r', encoding='utf-8') as f:
														
 
															+            data = json.load(f)
														
 
															+
														
 
															+        # 重建评估对象
														
 
															+        knowledge_eval = None
														
 
															+        if data.get("knowledge_eval"):
														
 
															+            knowledge_eval = KnowledgeEvaluation(**data["knowledge_eval"])
														
 
															+
														
 
															+        content_eval = None
														
 
															+        if data.get("content_eval"):
														
 
															+            content_eval = ContentKnowledgeEvaluation(**data["content_eval"])
														
 
															+
														
 
															+        purpose_eval = None
														
 
															+        if data.get("purpose_eval"):
														
 
															+            purpose_eval = PurposeEvaluation(**data["purpose_eval"])
														
 
															+
														
 
															+        category_eval = None
														
 
															+        if data.get("category_eval"):
														
 
															+            category_eval = CategoryEvaluation(**data["category_eval"])
														
 
															+
														
 
															+        final_score = data.get("final_score")
														
 
															+        match_level = data.get("match_level")
														
 
															+
														
 
															+        return (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"      ⚠️  缓存读取失败: {note_id} - {str(e)[:50]}")
														
 
															+        return None
														
 
															+
														
 
															+
														
 
															+def _save_to_cache(note_id: str, eval_results: tuple):
														
 
															+    """保存评估结果到缓存"""
														
 
															+    if not ENABLE_CACHE:
														
 
															+        return
														
 
															+
														
 
															+    knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level = eval_results
														
 
															+
														
 
															+    os.makedirs(CACHE_DIR, exist_ok=True)
														
 
															+
														
 
															+    cache_data = {
														
 
															+        "knowledge_eval": knowledge_eval.model_dump() if knowledge_eval else None,
														
 
															+        "content_eval": content_eval.model_dump() if content_eval else None,
														
 
															+        "purpose_eval": purpose_eval.model_dump() if purpose_eval else None,
														
 
															+        "category_eval": category_eval.model_dump() if category_eval else None,
														
 
															+        "final_score": final_score,
														
 
															+        "match_level": match_level,
														
 
															+        "cache_time": datetime.now().isoformat(),
														
 
															+        "evaluator_version": "v4.0"
														
 
															+    }
														
 
															+
														
 
															+    cache_file = os.path.join(CACHE_DIR, _get_cache_key(note_id))
														
 
															+
														
 
															+    try:
														
 
															+        with open(cache_file, 'w', encoding='utf-8') as f:
														
 
															+            json.dump(cache_data, f, ensure_ascii=False, indent=2)
														
 
															+    except Exception as e:
														
 
															+        print(f"      ⚠️  缓存保存失败: {note_id} - {str(e)[:50]}")
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# LangGraph 节点函数
														
 
															+# ============================================================================
														
 
															+
														
 
															+async def knowledge_node(state: EvaluationState) -> EvaluationState:
														
 
															+    """
														
 
															+    Node 1: 知识判断 (Prompt1)
														
 
															+    """
														
 
															+    post = state["post"]
														
 
															+    semaphore = state.get("semaphore")
														
 
															+
														
 
															+    print(f"      📝 Step 1/4: 判断是知识...")
														
 
															+
														
 
															+    try:
														
 
															+        # 准备媒体文件
														
 
															+        media_files = []
														
 
															+        if post.type == "video" and state.get("video_file"):
														
 
															+            media_files = [state["video_file"]]
														
 
															+            print(f"      📹 准备视频文件: {state.get('video_uri', 'N/A')}")
														
 
															+        elif post.images:
														
 
															+            # 图文帖子 - 上传图片到Gemini
														
 
															+            image_urls = post.images[:MAX_IMAGES_PER_POST]
														
 
															+            print(f"      📸 准备上传 {len(image_urls)} 张图片 (总共{len(post.images)}张)")
														
 
															+
														
 
															+            uploaded_files, temp_paths = await ImageUploader.upload_images(image_urls)
														
 
															+            media_files = uploaded_files
														
 
															+
														
 
															+            # 保存临时路径到state中
														
 
															+            if not state.get("temp_image_paths"):
														
 
															+                state["temp_image_paths"] = []
														
 
															+            state["temp_image_paths"].extend(temp_paths)
														
 
															+
														
 
															+            # ✅ 缓存图片数据，避免后续节点重复下载
														
 
															+            state["cached_media_files"] = media_files
														
 
															+        else:
														
 
															+            print(f"      ⚠️  帖子无图片/视频")
														
 
															+
														
 
															+        # 准备System和User Prompt
														
 
															+        user_prompt = PromptAdapter.adapt_prompt(USER_TEMPLATE1_IS_KNOWLEDGE, post)
														
 
															+        system_prompt = SYSTEM_PROMPT1_IS_KNOWLEDGE
														
 
															+
														
 
															+        # 调用Gemini (使用新格式)
														
 
															+        client = GeminiClient()
														
 
															+
														
 
															+        if semaphore:
														
 
															+            async with semaphore:
														
 
															+                data = await client.generate_content(
														
 
															+                    system_prompt=system_prompt,
														
 
															+                    user_prompt=user_prompt,
														
 
															+                    media_files=media_files
														
 
															+                )
														
 
															+        else:
														
 
															+            data = await client.generate_content(
														
 
															+                system_prompt=system_prompt,
														
 
															+                user_prompt=user_prompt,
														
 
															+                media_files=media_files
														
 
															+            )
														
 
															+
														
 
															+        # 调试:打印返回的数据结构
														
 
															+        print(f"      🐛 DEBUG - API返回数据: {json.dumps(data, ensure_ascii=False, indent=2)[:500]}")
														
 
															+        print(f"      🐛 DEBUG - data keys: {list(data.keys())}")
														
 
															+
														
 
															+        # 解析结果
														
 
															+        knowledge_eval = KnowledgeEvaluation(
														
 
															+            is_knowledge=data.get("is_knowledge", False),
														
 
															+            quick_exclude=data.get("quick_exclude", {}),
														
 
															+            title_layer=data.get("title_layer", {}),
														
 
															+            image_layer=data.get("image_layer", {}),
														
 
															+            text_layer=data.get("text_layer", {}),
														
 
															+            judgment_logic=data.get("judgment_logic", ""),
														
 
															+            core_evidence=data.get("core_evidence", []),
														
 
															+            issues=data.get("issues", []),
														
 
															+            conclusion=data.get("conclusion", "")
														
 
															+        )
														
 
															+
														
 
															+        state["knowledge_eval"] = knowledge_eval
														
 
															+
														
 
															+        # 判断是否继续
														
 
															+        if not knowledge_eval.is_knowledge:
														
 
															+            print(f"      ⊗ 非知识内容,停止后续评估")
														
 
															+            state["should_continue"] = False
														
 
															+        else:
														
 
															+            print(f"      ✅ Step 1: 是知识内容")
														
 
															+            state["should_continue"] = True
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"      ❌ Prompt1评估失败: {str(e)[:100]}")
														
 
															+        state["error"] = str(e)
														
 
															+        state["should_continue"] = False
														
 
															+
														
 
															+    return state
														
 
															+
														
 
															+
														
 
															+async def content_knowledge_node(state: EvaluationState) -> EvaluationState:
														
 
															+    """
														
 
															+    Node 2: 内容知识判断 (Prompt2)
														
 
															+    """
														
 
															+    post = state["post"]
														
 
															+    semaphore = state.get("semaphore")
														
 
															+
														
 
															+    print(f"      📝 Step 2/4: 判断是否是内容知识...")
														
 
															+
														
 
															+    try:
														
 
															+        # 准备媒体文件
														
 
															+        media_files = []
														
 
															+        if post.type == "video" and state.get("video_file"):
														
 
															+            media_files = [state["video_file"]]
														
 
															+            print(f"      📹 准备视频文件")
														
 
															+        elif post.images:
														
 
															+            # ✅ 优先使用缓存的图片，避免重复下载
														
 
															+            if state.get("cached_media_files"):
														
 
															+                media_files = state["cached_media_files"]
														
 
															+                print(f"      ♻️  使用缓存图片 ({len(media_files)}张)")
														
 
															+            else:
														
 
															+                # 缓存不存在才下载
														
 
															+                image_urls = post.images[:MAX_IMAGES_PER_POST]
														
 
															+                print(f"      📸 准备上传 {len(image_urls)} 张图片 (用于内容知识评估)")
														
 
															+
														
 
															+                uploaded_files, temp_paths = await ImageUploader.upload_images(image_urls)
														
 
															+                media_files = uploaded_files
														
 
															+
														
 
															+                # 保存临时路径到state中
														
 
															+                if not state.get("temp_image_paths"):
														
 
															+                    state["temp_image_paths"] = []
														
 
															+                state["temp_image_paths"].extend(temp_paths)
														
 
															+        else:
														
 
															+            print(f"      ⚠️  无媒体文件")
														
 
															+
														
 
															+        # 准备System和User Prompt
														
 
															+        user_prompt = PromptAdapter.adapt_prompt(USER_TEMPLATE2_CONTENT_KNOWLEDGE, post)
														
 
															+        system_prompt = SYSTEM_PROMPT2_CONTENT_KNOWLEDGE
														
 
															+
														
 
															+        # 调用Gemini (使用新格式)
														
 
															+        client = GeminiClient()
														
 
															+
														
 
															+        if semaphore:
														
 
															+            async with semaphore:
														
 
															+                data = await client.generate_content(
														
 
															+                    system_prompt=system_prompt,
														
 
															+                    user_prompt=user_prompt,
														
 
															+                    media_files=media_files
														
 
															+                )
														
 
															+        else:
														
 
															+            data = await client.generate_content(
														
 
															+                system_prompt=system_prompt,
														
 
															+                user_prompt=user_prompt,
														
 
															+                media_files=media_files
														
 
															+            )
														
 
															+
														
 
															+        # 解析结果
														
 
															+        final_score = data.get("final_score", 0)
														
 
															+        is_content_knowledge = final_score >= 55
														
 
															+
														
 
															+        content_eval = ContentKnowledgeEvaluation(
														
 
															+            is_content_knowledge=is_content_knowledge,
														
 
															+            final_score=final_score,
														
 
															+            level=data.get("level", ""),
														
 
															+            quick_exclude=data.get("quick_exclude", {}),
														
 
															+            dimension_scores=data.get("dimension_scores", {}),
														
 
															+            core_evidence=data.get("core_evidence", []),
														
 
															+            issues=data.get("issues", []),
														
 
															+            summary=data.get("summary", "")
														
 
															+        )
														
 
															+
														
 
															+        state["content_eval"] = content_eval
														
 
															+
														
 
															+        # 判断是否继续
														
 
															+        if not is_content_knowledge:
														
 
															+            print(f"      ⊗ 非内容知识,停止后续评估 (得分: {final_score})")
														
 
															+            state["should_continue"] = False
														
 
															+        else:
														
 
															+            print(f"      ✅ Step 2: 是内容知识 (得分: {final_score})")
														
 
															+            state["should_continue"] = True
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"      ❌ Prompt2评估失败: {str(e)[:100]}")
														
 
															+        state["error"] = str(e)
														
 
															+        state["should_continue"] = False
														
 
															+
														
 
															+    return state
														
 
															+
														
 
															+
														
 
															+async def parallel_match_node(state: EvaluationState) -> EvaluationState:
														
 
															+    """
														
 
															+    Node 3: 并行目的性和品类匹配 (Prompt3 & Prompt4)
														
 
															+    """
														
 
															+    post = state["post"]
														
 
															+    original_query = state["original_query"]
														
 
															+    semaphore = state.get("semaphore")
														
 
															+
														
 
															+    print(f"      📝 Step 3&4/4: 并行执行目的性和品类匹配...")
														
 
															+
														
 
															+    try:
														
 
															+        # 准备媒体文件
														
 
															+        media_files = []
														
 
															+        if post.type == "video" and state.get("video_file"):
														
 
															+            media_files = [state["video_file"]]
														
 
															+            print(f"      📹 准备视频文件")
														
 
															+        elif post.images:
														
 
															+            # ✅ 优先使用缓存的图片，避免重复下载
														
 
															+            if state.get("cached_media_files"):
														
 
															+                media_files = state["cached_media_files"]
														
 
															+                print(f"      ♻️  使用缓存图片 ({len(media_files)}张)")
														
 
															+            else:
														
 
															+                # 缓存不存在才下载
														
 
															+                image_urls = post.images[:MAX_IMAGES_PER_POST]
														
 
															+                print(f"      📸 准备上传 {len(image_urls)} 张图片 (用于目的性和品类评估)")
														
 
															+
														
 
															+                uploaded_files, temp_paths = await ImageUploader.upload_images(image_urls)
														
 
															+                media_files = uploaded_files
														
 
															+
														
 
															+                # 保存临时路径到state中
														
 
															+                if not state.get("temp_image_paths"):
														
 
															+                    state["temp_image_paths"] = []
														
 
															+                state["temp_image_paths"].extend(temp_paths)
														
 
															+        else:
														
 
															+            print(f"      ⚠️  无媒体文件")
														
 
															+
														
 
															+        client = GeminiClient()
														
 
															+
														
 
															+        # 并行执行Prompt3和Prompt4
														
 
															+        async def eval_purpose():
														
 
															+            user_prompt = PromptAdapter.adapt_prompt(
														
 
															+                USER_TEMPLATE3_PURPOSE_MATCH, post, original_query=original_query
														
 
															+            )
														
 
															+            system_prompt = SYSTEM_PROMPT3_PURPOSE_MATCH
														
 
															+            
														
 
															+            if semaphore:
														
 
															+                async with semaphore:
														
 
															+                    return await client.generate_content(
														
 
															+                        system_prompt=system_prompt,
														
 
															+                        user_prompt=user_prompt,
														
 
															+                        media_files=media_files
														
 
															+                    )
														
 
															+            else:
														
 
															+                return await client.generate_content(
														
 
															+                    system_prompt=system_prompt,
														
 
															+                    user_prompt=user_prompt,
														
 
															+                    media_files=media_files
														
 
															+                )
														
 
															+
														
 
															+        async def eval_category():
														
 
															+            user_prompt = PromptAdapter.adapt_prompt(
														
 
															+                USER_TEMPLATE4_CATEGORY_MATCH, post, original_query=original_query
														
 
															+            )
														
 
															+            system_prompt = SYSTEM_PROMPT4_CATEGORY_MATCH
														
 
															+            
														
 
															+            if semaphore:
														
 
															+                async with semaphore:
														
 
															+                    return await client.generate_content(
														
 
															+                        system_prompt=system_prompt,
														
 
															+                        user_prompt=user_prompt,
														
 
															+                        media_files=media_files
														
 
															+                    )
														
 
															+            else:
														
 
															+                return await client.generate_content(
														
 
															+                    system_prompt=system_prompt,
														
 
															+                    user_prompt=user_prompt,
														
 
															+                    media_files=media_files
														
 
															+                )
														
 
															+
														
 
															+        purpose_data, category_data = await asyncio.gather(eval_purpose(), eval_category())
														
 
															+
														
 
															+        # 🔍 调试日志 - 查看API返回的实际结构
														
 
															+        print(f"\n      🐛 DEBUG - purpose_data keys: {list(purpose_data.keys())}")
														
 
															+        print(f"      🐛 DEBUG - purpose_data 内容: {purpose_data}")
														
 
															+        print(f"\n      🐛 DEBUG - category_data keys: {list(category_data.keys())}")
														
 
															+        print(f"      🐛 DEBUG - category_data 内容: {category_data}\n")
														
 
															+
														
 
															+        # 解析Prompt3结果（直接使用英文字段名）
														
 
															+        purpose_eval = PurposeEvaluation(
														
 
															+            purpose_score=purpose_data.get("purpose_score", 0),
														
 
															+            core_motivation=purpose_data.get("core_motivation", ""),
														
 
															+            image_value=purpose_data.get("image_value", ""),
														
 
															+            title_intention=purpose_data.get("title_intention", ""),
														
 
															+            text_content=purpose_data.get("text_content", ""),
														
 
															+            match_level=purpose_data.get("match_level", ""),
														
 
															+            core_basis=purpose_data.get("core_basis", "")
														
 
															+        )
														
 
															+
														
 
															+        # 解析Prompt4结果（直接使用英文字段名）
														
 
															+        category_eval = CategoryEvaluation(
														
 
															+            category_score=category_data.get("category_score", 0),
														
 
															+            original_category_analysis=category_data.get("original_category_analysis", {}),
														
 
															+            actual_category=category_data.get("actual_category", {}),
														
 
															+            match_level=category_data.get("match_level", ""),
														
 
															+            category_match_analysis=category_data.get("category_match_analysis", {}),
														
 
															+            core_basis=category_data.get("core_basis", "")
														
 
															+        )
														
 
															+
														
 
															+        state["purpose_eval"] = purpose_eval
														
 
															+        state["category_eval"] = category_eval
														
 
															+        state["should_continue"] = True
														
 
															+
														
 
															+        print(f"      ✅ Step 3: 目的性得分 = {purpose_eval.purpose_score}")
														
 
															+        print(f"      ✅ Step 4: 品类得分 = {category_eval.category_score}")
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"      ❌ Prompt3或4评估失败: {str(e)[:100]}")
														
 
															+        state["error"] = str(e)
														
 
															+        state["should_continue"] = False
														
 
															+
														
 
															+    return state
														
 
															+
														
 
															+
														
 
															+async def score_node(state: EvaluationState) -> EvaluationState:
														
 
															+    """
														
 
															+    Node 4: 计算综合得分
														
 
															+    """
														
 
															+    print(f"      📊 Step 5/5: 计算综合得分...")
														
 
															+
														
 
															+    try:
														
 
															+        purpose_eval = state["purpose_eval"]
														
 
															+        category_eval = state["category_eval"]
														
 
															+
														
 
															+        if not purpose_eval or not category_eval:
														
 
															+            raise Exception("缺少目的性或品类评估结果")
														
 
															+
														
 
															+        # 计算综合得分: 目的性50% + 品类50%
														
 
															+        final_score = round(
														
 
															+            purpose_eval.purpose_score * 0.5 + category_eval.category_score * 0.5,
														
 
															+            2
														
 
															+        )
														
 
															+
														
 
															+        # 判定匹配等级
														
 
															+        if final_score >= 85:
														
 
															+            match_level = "高度匹配"
														
 
															+        elif final_score >= 70:
														
 
															+            match_level = "基本匹配"
														
 
															+        elif final_score >= 50:
														
 
															+            match_level = "部分匹配"
														
 
															+        elif final_score >= 30:
														
 
															+            match_level = "弱匹配"
														
 
															+        else:
														
 
															+            match_level = "不匹配"
														
 
															+
														
 
															+        state["final_score"] = final_score
														
 
															+        state["match_level"] = match_level
														
 
															+
														
 
															+        print(f"      ✅ 综合得分: {final_score} ({match_level})")
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"      ❌ 综合评分失败: {str(e)[:100]}")
														
 
															+        state["error"] = str(e)
														
 
															+
														
 
															+    return state
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# LangGraph 图定义
														
 
															+# ============================================================================
														
 
															+
														
 
															+def create_evaluation_graph() -> StateGraph:
														
 
															+    """创建评估流程图"""
														
 
															+
														
 
															+    # 定义条件判断
														
 
															+    def should_continue_to_content(state: EvaluationState) -> str:
														
 
															+        """判断是否继续到内容知识评估"""
														
 
															+        if not state.get("should_continue", False):
														
 
															+            return END
														
 
															+        return "content_knowledge_node"
														
 
															+
														
 
															+    def should_continue_to_match(state: EvaluationState) -> str:
														
 
															+        """判断是否继续到匹配评估"""
														
 
															+        if not state.get("should_continue", False):
														
 
															+            return END
														
 
															+        return "parallel_match_node"
														
 
															+
														
 
															+    def should_continue_to_score(state: EvaluationState) -> str:
														
 
															+        """判断是否继续到评分"""
														
 
															+        if not state.get("should_continue", False):
														
 
															+            return END
														
 
															+        return "score_node"
														
 
															+
														
 
															+    # 创建StateGraph
														
 
															+    workflow = StateGraph(EvaluationState)
														
 
															+
														
 
															+    # 添加节点
														
 
															+    workflow.add_node("knowledge_node", knowledge_node)
														
 
															+    workflow.add_node("content_knowledge_node", content_knowledge_node)
														
 
															+    workflow.add_node("parallel_match_node", parallel_match_node)
														
 
															+    workflow.add_node("score_node", score_node)
														
 
															+
														
 
															+    # 设置入口点
														
 
															+    workflow.set_entry_point("knowledge_node")
														
 
															+
														
 
															+    # 添加条件边
														
 
															+    workflow.add_conditional_edges(
														
 
															+        "knowledge_node",
														
 
															+        should_continue_to_content,
														
 
															+        {
														
 
															+            "content_knowledge_node": "content_knowledge_node",
														
 
															+            END: END
														
 
															+        }
														
 
															+    )
														
 
															+
														
 
															+    workflow.add_conditional_edges(
														
 
															+        "content_knowledge_node",
														
 
															+        should_continue_to_match,
														
 
															+        {
														
 
															+            "parallel_match_node": "parallel_match_node",
														
 
															+            END: END
														
 
															+        }
														
 
															+    )
														
 
															+
														
 
															+    workflow.add_conditional_edges(
														
 
															+        "parallel_match_node",
														
 
															+        should_continue_to_score,
														
 
															+        {
														
 
															+            "score_node": "score_node",
														
 
															+            END: END
														
 
															+        }
														
 
															+    )
														
 
															+
														
 
															+    # score_node结束后直接到END
														
 
															+    workflow.add_edge("score_node", END)
														
 
															+
														
 
															+    return workflow.compile()
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# 主评估函数
														
 
															+# ============================================================================
														
 
															+
														
 
															+async def evaluate_post_v4(
														
 
															+    post,
														
 
															+    original_query: str,
														
 
															+    semaphore: Optional[asyncio.Semaphore] = None
														
 
															+) -> tuple:
														
 
															+    """
														
 
															+    V4评估主函数 (LangGraph版本)
														
 
															+
														
 
															+    Args:
														
 
															+        post: Post对象
														
 
															+        original_query: 原始搜索query
														
 
															+        semaphore: 并发控制信号量
														
 
															+
														
 
															+    Returns:
														
 
															+        (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
														
 
															+    """
														
 
															+    # 检查缓存
														
 
															+    if ENABLE_CACHE:
														
 
															+        cached_result = _load_from_cache(post.note_id)
														
 
															+        if cached_result is not None:
														
 
															+            print(f"      ♻️  使用缓存结果: {post.note_id}")
														
 
															+            return cached_result
														
 
															+
														
 
															+    print(f"      🔍 开始V4评估 (LangGraph): {post.note_id}")
														
 
															+
														
 
															+    # 初始化状态
														
 
															+    initial_state: EvaluationState = {
														
 
															+        "post": post,
														
 
															+        "original_query": original_query,
														
 
															+        "video_file": None,
														
 
															+        "video_uri": None,
														
 
															+        "temp_video_path": None,
														
 
															+        "temp_image_paths": None,
														
 
															+        "knowledge_eval": None,
														
 
															+        "content_eval": None,
														
 
															+        "purpose_eval": None,
														
 
															+        "category_eval": None,
														
 
															+        "final_score": None,
														
 
															+        "match_level": None,
														
 
															+        "should_continue": True,
														
 
															+        "error": None,
														
 
															+        "semaphore": semaphore
														
 
															+    }
														
 
															+
														
 
															+    # 处理视频
														
 
															+    if post.type == "video" and post.images and len(post.images) > 0:
														
 
															+        video_url = post.images[0]  # 视频URL通常在images[0]
														
 
															+        video_file, video_uri, temp_path = await VideoUploader.upload_video(video_url)
														
 
															+        initial_state["video_file"] = video_file
														
 
															+        initial_state["video_uri"] = video_uri
														
 
															+        initial_state["temp_video_path"] = temp_path
														
 
															+
														
 
															+        if not video_file:
														
 
															+            print(f"      ❌ 视频上传失败,停止评估")
														
 
															+            return (None, None, None, None, None, None)
														
 
															+
														
 
															+    try:
														
 
															+        # 创建并运行图
														
 
															+        graph = create_evaluation_graph()
														
 
															+        final_state = await graph.ainvoke(initial_state)
														
 
															+
														
 
															+        # 提取结果
														
 
															+        knowledge_eval = final_state.get("knowledge_eval")
														
 
															+        content_eval = final_state.get("content_eval")
														
 
															+        purpose_eval = final_state.get("purpose_eval")
														
 
															+        category_eval = final_state.get("category_eval")
														
 
															+        final_score = final_state.get("final_score")
														
 
															+        match_level = final_state.get("match_level")
														
 
															+
														
 
															+        # 保存到缓存
														
 
															+        if ENABLE_CACHE and knowledge_eval:
														
 
															+            _save_to_cache(
														
 
															+                post.note_id,
														
 
															+                (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
														
 
															+            )
														
 
															+
														
 
															+        return (knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level)
														
 
															+
														
 
															+    finally:
														
 
															+        # 清理临时视频文件
														
 
															+        if initial_state.get("temp_video_path"):
														
 
															+            try:
														
 
															+                os.remove(initial_state["temp_video_path"])
														
 
															+                print(f"      🗑️  清理临时视频文件")
														
 
															+            except:
														
 
															+                pass
														
 
															+
														
 
															+        # 清理临时图片文件
														
 
															+        temp_image_paths = final_state.get("temp_image_paths") if 'final_state' in locals() else initial_state.get("temp_image_paths")
														
 
															+        if temp_image_paths:
														
 
															+            cleaned_count = 0
														
 
															+            for temp_path in temp_image_paths:
														
 
															+                try:
														
 
															+                    os.remove(temp_path)
														
 
															+                    cleaned_count += 1
														
 
															+                except:
														
 
															+                    pass
														
 
															+            if cleaned_count > 0:
														
 
															+                print(f"      🗑️  清理 {cleaned_count}/{len(temp_image_paths)} 个临时图片文件")
														
 
															+
														
 
															+
														
 
															+def apply_evaluation_v4_to_post(
														
 
															+    post,
														
 
															+    knowledge_eval: Optional[KnowledgeEvaluation],
														
 
															+    content_eval: Optional[ContentKnowledgeEvaluation],
														
 
															+    purpose_eval: Optional[PurposeEvaluation],
														
 
															+    category_eval: Optional[CategoryEvaluation],
														
 
															+    final_score: Optional[float],
														
 
															+    match_level: Optional[str]
														
 
															+):
														
 
															+    """
														
 
															+    将V4评估结果应用到Post对象
														
 
															+
														
 
															+    Args:
														
 
															+        post: Post对象
														
 
															+        knowledge_eval: Prompt1结果
														
 
															+        content_eval: Prompt2结果
														
 
															+        purpose_eval: Prompt3结果
														
 
															+        category_eval: Prompt4结果
														
 
															+        final_score: 综合得分
														
 
															+        match_level: 匹配等级
														
 
															+    """
														
 
															+    # Prompt1: 判断是知识
														
 
															+    if knowledge_eval:
														
 
															+        post.is_knowledge = knowledge_eval.is_knowledge
														
 
															+        post.knowledge_evaluation = {
														
 
															+            "quick_exclude": knowledge_eval.quick_exclude,
														
 
															+            "title_layer": knowledge_eval.title_layer,
														
 
															+            "image_layer": knowledge_eval.image_layer,
														
 
															+            "text_layer": knowledge_eval.text_layer,
														
 
															+            "judgment_logic": knowledge_eval.judgment_logic,
														
 
															+            "core_evidence": knowledge_eval.core_evidence,
														
 
															+            "issues": knowledge_eval.issues,
														
 
															+            "conclusion": knowledge_eval.conclusion
														
 
															+        }
														
 
															+
														
 
															+    # Prompt2: 判断是否是内容知识
														
 
															+    if content_eval:
														
 
															+        post.is_content_knowledge = content_eval.is_content_knowledge
														
 
															+        post.knowledge_score = float(content_eval.final_score)
														
 
															+        post.content_knowledge_evaluation = {
														
 
															+            "is_content_knowledge": content_eval.is_content_knowledge,
														
 
															+            "final_score": content_eval.final_score,
														
 
															+            "level": content_eval.level,
														
 
															+            "quick_exclude": content_eval.quick_exclude,
														
 
															+            "dimension_scores": content_eval.dimension_scores,
														
 
															+            "core_evidence": content_eval.core_evidence,
														
 
															+            "issues": content_eval.issues,
														
 
															+            "summary": content_eval.summary
														
 
															+        }
														
 
															+
														
 
															+    # Prompt3: 目的性匹配
														
 
															+    if purpose_eval:
														
 
															+        post.purpose_score = purpose_eval.purpose_score
														
 
															+        post.purpose_evaluation = {
														
 
															+            "purpose_score": purpose_eval.purpose_score,
														
 
															+            "core_motivation": purpose_eval.core_motivation,
														
 
															+            "image_value": purpose_eval.image_value,
														
 
															+            "title_intention": purpose_eval.title_intention,
														
 
															+            "text_content": purpose_eval.text_content,
														
 
															+            "match_level": purpose_eval.match_level,
														
 
															+            "core_basis": purpose_eval.core_basis
														
 
															+        }
														
 
															+
														
 
															+    # Prompt4: 品类匹配
														
 
															+    if category_eval:
														
 
															+        post.category_score = category_eval.category_score
														
 
															+        post.category_evaluation = {
														
 
															+            "category_score": category_eval.category_score,
														
 
															+            "original_category_analysis": category_eval.original_category_analysis,
														
 
															+            "actual_category": category_eval.actual_category,
														
 
															+            "match_level": category_eval.match_level,
														
 
															+            "category_match_analysis": category_eval.category_match_analysis,
														
 
															+            "core_basis": category_eval.core_basis
														
 
															+        }
														
 
															+
														
 
															+    # 综合得分
														
 
															+    if final_score is not None and match_level is not None:
														
 
															+        post.final_score = final_score
														
 
															+        post.match_level = match_level
														
 
															+
														
 
															+    # 设置评估时间和版本
														
 
															+    post.evaluation_time = datetime.now().isoformat()
														
 
															+    post.evaluator_version = "v4.0_langgraph"
														
 
															+
														
 
															+
														
 
															+async def batch_evaluate_posts_v4(
														
 
															+    posts: list,
														
 
															+    original_query: str,
														
 
															+    max_concurrent: int = MAX_CONCURRENT_EVALUATIONS
														
 
															+) -> int:
														
 
															+    """
														
 
															+    批量评估多个帖子 (V4版本)
														
 
															+
														
 
															+    Args:
														
 
															+        posts: Post对象列表
														
 
															+        original_query: 原始搜索query
														
 
															+        max_concurrent: 最大并发数
														
 
															+
														
 
															+    Returns:
														
 
															+        成功评估的帖子数量
														
 
															+    """
														
 
															+    semaphore = asyncio.Semaphore(max_concurrent)
														
 
															+
														
 
															+    print(f"\n📊 开始批量评估 {len(posts)} 个帖子 (LangGraph + Gemini,并发限制: {max_concurrent})...")
														
 
															+
														
 
															+    tasks = [evaluate_post_v4(post, original_query, semaphore) for post in posts]
														
 
															+    results = await asyncio.gather(*tasks)
														
 
															+
														
 
															+    success_count = 0
														
 
															+    for i, result in enumerate(results):
														
 
															+        knowledge_eval, content_eval, purpose_eval, category_eval, final_score, match_level = result
														
 
															+
														
 
															+        if knowledge_eval:
														
 
															+            apply_evaluation_v4_to_post(
														
 
															+                posts[i],
														
 
															+                knowledge_eval,
														
 
															+                content_eval,
														
 
															+                purpose_eval,
														
 
															+                category_eval,
														
 
															+                final_score,
														
 
															+                match_level
														
 
															+            )
														
 
															+            success_count += 1
														
 
															+
														
 
															+    print(f"✅ 批量评估完成: {success_count}/{len(posts)} 帖子已评估")
														
 
															+
														
 
															+    return success_count
														
--- a/script/search/enrichment_helper.py
+++ b/script/search/enrichment_helper.py
@@ -0,0 +1,261 @@
 
															+#!/usr/bin/env python3
														
 
															+"""
														
 
															+帖子详情补充工具
														
 
															+用于将搜索结果与详情API结果合并，补充高清图片、视频URL、作者信息等
														
 
															+"""
														
 
															+
														
 
															+import json
														
 
															+from typing import Dict, Any, List
														
 
															+from datetime import datetime
														
 
															+
														
 
															+
														
 
															+def parse_detail_result(detail_response: Dict[str, Any]) -> Dict[str, Any] | None:
														
 
															+    """
														
 
															+    解析详情API返回的结果
														
 
															+
														
 
															+    Args:
														
 
															+        detail_response: 详情API的完整响应
														
 
															+
														
 
															+    Returns:
														
 
															+        解析后的数据字典，失败返回None
														
 
															+    """
														
 
															+    try:
														
 
															+        # 检查success字段
														
 
															+        if not detail_response.get("success"):
														
 
															+            print(f"    ⚠️  详情API返回失败")
														
 
															+            return None
														
 
															+
														
 
															+        # 解析result字段（可能是JSON字符串）
														
 
															+        result = detail_response.get("result", "")
														
 
															+        if isinstance(result, str):
														
 
															+            result = json.loads(result)
														
 
															+
														
 
															+        # 提取data
														
 
															+        if isinstance(result, list) and len(result) > 0:
														
 
															+            return result[0].get("data", {})
														
 
															+        elif isinstance(result, dict):
														
 
															+            return result.get("data", {})
														
 
															+
														
 
															+        return None
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"    ✗ 解析详情结果失败: {e}")
														
 
															+        return None
														
 
															+
														
 
															+
														
 
															+def enrich_post_with_detail(post: Any, detail_response: Dict[str, Any]) -> bool:
														
 
															+    """
														
 
															+    使用详情API的数据补充Post对象
														
 
															+
														
 
															+    Args:
														
 
															+        post: Post对象（会被直接修改）
														
 
															+        detail_response: 详情API的完整响应
														
 
															+
														
 
															+    Returns:
														
 
															+        是否成功补充
														
 
															+    """
														
 
															+    # 解析详情数据
														
 
															+    detail_data = parse_detail_result(detail_response)
														
 
															+    if not detail_data:
														
 
															+        return False
														
 
															+
														
 
															+    try:
														
 
															+        # 1. 正文内容 - 使用详情API的完整正文覆盖
														
 
															+        body_text = detail_data.get("body_text", "")
														
 
															+        if body_text:
														
 
															+            post.body_text = body_text
														
 
															+
														
 
															+        # 2. 作者信息
														
 
															+        post.author_name = detail_data.get("channel_account_name", "")
														
 
															+        post.author_id = detail_data.get("channel_account_id", "")
														
 
															+
														
 
															+        # 3. 发布时间
														
 
															+        post.publish_time = detail_data.get("publish_timestamp", 0)
														
 
															+
														
 
															+        # 4. 互动信息 - 使用详情API的精确数据更新
														
 
															+        post.interact_info.update({
														
 
															+            "like_count": detail_data.get("like_count", 0),       # 详情API字段
														
 
															+            "collect_count": detail_data.get("collect_count", 0), # 详情API字段
														
 
															+        })
														
 
															+
														
 
															+        # 5. 根据类型处理图片/视频
														
 
															+        if post.type == "video":
														
 
															+            # 视频帖：补充视频URL（images保持不变）
														
 
															+            video_url = detail_data.get("video", "")
														
 
															+            if video_url:
														
 
															+                post.video = video_url
														
 
															+
														
 
															+        else:
														
 
															+            # 图文帖：仅保存CDN图片到cdn_images字段，不覆盖images
														
 
															+            images_data = detail_data.get("images", [])
														
 
															+            if images_data:
														
 
															+                # 提取CDN URL
														
 
															+                cdn_urls = []
														
 
															+                for img in images_data:
														
 
															+                    if isinstance(img, dict):
														
 
															+                        cdn_url = img.get("cdn_url", "")
														
 
															+                        if cdn_url:
														
 
															+                            cdn_urls.append(cdn_url)
														
 
															+                    elif isinstance(img, str):
														
 
															+                        cdn_urls.append(img)
														
 
															+
														
 
															+                # 仅保存CDN图片列表，不覆盖images
														
 
															+                post.cdn_images = cdn_urls
														
 
															+
														
 
															+        # 6. 标记已获取详情
														
 
															+        post.detail_fetched = True
														
 
															+
														
 
															+        return True
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"    ✗ 补充详情失败: {e}")
														
 
															+        return False
														
 
															+
														
 
															+
														
 
															+def enrich_posts_batch(
														
 
															+    posts: List[Any],
														
 
															+    detail_client: Any,
														
 
															+    show_progress: bool = True,
														
 
															+    delay: int = 1
														
 
															+) -> tuple[int, int]:
														
 
															+    """
														
 
															+    批量补充帖子详情
														
 
															+
														
 
															+    Args:
														
 
															+        posts: Post对象列表（会被直接修改）
														
 
															+        detail_client: XiaohongshuDetail实例
														
 
															+        show_progress: 是否显示进度
														
 
															+        delay: 请求间隔（秒）
														
 
															+
														
 
															+    Returns:
														
 
															+        (成功数量, 失败数量)
														
 
															+    """
														
 
															+    success_count = 0
														
 
															+    fail_count = 0
														
 
															+    total = len(posts)
														
 
															+
														
 
															+    for idx, post in enumerate(posts, 1):
														
 
															+        if show_progress:
														
 
															+            print(f"补充详情 ({idx}/{total}): {post.note_id}")
														
 
															+
														
 
															+        try:
														
 
															+            # 调用详情API
														
 
															+            detail_response = detail_client.get_detail(post.note_id)
														
 
															+
														
 
															+            # 合并数据
														
 
															+            if enrich_post_with_detail(post, detail_response):
														
 
															+                success_count += 1
														
 
															+                if show_progress:
														
 
															+                    print(f"  ✓ 成功补充")
														
 
															+            else:
														
 
															+                fail_count += 1
														
 
															+                if show_progress:
														
 
															+                    print(f"  ✗ 补充失败")
														
 
															+
														
 
															+        except Exception as e:
														
 
															+            fail_count += 1
														
 
															+            if show_progress:
														
 
															+                print(f"  ✗ 请求失败: {e}")
														
 
															+
														
 
															+        # 避免请求过快（最后一个不需要延迟）
														
 
															+        if idx < total and delay > 0:
														
 
															+            import time
														
 
															+            time.sleep(delay)
														
 
															+
														
 
															+    return success_count, fail_count
														
 
															+
														
 
															+
														
 
															+def create_enriched_summary(post: Any) -> Dict[str, Any]:
														
 
															+    """
														
 
															+    创建包含详情的帖子摘要（用于保存）
														
 
															+
														
 
															+    Args:
														
 
															+        post: Post对象
														
 
															+
														
 
															+    Returns:
														
 
															+        摘要字典
														
 
															+    """
														
 
															+    summary = {
														
 
															+        # 基础信息
														
 
															+        "note_id": post.note_id,
														
 
															+        "note_url": post.note_url,
														
 
															+        "title": post.title,
														
 
															+        "body_text": post.body_text,
														
 
															+        "type": post.type,
														
 
															+
														
 
															+        # 媒体信息
														
 
															+        "images": post.images,
														
 
															+        "cdn_images": post.cdn_images,
														
 
															+        "video": post.video,
														
 
															+
														
 
															+        # 作者信息（详情补充）
														
 
															+        "author": {
														
 
															+            "name": post.author_name,
														
 
															+            "id": post.author_id
														
 
															+        } if post.detail_fetched else {},
														
 
															+
														
 
															+        # 互动信息
														
 
															+        "interact_info": post.interact_info,
														
 
															+
														
 
															+        # 时间信息
														
 
															+        "publish_time": post.publish_time,
														
 
															+        "publish_time_readable": datetime.fromtimestamp(
														
 
															+            post.publish_time / 1000
														
 
															+        ).strftime("%Y-%m-%d %H:%M:%S") if post.publish_time > 0 else "",
														
 
															+
														
 
															+        # 元数据
														
 
															+        "detail_fetched": post.detail_fetched
														
 
															+    }
														
 
															+
														
 
															+    return summary
														
 
															+
														
 
															+
														
 
															+def print_enrichment_stats(posts: List[Any]) -> None:
														
 
															+    """
														
 
															+    打印详情补充统计信息
														
 
															+
														
 
															+    Args:
														
 
															+        posts: Post对象列表
														
 
															+    """
														
 
															+    total = len(posts)
														
 
															+    enriched = sum(1 for p in posts if p.detail_fetched)
														
 
															+
														
 
															+    video_count = sum(1 for p in posts if p.type == "video")
														
 
															+    image_count = total - video_count
														
 
															+
														
 
															+    print("\n" + "=" * 60)
														
 
															+    print("详情补充统计")
														
 
															+    print("=" * 60)
														
 
															+    print(f"总帖子数: {total}")
														
 
															+    print(f"  - 图文帖: {image_count}")
														
 
															+    print(f"  - 视频帖: {video_count}")
														
 
															+    print(f"\n已补充详情: {enriched}/{total} ({enriched*100//total if total > 0 else 0}%)")
														
 
															+    print(f"未补充详情: {total - enriched}")
														
 
															+
														
 
															+    if enriched > 0:
														
 
															+        print("\n详情字段统计:")
														
 
															+        has_author = sum(1 for p in posts if p.author_name)
														
 
															+        has_publish_time = sum(1 for p in posts if p.publish_time > 0)
														
 
															+        has_cdn_images = sum(1 for p in posts if p.cdn_images)
														
 
															+        has_video_url = sum(1 for p in posts if p.video and p.type == "video")
														
 
															+
														
 
															+        print(f"  - 作者信息: {has_author}/{enriched}")
														
 
															+        print(f"  - 发布时间: {has_publish_time}/{enriched}")
														
 
															+        print(f"  - 高清图片: {has_cdn_images}/{image_count} (图文帖)")
														
 
															+        print(f"  - 视频URL: {has_video_url}/{video_count} (视频帖)")
														
 
															+
														
 
															+    print("=" * 60 + "\n")
														
 
															+
														
 
															+
														
 
															+# ============================================================================
														
 
															+# 使用示例
														
 
															+# ============================================================================
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    print("这是一个辅助模块，请通过 search_with_detail.py 使用")
														
 
															+    print("\n主要功能：")
														
 
															+    print("1. parse_detail_result() - 解析详情API响应")
														
 
															+    print("2. enrich_post_with_detail() - 补充单个帖子详情")
														
 
															+    print("3. enrich_posts_batch() - 批量补充详情")
														
 
															+    print("4. create_enriched_summary() - 创建详情摘要")
														
 
															+    print("5. print_enrichment_stats() - 打印统计信息")
														
--- a/script/search/search_with_detail.py
+++ b/script/search/search_with_detail.py
@@ -0,0 +1,292 @@
 
															+#!/usr/bin/env python3
														
 
															+"""
														
 
															+小红书搜索 + 详情补充 - 端到端工具
														
 
															+先调用搜索API获取笔记列表，再批量调用详情API补充完整信息
														
 
															+"""
														
 
															+
														
 
															+import json
														
 
															+import os
														
 
															+import argparse
														
 
															+import sys
														
 
															+from datetime import datetime
														
 
															+from typing import List, Dict, Any
														
 
															+
														
 
															+# 添加项目根目录到路径
														
 
															+script_dir = os.path.dirname(os.path.abspath(__file__))
														
 
															+project_root = os.path.dirname(os.path.dirname(script_dir))
														
 
															+sys.path.insert(0, project_root)
														
 
															+
														
 
															+from script.search.xiaohongshu_search import XiaohongshuSearch
														
 
															+from script.search.xiaohongshu_detail import XiaohongshuDetail
														
 
															+from script.search.enrichment_helper import (
														
 
															+    enrich_posts_batch,
														
 
															+    create_enriched_summary,
														
 
															+    print_enrichment_stats
														
 
															+)
														
 
															+from knowledge_search_traverse import Post, process_note_data
														
 
															+
														
 
															+
														
 
															+def search_and_enrich(
														
 
															+    keyword: str,
														
 
															+    content_type: str = "不限",
														
 
															+    sort_type: str = "综合",
														
 
															+    publish_time: str = "不限",
														
 
															+    cursor: str = "",
														
 
															+    enable_detail: bool = True,
														
 
															+    detail_delay: int = 1,
														
 
															+    results_dir: str = None
														
 
															+) -> tuple[List[Post], str]:
														
 
															+    """
														
 
															+    搜索并补充详情的主流程
														
 
															+
														
 
															+    Args:
														
 
															+        keyword: 搜索关键词
														
 
															+        content_type: 内容类型
														
 
															+        sort_type: 排序方式
														
 
															+        publish_time: 发布时间筛选
														
 
															+        cursor: 翻页游标
														
 
															+        enable_detail: 是否启用详情补充
														
 
															+        detail_delay: 详情请求间隔（秒）
														
 
															+        results_dir: 结果输出目录
														
 
															+
														
 
															+    Returns:
														
 
															+        (Post对象列表, 保存的文件路径)
														
 
															+    """
														
 
															+    print("\n" + "=" * 80)
														
 
															+    print(f"小红书搜索 + 详情补充工具")
														
 
															+    print("=" * 80)
														
 
															+    print(f"关键词: {keyword}")
														
 
															+    print(f"内容类型: {content_type}")
														
 
															+    print(f"排序方式: {sort_type}")
														
 
															+    print(f"发布时间: {publish_time}")
														
 
															+    print(f"详情补充: {'启用' if enable_detail else '禁用'}")
														
 
															+    print("=" * 80 + "\n")
														
 
															+
														
 
															+    # 1. 执行搜索
														
 
															+    print("步骤 1/3: 执行搜索...")
														
 
															+    print("-" * 80)
														
 
															+
														
 
															+    search_client = XiaohongshuSearch(results_dir=results_dir)
														
 
															+    search_result = search_client.search(
														
 
															+        keyword=keyword,
														
 
															+        content_type=content_type,
														
 
															+        sort_type=sort_type,
														
 
															+        publish_time=publish_time,
														
 
															+        cursor=cursor
														
 
															+    )
														
 
															+
														
 
															+    # 解析搜索结果
														
 
															+    notes_data = search_result.get("data", {}).get("data", [])
														
 
															+    print(f"✓ 搜索完成，获得 {len(notes_data)} 条结果\n")
														
 
															+
														
 
															+    if not notes_data:
														
 
															+        print("未找到任何结果")
														
 
															+        return [], ""
														
 
															+
														
 
															+    # 2. 转换为Post对象
														
 
															+    print("步骤 2/3: 解析搜索结果...")
														
 
															+    print("-" * 80)
														
 
															+
														
 
															+    posts: List[Post] = []
														
 
															+    for note in notes_data:
														
 
															+        try:
														
 
															+            post = process_note_data(note)
														
 
															+            posts.append(post)
														
 
															+        except Exception as e:
														
 
															+            print(f"  ✗ 解析失败: {e}")
														
 
															+
														
 
															+    print(f"✓ 成功解析 {len(posts)}/{len(notes_data)} 条结果\n")
														
 
															+
														
 
															+    # 3. 补充详情（如果启用）
														
 
															+    if enable_detail and posts:
														
 
															+        print("步骤 3/3: 补充详情信息...")
														
 
															+        print("-" * 80)
														
 
															+
														
 
															+        detail_client = XiaohongshuDetail(results_dir=results_dir)
														
 
															+        success, fail = enrich_posts_batch(
														
 
															+            posts,
														
 
															+            detail_client,
														
 
															+            show_progress=True,
														
 
															+            delay=detail_delay
														
 
															+        )
														
 
															+
														
 
															+        print(f"\n✓ 详情补充完成: 成功 {success}/{len(posts)}, 失败 {fail}")
														
 
															+        print_enrichment_stats(posts)
														
 
															+    else:
														
 
															+        print("步骤 3/3: 跳过详情补充\n")
														
 
															+
														
 
															+    # 4. 保存结果
														
 
															+    filepath = save_enriched_results(keyword, posts, search_result, results_dir)
														
 
															+
														
 
															+    return posts, filepath
														
 
															+
														
 
															+
														
 
															+def save_enriched_results(
														
 
															+    keyword: str,
														
 
															+    posts: List[Post],
														
 
															+    search_result: Dict[str, Any],
														
 
															+    results_dir: str = None
														
 
															+) -> str:
														
 
															+    """
														
 
															+    保存增强后的结果
														
 
															+
														
 
															+    Args:
														
 
															+        keyword: 搜索关键词
														
 
															+        posts: Post对象列表
														
 
															+        search_result: 原始搜索结果
														
 
															+        results_dir: 结果输出目录
														
 
															+
														
 
															+    Returns:
														
 
															+        保存的文件路径
														
 
															+    """
														
 
															+    # 确定输出目录
														
 
															+    if results_dir:
														
 
															+        base_dir = results_dir
														
 
															+    else:
														
 
															+        script_dir = os.path.dirname(os.path.abspath(__file__))
														
 
															+        project_root = os.path.dirname(os.path.dirname(script_dir))
														
 
															+        base_dir = os.path.join(project_root, "data", "search")
														
 
															+
														
 
															+    # 创建目录
														
 
															+    result_dir = os.path.join(base_dir, "enriched", keyword)
														
 
															+    os.makedirs(result_dir, exist_ok=True)
														
 
															+
														
 
															+    # 构建结果数据
														
 
															+    enriched_data = {
														
 
															+        "metadata": {
														
 
															+            "keyword": keyword,
														
 
															+            "search_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
														
 
															+            "total_posts": len(posts),
														
 
															+            "enriched_posts": sum(1 for p in posts if p.detail_fetched),
														
 
															+            "video_posts": sum(1 for p in posts if p.type == "video"),
														
 
															+            "image_posts": sum(1 for p in posts if p.type != "video"),
														
 
															+        },
														
 
															+        "posts": [create_enriched_summary(p) for p in posts],
														
 
															+        "original_search_result": search_result  # 保留原始搜索结果供参考
														
 
															+    }
														
 
															+
														
 
															+    # 保存文件
														
 
															+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
														
 
															+    filename = f"{timestamp}_enriched.json"
														
 
															+    filepath = os.path.join(result_dir, filename)
														
 
															+
														
 
															+    with open(filepath, 'w', encoding='utf-8') as f:
														
 
															+        json.dump(enriched_data, f, ensure_ascii=False, indent=2)
														
 
															+
														
 
															+    print(f"\n✓ 结果已保存: {filepath}\n")
														
 
															+    return filepath
														
 
															+
														
 
															+
														
 
															+def main():
														
 
															+    """命令行入口"""
														
 
															+    parser = argparse.ArgumentParser(
														
 
															+        description='小红书搜索 + 详情补充工具',
														
 
															+        formatter_class=argparse.RawDescriptionHelpFormatter,
														
 
															+        epilog="""
														
 
															+使用示例:
														
 
															+  # 基础搜索并补充详情
														
 
															+  python3 search_with_detail.py --keyword "健身教程"
														
 
															+
														
 
															+  # 搜索视频内容
														
 
															+  python3 search_with_detail.py --keyword "化妆教程" --content-type "视频"
														
 
															+
														
 
															+  # 仅搜索不补充详情
														
 
															+  python3 search_with_detail.py --keyword "美食" --no-detail
														
 
															+
														
 
															+  # 自定义输出目录
														
 
															+  python3 search_with_detail.py --keyword "旅游" --results-dir "custom/output"
														
 
															+        """
														
 
															+    )
														
 
															+
														
 
															+    # 搜索参数
														
 
															+    parser.add_argument(
														
 
															+        '--keyword',
														
 
															+        type=str,
														
 
															+        required=True,
														
 
															+        help='搜索关键词（必填）'
														
 
															+    )
														
 
															+    parser.add_argument(
														
 
															+        '--content-type',
														
 
															+        type=str,
														
 
															+        default='不限',
														
 
															+        choices=['不限', '视频', '图文'],
														
 
															+        help='内容类型（默认: 不限）'
														
 
															+    )
														
 
															+    parser.add_argument(
														
 
															+        '--sort-type',
														
 
															+        type=str,
														
 
															+        default='综合',
														
 
															+        choices=['综合', '最新', '最多点赞', '最多评论'],
														
 
															+        help='排序方式（默认: 综合）'
														
 
															+    )
														
 
															+    parser.add_argument(
														
 
															+        '--publish-time',
														
 
															+        type=str,
														
 
															+        default='不限',
														
 
															+        choices=['不限', '一天内', '一周内', '半年内'],
														
 
															+        help='发布时间筛选（默认: 不限）'
														
 
															+    )
														
 
															+    parser.add_argument(
														
 
															+        '--cursor',
														
 
															+        type=str,
														
 
															+        default='',
														
 
															+        help='翻页游标（默认为空，即第一页）'
														
 
															+    )
														
 
															+
														
 
															+    # 详情补充参数
														
 
															+    parser.add_argument(
														
 
															+        '--no-detail',
														
 
															+        action='store_true',
														
 
															+        help='禁用详情补充（仅搜索）'
														
 
															+    )
														
 
															+    parser.add_argument(
														
 
															+        '--detail-delay',
														
 
															+        type=int,
														
 
															+        default=1,
														
 
															+        help='详情请求间隔时间（秒），默认1秒'
														
 
															+    )
														
 
															+
														
 
															+    # 输出参数
														
 
															+    parser.add_argument(
														
 
															+        '--results-dir',
														
 
															+        type=str,
														
 
															+        default=None,
														
 
															+        help='结果输出目录（默认: data/search）'
														
 
															+    )
														
 
															+
														
 
															+    args = parser.parse_args()
														
 
															+
														
 
															+    # 执行搜索和补充
														
 
															+    try:
														
 
															+        posts, filepath = search_and_enrich(
														
 
															+            keyword=args.keyword,
														
 
															+            content_type=args.content_type,
														
 
															+            sort_type=args.sort_type,
														
 
															+            publish_time=args.publish_time,
														
 
															+            cursor=args.cursor,
														
 
															+            enable_detail=not args.no_detail,
														
 
															+            detail_delay=args.detail_delay,
														
 
															+            results_dir=args.results_dir
														
 
															+        )
														
 
															+
														
 
															+        # 打印摘要
														
 
															+        print("=" * 80)
														
 
															+        print("执行完成")
														
 
															+        print("=" * 80)
														
 
															+        print(f"关键词: {args.keyword}")
														
 
															+        print(f"获得帖子: {len(posts)} 条")
														
 
															+        if not args.no_detail:
														
 
															+            enriched = sum(1 for p in posts if p.detail_fetched)
														
 
															+            print(f"详情补充: {enriched}/{len(posts)} 条")
														
 
															+        print(f"结果文件: {filepath}")
														
 
															+        print("=" * 80)
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"\n✗ 执行失败: {e}", file=sys.stderr)
														
 
															+        import traceback
														
 
															+        traceback.print_exc()
														
 
															+        sys.exit(1)
														
 
															+
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    main()
														
--- a/script/search/xiaohongshu_detail.py
+++ b/script/search/xiaohongshu_detail.py
@@ -0,0 +1,282 @@
 
															+#!/usr/bin/env python3
														
 
															+"""
														
 
															+小红书笔记详情获取工具
														
 
															+根据笔记ID获取小红书笔记的详细信息
														
 
															+"""
														
 
															+
														
 
															+import requests
														
 
															+import json
														
 
															+import os
														
 
															+import argparse
														
 
															+import time
														
 
															+from datetime import datetime
														
 
															+from typing import Dict, Any, List
														
 
															+
														
 
															+
														
 
															+class XiaohongshuDetail:
														
 
															+    """小红书笔记详情API封装类"""
														
 
															+
														
 
															+    BASE_URL = "http://47.84.182.56:8001"
														
 
															+    TOOL_NAME = "get_xhs_detail_by_note_id"
														
 
															+    PLATFORM = "xiaohongshu"
														
 
															+
														
 
															+    def __init__(self, results_dir: str = None):
														
 
															+        """
														
 
															+        初始化API客户端
														
 
															+
														
 
															+        Args:
														
 
															+            results_dir: 结果输出目录，默认为项目根目录下的 data/detail 文件夹
														
 
															+        """
														
 
															+        self.api_url = f"{self.BASE_URL}/tools/call/{self.TOOL_NAME}"
														
 
															+
														
 
															+        # 设置结果输出目录
														
 
															+        if results_dir:
														
 
															+            self.results_base_dir = results_dir
														
 
															+        else:
														
 
															+            # 默认使用项目根目录的 data/detail 文件夹
														
 
															+            script_dir = os.path.dirname(os.path.abspath(__file__))
														
 
															+            project_root = os.path.dirname(os.path.dirname(script_dir))
														
 
															+            self.results_base_dir = os.path.join(project_root, "data", "detail")
														
 
															+
														
 
															+    def get_detail(
														
 
															+        self,
														
 
															+        note_id: str,
														
 
															+        timeout: int = 30,
														
 
															+        max_retries: int = 3,
														
 
															+        retry_delay: int = 2
														
 
															+    ) -> Dict[str, Any]:
														
 
															+        """
														
 
															+        获取小红书笔记详情（带重试机制）
														
 
															+
														
 
															+        Args:
														
 
															+            note_id: 笔记ID
														
 
															+            timeout: 请求超时时间（秒），默认30秒
														
 
															+            max_retries: 最大重试次数，默认3次
														
 
															+            retry_delay: 重试间隔时间（秒），默认2秒
														
 
															+
														
 
															+        Returns:
														
 
															+            API响应的JSON数据
														
 
															+
														
 
															+        Raises:
														
 
															+            requests.exceptions.RequestException: 所有重试都失败时抛出异常
														
 
															+        """
														
 
															+        payload = {
														
 
															+            "note_id": note_id
														
 
															+        }
														
 
															+
														
 
															+        last_exception = None
														
 
															+
														
 
															+        # 重试循环：最多尝试 max_retries 次
														
 
															+        for attempt in range(1, max_retries + 1):
														
 
															+            try:
														
 
															+                if attempt > 1:
														
 
															+                    print(f"    重试第 {attempt - 1}/{max_retries - 1} 次: {note_id}")
														
 
															+
														
 
															+                response = requests.post(
														
 
															+                    self.api_url,
														
 
															+                    json=payload,
														
 
															+                    timeout=timeout,
														
 
															+                    headers={"Content-Type": "application/json"}
														
 
															+                )
														
 
															+                response.raise_for_status()
														
 
															+                result = response.json()
														
 
															+
														
 
															+                if attempt > 1:
														
 
															+                    print(f"    ✓ 重试成功")
														
 
															+
														
 
															+                return result
														
 
															+
														
 
															+            except requests.exceptions.RequestException as e:
														
 
															+                last_exception = e
														
 
															+
														
 
															+                if attempt < max_retries:
														
 
															+                    # 还有重试机会，等待后继续
														
 
															+                    print(f"    ✗ 请求失败 (第{attempt}次尝试): {e}")
														
 
															+                    print(f"    等待 {retry_delay} 秒后重试...")
														
 
															+                    time.sleep(retry_delay)
														
 
															+                else:
														
 
															+                    # 已达最大重试次数，抛出异常
														
 
															+                    print(f"    ✗ 请求失败 (已达最大重试次数 {max_retries}): {e}")
														
 
															+
														
 
															+        # 所有重试都失败，抛出最后一次的异常
														
 
															+        raise last_exception
														
 
															+
														
 
															+    def get_details_batch(
														
 
															+        self,
														
 
															+        note_ids: List[str],
														
 
															+        timeout: int = 30,
														
 
															+        max_retries: int = 3,
														
 
															+        retry_delay: int = 2,
														
 
															+        batch_delay: int = 1
														
 
															+    ) -> List[Dict[str, Any]]:
														
 
															+        """
														
 
															+        批量获取多个笔记的详情
														
 
															+
														
 
															+        Args:
														
 
															+            note_ids: 笔记ID列表
														
 
															+            timeout: 请求超时时间（秒），默认30秒
														
 
															+            max_retries: 最大重试次数，默认3次
														
 
															+            retry_delay: 重试间隔时间（秒），默认2秒
														
 
															+            batch_delay: 批量请求间隔时间（秒），默认1秒
														
 
															+
														
 
															+        Returns:
														
 
															+            包含所有笔记详情的列表
														
 
															+        """
														
 
															+        results = []
														
 
															+        total = len(note_ids)
														
 
															+
														
 
															+        for idx, note_id in enumerate(note_ids, 1):
														
 
															+            print(f"正在获取笔记详情 ({idx}/{total}): {note_id}")
														
 
															+            try:
														
 
															+                result = self.get_detail(note_id, timeout, max_retries, retry_delay)
														
 
															+                results.append({
														
 
															+                    "note_id": note_id,
														
 
															+                    "success": True,
														
 
															+                    "data": result
														
 
															+                })
														
 
															+                print(f"  ✓ 成功获取")
														
 
															+            except Exception as e:
														
 
															+                print(f"  ✗ 获取失败: {e}")
														
 
															+                results.append({
														
 
															+                    "note_id": note_id,
														
 
															+                    "success": False,
														
 
															+                    "error": str(e)
														
 
															+                })
														
 
															+
														
 
															+            # 避免请求过快，添加延迟（最后一个不需要延迟）
														
 
															+            if idx < total:
														
 
															+                time.sleep(batch_delay)
														
 
															+
														
 
															+        return results
														
 
															+
														
 
															+    def save_result(self, note_id: str, result: Dict[str, Any]) -> str:
														
 
															+        """
														
 
															+        保存单个笔记详情到文件
														
 
															+        目录结构: results/xiaohongshu_detail/note_id/时间戳.json
														
 
															+
														
 
															+        Args:
														
 
															+            note_id: 笔记ID
														
 
															+            result: API返回的结果
														
 
															+
														
 
															+        Returns:
														
 
															+            保存的文件路径
														
 
															+        """
														
 
															+        # 创建目录结构: results/xiaohongshu_detail/note_id/
														
 
															+        result_dir = os.path.join(self.results_base_dir, "xiaohongshu_detail", note_id)
														
 
															+        os.makedirs(result_dir, exist_ok=True)
														
 
															+
														
 
															+        # 文件名使用时间戳
														
 
															+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
														
 
															+        filename = f"{timestamp}.json"
														
 
															+        filepath = os.path.join(result_dir, filename)
														
 
															+
														
 
															+        # 保存结果
														
 
															+        with open(filepath, 'w', encoding='utf-8') as f:
														
 
															+            json.dump(result, f, ensure_ascii=False, indent=2)
														
 
															+
														
 
															+        return filepath
														
 
															+
														
 
															+    def save_batch_results(self, results: List[Dict[str, Any]], batch_name: str = None) -> str:
														
 
															+        """
														
 
															+        保存批量获取的结果到单个文件
														
 
															+        目录结构: results/xiaohongshu_detail/batch/时间戳_批次名.json
														
 
															+
														
 
															+        Args:
														
 
															+            results: 批量获取的结果列表
														
 
															+            batch_name: 批次名称（可选）
														
 
															+
														
 
															+        Returns:
														
 
															+            保存的文件路径
														
 
															+        """
														
 
															+        # 创建目录结构
														
 
															+        result_dir = os.path.join(self.results_base_dir, "xiaohongshu_detail", "batch")
														
 
															+        os.makedirs(result_dir, exist_ok=True)
														
 
															+
														
 
															+        # 文件名使用时间戳和批次名
														
 
															+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
														
 
															+        if batch_name:
														
 
															+            filename = f"{timestamp}_{batch_name}.json"
														
 
															+        else:
														
 
															+            filename = f"{timestamp}.json"
														
 
															+        filepath = os.path.join(result_dir, filename)
														
 
															+
														
 
															+        # 保存结果
														
 
															+        with open(filepath, 'w', encoding='utf-8') as f:
														
 
															+            json.dump(results, f, ensure_ascii=False, indent=2)
														
 
															+
														
 
															+        return filepath
														
 
															+
														
 
															+
														
 
															+def main():
														
 
															+    """示例使用"""
														
 
															+    # 解析命令行参数
														
 
															+    parser = argparse.ArgumentParser(description='小红书笔记详情获取工具')
														
 
															+    parser.add_argument(
														
 
															+        '--results-dir',
														
 
															+        type=str,
														
 
															+        default='data/detail',
														
 
															+        help='结果输出目录 (默认: data/detail)'
														
 
															+    )
														
 
															+
														
 
															+    # 创建互斥参数组：单个笔记ID 或 批量笔记ID
														
 
															+    group = parser.add_mutually_exclusive_group(required=True)
														
 
															+    group.add_argument(
														
 
															+        '--note-id',
														
 
															+        type=str,
														
 
															+        help='单个笔记ID'
														
 
															+    )
														
 
															+    group.add_argument(
														
 
															+        '--note-ids',
														
 
															+        type=str,
														
 
															+        help='多个笔记ID，用逗号分隔，例如: id1,id2,id3'
														
 
															+    )
														
 
															+    group.add_argument(
														
 
															+        '--note-ids-file',
														
 
															+        type=str,
														
 
															+        help='包含笔记ID的文件路径，每行一个ID'
														
 
															+    )
														
 
															+
														
 
															+    parser.add_argument(
														
 
															+        '--batch-name',
														
 
															+        type=str,
														
 
															+        help='批量获取时的批次名称（可选）'
														
 
															+    )
														
 
															+
														
 
															+    args = parser.parse_args()
														
 
															+
														
 
															+    # 创建API客户端实例
														
 
															+    client = XiaohongshuDetail(results_dir=args.results_dir)
														
 
															+
														
 
															+    try:
														
 
															+        # 单个笔记ID
														
 
															+        if args.note_id:
														
 
															+            result = client.get_detail(args.note_id)
														
 
															+            filepath = client.save_result(args.note_id, result)
														
 
															+            print(f"Output: {filepath}")
														
 
															+
														
 
															+        # 多个笔记ID（命令行逗号分隔）
														
 
															+        elif args.note_ids:
														
 
															+            note_ids = [nid.strip() for nid in args.note_ids.split(',') if nid.strip()]
														
 
															+            results = client.get_details_batch(note_ids)
														
 
															+            filepath = client.save_batch_results(results, args.batch_name)
														
 
															+            print(f"\n批量获取完成")
														
 
															+            print(f"成功: {sum(1 for r in results if r['success'])}/{len(results)}")
														
 
															+            print(f"Output: {filepath}")
														
 
															+
														
 
															+        # 从文件读取笔记ID
														
 
															+        elif args.note_ids_file:
														
 
															+            with open(args.note_ids_file, 'r', encoding='utf-8') as f:
														
 
															+                note_ids = [line.strip() for line in f if line.strip()]
														
 
															+            results = client.get_details_batch(note_ids)
														
 
															+            filepath = client.save_batch_results(results, args.batch_name)
														
 
															+            print(f"\n批量获取完成")
														
 
															+            print(f"成功: {sum(1 for r in results if r['success'])}/{len(results)}")
														
 
															+            print(f"Output: {filepath}")
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        print(f"Error: {e}", file=__import__('sys').stderr)
														
 
															+        raise
														
 
															+
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    main()
														
--- a/script/search/xiaohongshu_search.py
+++ b/script/search/xiaohongshu_search.py
@@ -41,7 +41,7 @@ class XiaohongshuSearch:
 
															     def search(
														
 
															         self,
														
 
															         keyword: str,
														
 
															-        content_type: str = "图文",
														
 
															+        content_type: str = "不限",
														
 
															         sort_type: str = "综合",
														
 
															         publish_time: str = "不限",
														
 
															         cursor: str = "",
														
@@ -91,7 +91,14 @@ class XiaohongshuSearch:
 
															                     headers={"Content-Type": "application/json"}
														
 
															                 )
														
 
															                 response.raise_for_status()
														
 
															-                result = response.json()
														
 
															+                api_response = response.json()
														
 
															+
														
 
															+                # 解析API返回的result字段（是JSON字符串）
														
 
															+                if not api_response.get("success"):
														
 
															+                    raise Exception(f"API返回失败: {api_response}")
														
 
															+
														
 
															+                result_str = api_response.get("result", "{}")
														
 
															+                result = json.loads(result_str)
														
 
															                 # 预处理返回数据：提取 image_list 中的 URL 字符串
														
 
															                 self._preprocess_response(result)
														
@@ -125,7 +132,6 @@ class XiaohongshuSearch:
 
															         """
														
 
															         # 获取帖子列表
														
 
															         notes = result.get("data", {}).get("data", [])
														
 
															-
														
 
															         for note in notes:
														
 
															             note_card = note.get("note_card", {})
														
 
															             image_list_raw = note_card.get("image_list", [])