vor 1 Monat · 94ff7ab346
--- a/examples/content_needs_generation/skills/content_deconstruction_search.md
+++ b/examples/content_needs_generation/skills/content_deconstruction_search.md
@@ -1,28 +1,27 @@
 
															 ---
														
 
															 name: query_content_deconstruction_by_keywords
														
 
															-description: 根据关键词搜索视频标题和标题的解析结果
														
 
															+description: 根据关键词搜索选题点的解析结果，返回最高匹配度
														
 
															 ---
														
 
															 ## 根据关键词搜索视频标题和解析结果
														
 
															-你可以通过内容解析结果搜索工具 `query_content_deconstruction_by_keywords` 根据关键词搜索视频标题和标题的解析结果。
														
 
															+你可以通过内容解析结果搜索工具 `query_content_deconstruction_by_keywords` 根据关键词搜索选题点的解析结果，并获取最高匹配度。
														
 
															 ### 使用场景
														
 
															-- 需要查找包含特定关键词的视频内容
														
 
															-- 需要获取视频标题的解析结果（包括选题、关键点、灵感点、目的点等）
														
 
															-- 需要分析特定主题的视频内容结构
														
 
															+- 需要评估标题与现有视频内容的匹配程度
														
 
															+- 需要判断标题关键词在内容库中的覆盖情况
														
 
															+- 需要计算标题分词后的关键词匹配比例
														
 
															 ### 参数说明
														
 
															-- `keywords`: 关键词列表，例如：`["食用", "禁忌"]`
														
 
															+- `keywords`: 关键词列表，需要将标题进行完全分词后传入，例如：`["食用", "禁忌"]`
														
 
															+  - 注意：应该先将标题进行分词处理，然后将分词结果作为关键词列表传入
														
 
															+
														
 
															 ### 返回结果
														
 
															-工具会返回一个字典，其中：
														
 
															-- 每个关键词作为键
														
 
															-- 对应的值是该关键词匹配的视频列表
														
 
															-- 每个视频包含：
														
 
															-  - `contentId`: 内容ID
														
 
															-  - `title`: 视频标题
														
 
															-  - `description`: 解析结果数组，包含不同类型的描述信息（选题、关键点、灵感点、目的点等）
														
 
															+工具会返回一个字典，包含：
														
 
															+- `max_content_id_ratio`: 最高匹配度（百分比），例如：`60.0` 表示 60%
														
 
															+  - 计算方式：匹配最多关键词的 contentId 对应的关键词数量 / 总关键词数量 × 100
														
 
															+  - 如果未找到匹配内容，返回 `0.0`
														
--- a/examples/content_needs_generation/tool/content_deconstruction_search.py
+++ b/examples/content_needs_generation/tool/content_deconstruction_search.py
@@ -3,6 +3,7 @@
 
															 用于 Agent 执行时根据关键词搜索视频内容及其解析结果。
														
 
															 """
														
 
															+import asyncio
														
 
															 import json
														
 
															 import os
														
 
															 from typing import Any, Dict, List, Optional
														
@@ -60,7 +61,7 @@ async def _call_content_deconstruction_api(
 
															 @tool(
														
 
															-    description="根据关键词搜索视频标题和标题的解析结果。支持传入多个关键词，返回每个关键词对应的视频列表及其解析信息。",
														
 
															+    description="根据关键词搜索视频标题和标题的解析结果。支持传入多个关键词，自动过滤掉 type 为'选题'的 description，统计每个 contentId 对应的关键词数量，并返回最高匹配比例（匹配最多关键词的 contentId 占所有关键词的比例）。",
														
 
															     display={
														
 
															         "zh": {
														
 
															             "name": "内容解析结果搜索",
														
@@ -75,12 +76,17 @@ async def query_content_deconstruction_by_keywords(
 
															 ) -> ToolResult:
														
 
															     """
														
 
															     根据关键词搜索视频标题和标题的解析结果。
														
 
															+    
														
 
															+    处理流程：
														
 
															+    1. 过滤掉 description 中 type 为 "选题" 的项
														
 
															+    2. 统计每个 contentId 对应的关键词数量
														
 
															+    3. 计算并返回最高匹配比例（匹配最多关键词的 contentId 占所有关键词的比例）
														
 
															     Args:
														
 
															         keywords: 关键词列表，例如：['食用', '禁忌']
														
 
															     Returns:
														
 
															-        ToolResult: 包含每个关键词对应的视频列表及其解析结果
														
 
															+        ToolResult: 包含过滤后的结果、contentId 统计信息、最高匹配比例等
														
 
															     """
														
 
															     # 验证关键词列表
														
 
															     if not keywords:
														
@@ -120,24 +126,77 @@ async def query_content_deconstruction_by_keywords(
 
															             title="内容解析结果搜索",
														
 
															             output=json.dumps(
														
 
															                 {
														
 
															-                    "message": "未找到相关内容",
														
 
															-                    "keywords": keywords,
														
 
															+                    "max_content_id_ratio": 0.0,
														
 
															                 },
														
 
															                 ensure_ascii=False,
														
 
															                 indent=2,
														
 
															             ),
														
 
															         )
														
 
															-    # 统计每个关键词的结果数量
														
 
															-    keyword_counts = {
														
 
															-        keyword: len(videos) if isinstance(videos, list) else 0
														
 
															-        for keyword, videos in results.items()
														
 
															+    # 1. 过滤 description，去掉 type 为 "选题" 的项
														
 
															+    filtered_results = {}
														
 
															+    for keyword, videos in results.items():
														
 
															+        if not isinstance(videos, list):
														
 
															+            continue
														
 
															+        
														
 
															+        filtered_videos = []
														
 
															+        for video in videos:
														
 
															+            if not isinstance(video, dict):
														
 
															+                continue
														
 
															+            
														
 
															+            # 复制视频信息
														
 
															+            filtered_video = video.copy()
														
 
															+            
														
 
															+            # 过滤 description，去掉 type 为 "选题" 的项
														
 
															+            if "description" in filtered_video and isinstance(filtered_video["description"], list):
														
 
															+                filtered_video["description"] = [
														
 
															+                    desc for desc in filtered_video["description"]
														
 
															+                    if isinstance(desc, dict) and desc.get("type") != "选题"
														
 
															+                ]
														
 
															+            
														
 
															+            # 如果过滤后还有 description，说明该词匹配到了该 contentId
														
 
															+            if filtered_video.get("description") and len(filtered_video["description"]) > 0:
														
 
															+                filtered_videos.append(filtered_video)
														
 
															+        
														
 
															+        if filtered_videos:
														
 
															+            filtered_results[keyword] = filtered_videos
														
 
															+
														
 
															+    # 2. 统计每个 contentId 对应的关键词数量
														
 
															+    content_id_to_keywords: Dict[str, set] = {}
														
 
															+    for keyword, videos in filtered_results.items():
														
 
															+        for video in videos:
														
 
															+            content_id = video.get("contentId")
														
 
															+            if content_id:
														
 
															+                if content_id not in content_id_to_keywords:
														
 
															+                    content_id_to_keywords[content_id] = set()
														
 
															+                content_id_to_keywords[content_id].add(keyword)
														
 
															+
														
 
															+    # 3. 计算每个 contentId 对应最多词的比例
														
 
															+    max_keyword_count = 0
														
 
															+    max_content_id = None
														
 
															+    if content_id_to_keywords:
														
 
															+        for content_id, matched_keywords in content_id_to_keywords.items():
														
 
															+            keyword_count = len(matched_keywords)
														
 
															+            if keyword_count > max_keyword_count:
														
 
															+                max_keyword_count = keyword_count
														
 
															+                max_content_id = content_id
														
 
															+    
														
 
															+    # 计算比例（匹配到的关键词数 / 总关键词数）
														
 
															+    max_ratio = max_keyword_count / len(keywords) if keywords else 0.0
														
 
															+
														
 
															+    # 构建输出结果，只返回最高匹配度
														
 
															+    output_data = {
														
 
															+        "max_content_id_ratio": round(max_ratio * 100, 2),  # 转换为百分比，保留2位小数
														
 
															     }
														
 
															-    total_count = sum(keyword_counts.values())
														
 
															-    output = json.dumps(results, ensure_ascii=False, indent=2)
														
 
															+    output = json.dumps(output_data, ensure_ascii=False, indent=2)
														
 
															     return ToolResult(
														
 
															         title=f"内容解析结果搜索 - {len(keywords)} 个关键词",
														
 
															         output=output,
														
 
															-        long_term_memory=f"检索到内容解析结果，关键词: {', '.join(keywords)}，共 {total_count} 条结果",
														
 
															+        long_term_memory=f"检索到内容解析结果，关键词: {', '.join(keywords)}，最高匹配比例: {max_ratio * 100:.2f}%",
														
 
															     )
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    res = asyncio.run(query_content_deconstruction_by_keywords(['食用', '禁忌']))
														
 
															+    print(res)