1 месяц назад · 94ff7ab346
--- a/examples/content_needs_generation/skills/content_deconstruction_search.md
+++ b/examples/content_needs_generation/skills/content_deconstruction_search.md
@@ -1,28 +1,27 @@
 
				 ---
			
 
				 name: query_content_deconstruction_by_keywords
			
 
				-description: 根据关键词搜索视频标题和标题的解析结果
			
 
				+description: 根据关键词搜索选题点的解析结果，返回最高匹配度
			
 
				 ---
			
 
				 
			
 
				 ## 根据关键词搜索视频标题和解析结果
			
 
				 
			
 
				-你可以通过内容解析结果搜索工具 `query_content_deconstruction_by_keywords` 根据关键词搜索视频标题和标题的解析结果。
			
 
				+你可以通过内容解析结果搜索工具 `query_content_deconstruction_by_keywords` 根据关键词搜索选题点的解析结果，并获取最高匹配度。
			
 
				 
			
 
				 ### 使用场景
			
 
				 
			
 
				-- 需要查找包含特定关键词的视频内容
			
 
				-- 需要获取视频标题的解析结果（包括选题、关键点、灵感点、目的点等）
			
 
				-- 需要分析特定主题的视频内容结构
			
 
				+- 需要评估标题与现有视频内容的匹配程度
			
 
				+- 需要判断标题关键词在内容库中的覆盖情况
			
 
				+- 需要计算标题分词后的关键词匹配比例
			
 
				 
			
 
				 ### 参数说明
			
 
				 
			
 
				-- `keywords`: 关键词列表，例如：`["食用", "禁忌"]`
			
 
				+- `keywords`: 关键词列表，需要将标题进行完全分词后传入，例如：`["食用", "禁忌"]`
			
 
				+  - 注意：应该先将标题进行分词处理，然后将分词结果作为关键词列表传入
			
 
				+
			
 
				 
			
 
				 ### 返回结果
			
 
				 
			
 
				-工具会返回一个字典，其中：
			
 
				-- 每个关键词作为键
			
 
				-- 对应的值是该关键词匹配的视频列表
			
 
				-- 每个视频包含：
			
 
				-  - `contentId`: 内容ID
			
 
				-  - `title`: 视频标题
			
 
				-  - `description`: 解析结果数组，包含不同类型的描述信息（选题、关键点、灵感点、目的点等）
			
 
				+工具会返回一个字典，包含：
			
 
				+- `max_content_id_ratio`: 最高匹配度（百分比），例如：`60.0` 表示 60%
			
 
				+  - 计算方式：匹配最多关键词的 contentId 对应的关键词数量 / 总关键词数量 × 100
			
 
				+  - 如果未找到匹配内容，返回 `0.0`
			
--- a/examples/content_needs_generation/tool/content_deconstruction_search.py
+++ b/examples/content_needs_generation/tool/content_deconstruction_search.py
@@ -3,6 +3,7 @@
 
				 
			
 
				 用于 Agent 执行时根据关键词搜索视频内容及其解析结果。
			
 
				 """
			
 
				+import asyncio
			
 
				 import json
			
 
				 import os
			
 
				 from typing import Any, Dict, List, Optional
			
@@ -60,7 +61,7 @@ async def _call_content_deconstruction_api(
 
				 
			
 
				 
			
 
				 @tool(
			
 
				-    description="根据关键词搜索视频标题和标题的解析结果。支持传入多个关键词，返回每个关键词对应的视频列表及其解析信息。",
			
 
				+    description="根据关键词搜索视频标题和标题的解析结果。支持传入多个关键词，自动过滤掉 type 为'选题'的 description，统计每个 contentId 对应的关键词数量，并返回最高匹配比例（匹配最多关键词的 contentId 占所有关键词的比例）。",
			
 
				     display={
			
 
				         "zh": {
			
 
				             "name": "内容解析结果搜索",
			
@@ -75,12 +76,17 @@ async def query_content_deconstruction_by_keywords(
 
				 ) -> ToolResult:
			
 
				     """
			
 
				     根据关键词搜索视频标题和标题的解析结果。
			
 
				+    
			
 
				+    处理流程：
			
 
				+    1. 过滤掉 description 中 type 为 "选题" 的项
			
 
				+    2. 统计每个 contentId 对应的关键词数量
			
 
				+    3. 计算并返回最高匹配比例（匹配最多关键词的 contentId 占所有关键词的比例）
			
 
				 
			
 
				     Args:
			
 
				         keywords: 关键词列表，例如：['食用', '禁忌']
			
 
				 
			
 
				     Returns:
			
 
				-        ToolResult: 包含每个关键词对应的视频列表及其解析结果
			
 
				+        ToolResult: output 为 JSON 字符串，仅包含最高匹配比例 `max_content_id_ratio`（百分比，保留 2 位小数）
			
 
				     """
			
 
				     # 验证关键词列表
			
 
				     if not keywords:
			
@@ -120,24 +126,77 @@ async def query_content_deconstruction_by_keywords(
 
				             title="内容解析结果搜索",
			
 
				             output=json.dumps(
			
 
				                 {
			
 
				-                    "message": "未找到相关内容",
			
 
				-                    "keywords": keywords,
			
 
				+                    "max_content_id_ratio": 0.0,
			
 
				                 },
			
 
				                 ensure_ascii=False,
			
 
				                 indent=2,
			
 
				             ),
			
 
				         )
			
 
				 
			
 
				-    # 统计每个关键词的结果数量
			
 
				-    keyword_counts = {
			
 
				-        keyword: len(videos) if isinstance(videos, list) else 0
			
 
				-        for keyword, videos in results.items()
			
 
				+    # 1. 过滤 description，去掉 type 为 "选题" 的项
			
 
				+    filtered_results = {}
			
 
				+    for keyword, videos in results.items():
			
 
				+        if not isinstance(videos, list):
			
 
				+            continue
			
 
				+        
			
 
				+        filtered_videos = []
			
 
				+        for video in videos:
			
 
				+            if not isinstance(video, dict):
			
 
				+                continue
			
 
				+            
			
 
				+            # 复制视频信息
			
 
				+            filtered_video = video.copy()
			
 
				+            
			
 
				+            # 过滤 description，去掉 type 为 "选题" 的项
			
 
				+            if "description" in filtered_video and isinstance(filtered_video["description"], list):
			
 
				+                filtered_video["description"] = [
			
 
				+                    desc for desc in filtered_video["description"]
			
 
				+                    if isinstance(desc, dict) and desc.get("type") != "选题"
			
 
				+                ]
			
 
				+            
			
 
				+            # 如果过滤后还有 description，说明该词匹配到了该 contentId
			
 
				+            if filtered_video.get("description") and len(filtered_video["description"]) > 0:
			
 
				+                filtered_videos.append(filtered_video)
			
 
				+        
			
 
				+        if filtered_videos:
			
 
				+            filtered_results[keyword] = filtered_videos
			
 
				+
			
 
				+    # 2. 统计每个 contentId 对应的关键词数量
			
 
				+    content_id_to_keywords: Dict[str, set] = {}
			
 
				+    for keyword, videos in filtered_results.items():
			
 
				+        for video in videos:
			
 
				+            content_id = video.get("contentId")
			
 
				+            if content_id:
			
 
				+                if content_id not in content_id_to_keywords:
			
 
				+                    content_id_to_keywords[content_id] = set()
			
 
				+                content_id_to_keywords[content_id].add(keyword)
			
 
				+
			
 
				+    # 3. 计算每个 contentId 对应最多词的比例
			
 
				+    max_keyword_count = 0
			
 
				+    max_content_id = None
			
 
				+    if content_id_to_keywords:
			
 
				+        for content_id, matched_keywords in content_id_to_keywords.items():
			
 
				+            keyword_count = len(matched_keywords)
			
 
				+            if keyword_count > max_keyword_count:
			
 
				+                max_keyword_count = keyword_count
			
 
				+                max_content_id = content_id
			
 
				+    
			
 
				+    # 计算比例（匹配到的关键词数 / 总关键词数）
			
 
				+    max_ratio = max_keyword_count / len(keywords) if keywords else 0.0
			
 
				+
			
 
				+    # 构建输出结果，只返回最高匹配度
			
 
				+    output_data = {
			
 
				+        "max_content_id_ratio": round(max_ratio * 100, 2),  # 转换为百分比，保留2位小数
			
 
				     }
			
 
				-    total_count = sum(keyword_counts.values())
			
 
				 
			
 
				-    output = json.dumps(results, ensure_ascii=False, indent=2)
			
 
				+    output = json.dumps(output_data, ensure_ascii=False, indent=2)
			
 
				     return ToolResult(
			
 
				         title=f"内容解析结果搜索 - {len(keywords)} 个关键词",
			
 
				         output=output,
			
 
				-        long_term_memory=f"检索到内容解析结果，关键词: {', '.join(keywords)}，共 {total_count} 条结果",
			
 
				+        long_term_memory=f"检索到内容解析结果，关键词: {', '.join(keywords)}，最高匹配比例: {max_ratio * 100:.2f}%",
			
 
				     )
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    res = asyncio.run(query_content_deconstruction_by_keywords(['食用', '禁忌']))
			
 
				+    print(res)