|
@@ -3,6 +3,7 @@
|
|
|
|
|
|
|
|
用于 Agent 执行时根据关键词搜索视频内容及其解析结果。
|
|
用于 Agent 执行时根据关键词搜索视频内容及其解析结果。
|
|
|
"""
|
|
"""
|
|
|
|
|
+import asyncio
|
|
|
import json
|
|
import json
|
|
|
import os
|
|
import os
|
|
|
from typing import Any, Dict, List, Optional
|
|
from typing import Any, Dict, List, Optional
|
|
@@ -60,7 +61,7 @@ async def _call_content_deconstruction_api(
|
|
|
|
|
|
|
|
|
|
|
|
|
@tool(
|
|
@tool(
|
|
|
- description="根据关键词搜索视频标题和标题的解析结果。支持传入多个关键词,返回每个关键词对应的视频列表及其解析信息。",
|
|
|
|
|
|
|
+ description="根据关键词搜索视频标题和标题的解析结果。支持传入多个关键词,自动过滤掉 type 为'选题'的 description,统计每个 contentId 对应的关键词数量,并返回最高匹配比例(匹配最多关键词的 contentId 占所有关键词的比例)。",
|
|
|
display={
|
|
display={
|
|
|
"zh": {
|
|
"zh": {
|
|
|
"name": "内容解析结果搜索",
|
|
"name": "内容解析结果搜索",
|
|
@@ -75,12 +76,17 @@ async def query_content_deconstruction_by_keywords(
|
|
|
) -> ToolResult:
|
|
) -> ToolResult:
|
|
|
"""
|
|
"""
|
|
|
根据关键词搜索视频标题和标题的解析结果。
|
|
根据关键词搜索视频标题和标题的解析结果。
|
|
|
|
|
+
|
|
|
|
|
+ 处理流程:
|
|
|
|
|
+ 1. 过滤掉 description 中 type 为 "选题" 的项
|
|
|
|
|
+ 2. 统计每个 contentId 对应的关键词数量
|
|
|
|
|
+ 3. 计算并返回最高匹配比例(匹配最多关键词的 contentId 占所有关键词的比例)
|
|
|
|
|
|
|
|
Args:
|
|
Args:
|
|
|
keywords: 关键词列表,例如:['食用', '禁忌']
|
|
keywords: 关键词列表,例如:['食用', '禁忌']
|
|
|
|
|
|
|
|
Returns:
|
|
Returns:
|
|
|
- ToolResult: 包含每个关键词对应的视频列表及其解析结果
|
|
|
|
|
|
|
+ ToolResult: 包含过滤后的结果、contentId 统计信息、最高匹配比例等
|
|
|
"""
|
|
"""
|
|
|
# 验证关键词列表
|
|
# 验证关键词列表
|
|
|
if not keywords:
|
|
if not keywords:
|
|
@@ -120,24 +126,77 @@ async def query_content_deconstruction_by_keywords(
|
|
|
title="内容解析结果搜索",
|
|
title="内容解析结果搜索",
|
|
|
output=json.dumps(
|
|
output=json.dumps(
|
|
|
{
|
|
{
|
|
|
- "message": "未找到相关内容",
|
|
|
|
|
- "keywords": keywords,
|
|
|
|
|
|
|
+ "max_content_id_ratio": 0.0,
|
|
|
},
|
|
},
|
|
|
ensure_ascii=False,
|
|
ensure_ascii=False,
|
|
|
indent=2,
|
|
indent=2,
|
|
|
),
|
|
),
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- # 统计每个关键词的结果数量
|
|
|
|
|
- keyword_counts = {
|
|
|
|
|
- keyword: len(videos) if isinstance(videos, list) else 0
|
|
|
|
|
- for keyword, videos in results.items()
|
|
|
|
|
|
|
+ # 1. 过滤 description,去掉 type 为 "选题" 的项
|
|
|
|
|
+ filtered_results = {}
|
|
|
|
|
+ for keyword, videos in results.items():
|
|
|
|
|
+ if not isinstance(videos, list):
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
|
|
+ filtered_videos = []
|
|
|
|
|
+ for video in videos:
|
|
|
|
|
+ if not isinstance(video, dict):
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
|
|
+ # 复制视频信息
|
|
|
|
|
+ filtered_video = video.copy()
|
|
|
|
|
+
|
|
|
|
|
+ # 过滤 description,去掉 type 为 "选题" 的项
|
|
|
|
|
+ if "description" in filtered_video and isinstance(filtered_video["description"], list):
|
|
|
|
|
+ filtered_video["description"] = [
|
|
|
|
|
+ desc for desc in filtered_video["description"]
|
|
|
|
|
+ if isinstance(desc, dict) and desc.get("type") != "选题"
|
|
|
|
|
+ ]
|
|
|
|
|
+
|
|
|
|
|
+ # 如果过滤后还有 description,说明该词匹配到了该 contentId
|
|
|
|
|
+ if filtered_video.get("description") and len(filtered_video["description"]) > 0:
|
|
|
|
|
+ filtered_videos.append(filtered_video)
|
|
|
|
|
+
|
|
|
|
|
+ if filtered_videos:
|
|
|
|
|
+ filtered_results[keyword] = filtered_videos
|
|
|
|
|
+
|
|
|
|
|
+ # 2. 统计每个 contentId 对应的关键词数量
|
|
|
|
|
+ content_id_to_keywords: Dict[str, set] = {}
|
|
|
|
|
+ for keyword, videos in filtered_results.items():
|
|
|
|
|
+ for video in videos:
|
|
|
|
|
+ content_id = video.get("contentId")
|
|
|
|
|
+ if content_id:
|
|
|
|
|
+ if content_id not in content_id_to_keywords:
|
|
|
|
|
+ content_id_to_keywords[content_id] = set()
|
|
|
|
|
+ content_id_to_keywords[content_id].add(keyword)
|
|
|
|
|
+
|
|
|
|
|
+ # 3. 计算每个 contentId 对应最多词的比例
|
|
|
|
|
+ max_keyword_count = 0
|
|
|
|
|
+ max_content_id = None
|
|
|
|
|
+ if content_id_to_keywords:
|
|
|
|
|
+ for content_id, matched_keywords in content_id_to_keywords.items():
|
|
|
|
|
+ keyword_count = len(matched_keywords)
|
|
|
|
|
+ if keyword_count > max_keyword_count:
|
|
|
|
|
+ max_keyword_count = keyword_count
|
|
|
|
|
+ max_content_id = content_id
|
|
|
|
|
+
|
|
|
|
|
+ # 计算比例(匹配到的关键词数 / 总关键词数)
|
|
|
|
|
+ max_ratio = max_keyword_count / len(keywords) if keywords else 0.0
|
|
|
|
|
+
|
|
|
|
|
+ # 构建输出结果,只返回最高匹配度
|
|
|
|
|
+ output_data = {
|
|
|
|
|
+ "max_content_id_ratio": round(max_ratio * 100, 2), # 转换为百分比,保留2位小数
|
|
|
}
|
|
}
|
|
|
- total_count = sum(keyword_counts.values())
|
|
|
|
|
|
|
|
|
|
- output = json.dumps(results, ensure_ascii=False, indent=2)
|
|
|
|
|
|
|
+ output = json.dumps(output_data, ensure_ascii=False, indent=2)
|
|
|
return ToolResult(
|
|
return ToolResult(
|
|
|
title=f"内容解析结果搜索 - {len(keywords)} 个关键词",
|
|
title=f"内容解析结果搜索 - {len(keywords)} 个关键词",
|
|
|
output=output,
|
|
output=output,
|
|
|
- long_term_memory=f"检索到内容解析结果,关键词: {', '.join(keywords)},共 {total_count} 条结果",
|
|
|
|
|
|
|
+ long_term_memory=f"检索到内容解析结果,关键词: {', '.join(keywords)},最高匹配比例: {max_ratio * 100:.2f}%",
|
|
|
)
|
|
)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+if __name__ == '__main__':
|
|
|
|
|
+ res = asyncio.run(query_content_deconstruction_by_keywords(['食用', '禁忌']))
|
|
|
|
|
+ print(res)
|