|
|
@@ -13,9 +13,18 @@ import requests
|
|
|
|
|
|
from agent import ToolResult, tool
|
|
|
from db import update_content_plan_ids
|
|
|
+from utils.tool_logging import format_tool_result_for_log, log_tool_call
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
+_LABEL_ACCOUNT = "工具调用:create_crawler_plan_by_douyin_account_id -> 按抖音账号创建爬取计划"
|
|
|
+_LABEL_CONTENT = "工具调用:create_crawler_plan_by_douyin_content_id -> 按抖音视频创建爬取计划"
|
|
|
+
|
|
|
+
|
|
|
def _log_aigc_return(label: str, params: Dict[str, Any], r: ToolResult) -> ToolResult:
    """Log a tool call together with its result, then hand the result back.

    Lets a ``return ToolResult(...)`` site record the call in a single
    expression: wrap the result in this helper and return what it returns.

    Args:
        label: Text passed to ``log_tool_call`` as its first argument.
        params: Call parameters passed to ``log_tool_call`` as its second
            argument.
        r: The tool result to log (via ``format_tool_result_for_log``) and
            return.

    Returns:
        The same ``r`` object that was passed in.
    """
    try:
        log_tool_call(label, params, format_tool_result_for_log(r))
    except Exception:
        # Logging is diagnostic only. Callers invoke this helper from inside
        # their own ``try`` blocks on the success path, so an exception raised
        # here would be caught there and reported as a failed tool call even
        # though the real work succeeded. Record the problem and carry on.
        logger.exception("failed to log tool call: %s", label)
    return r
|
|
|
+
|
|
|
USE_REAL_API = False
|
|
|
|
|
|
AIGC_BASE_URL = "https://aigc-api.aiddit.com"
|
|
|
@@ -92,25 +101,40 @@ async def create_crawler_plan_by_douyin_account_id(
|
|
|
- 建议从 metadata.result 获取结构化数据,而非解析 output 文本
|
|
|
"""
|
|
|
|
|
|
+ call_params: Dict[str, Any] = {
|
|
|
+ "account_id": account_id,
|
|
|
+ "sort_type": sort_type,
|
|
|
+ "produce_plan_ids": produce_plan_ids if produce_plan_ids is not None else [],
|
|
|
+ }
|
|
|
+
|
|
|
# 验证 account_id 格式
|
|
|
if not account_id or not isinstance(account_id, str):
|
|
|
logger.error(f"create_crawler_plan_by_douyin_account_id invalid account_id: {account_id}")
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音账号ID创建爬取计划失败",
|
|
|
- output="",
|
|
|
- error="account_id 参数无效:必须是非空字符串",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_ACCOUNT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音账号ID创建爬取计划失败",
|
|
|
+ output="",
|
|
|
+ error="account_id 参数无效:必须是非空字符串",
|
|
|
+ ),
|
|
|
)
|
|
|
|
|
|
if not account_id.startswith("MS4wLjABAAAA"):
|
|
|
logger.error(f"create_crawler_plan_by_douyin_account_id invalid sec_uid format account_id:{account_id}")
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音账号ID创建爬取计划失败",
|
|
|
- output="",
|
|
|
- error=f"account_id 格式错误:必须以 MS4wLjABAAAA 开头,当前值: {account_id[:min(20, len(account_id))]}...",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_ACCOUNT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音账号ID创建爬取计划失败",
|
|
|
+ output="",
|
|
|
+ error=f"account_id 格式错误:必须以 MS4wLjABAAAA 开头,当前值: {account_id[:min(20, len(account_id))]}...",
|
|
|
+ ),
|
|
|
)
|
|
|
|
|
|
if produce_plan_ids is None:
|
|
|
produce_plan_ids = []
|
|
|
+ call_params["produce_plan_ids"] = produce_plan_ids
|
|
|
|
|
|
dt = datetime.now().strftime("%Y%m%d%h%M%s")
|
|
|
crawler_plan_name = f"【内容寻找Agent自动创建】{dt}_抖音账号ID爬取计划_{account_id[:min(30, len(account_id))]}"
|
|
|
@@ -145,10 +169,14 @@ async def create_crawler_plan_by_douyin_account_id(
|
|
|
|
|
|
response_json = post(CRAWLER_PLAN_CREATE_URL, params)
|
|
|
if response_json.get("code") != 0:
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音账号ID创建爬取计划失败",
|
|
|
- output=response_json.get("msg", "接口异常"),
|
|
|
- error=f"create crawler plan interface error",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_ACCOUNT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音账号ID创建爬取计划失败",
|
|
|
+ output=response_json.get("msg", "接口异常"),
|
|
|
+ error=f"create crawler plan interface error",
|
|
|
+ ),
|
|
|
)
|
|
|
|
|
|
crawler_plan_id = response_json.get("data", {}).get("id", "")
|
|
|
@@ -175,35 +203,43 @@ async def create_crawler_plan_by_douyin_account_id(
|
|
|
summary_lines.append(f" 绑定结果: {'绑定成功' if not produce_plan_info.get('msg') else '绑定失败'}")
|
|
|
summary_lines.append(f" 信息: {produce_plan_info.get('msg', '成功')}")
|
|
|
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音账号ID创建爬取计划",
|
|
|
- output="\n".join(summary_lines),
|
|
|
- metadata={
|
|
|
- "result": {
|
|
|
- "crawler_info": {
|
|
|
- "crawler_plan_id": crawler_plan_id,
|
|
|
- "crawler_plan_name": crawler_plan_name,
|
|
|
- "sort_type": sort_type,
|
|
|
- },
|
|
|
- "produce_plan_infos": [
|
|
|
- {
|
|
|
- "produce_plan_id": produce_plan_info.get("produce_plan_id", ""),
|
|
|
- "produce_plan_name": produce_plan_info.get("produce_plan_name", ""),
|
|
|
- "is_success": "绑定成功" if not produce_plan_info.get("msg") else "绑定失败",
|
|
|
- "msg": produce_plan_info.get("msg", "成功"),
|
|
|
- }
|
|
|
- for produce_plan_info in produce_plan_infos
|
|
|
- ]
|
|
|
- }
|
|
|
- },
|
|
|
- long_term_memory="Create crawler plan by DouYin Account ID",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_ACCOUNT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音账号ID创建爬取计划",
|
|
|
+ output="\n".join(summary_lines),
|
|
|
+ metadata={
|
|
|
+ "result": {
|
|
|
+ "crawler_info": {
|
|
|
+ "crawler_plan_id": crawler_plan_id,
|
|
|
+ "crawler_plan_name": crawler_plan_name,
|
|
|
+ "sort_type": sort_type,
|
|
|
+ },
|
|
|
+ "produce_plan_infos": [
|
|
|
+ {
|
|
|
+ "produce_plan_id": produce_plan_info.get("produce_plan_id", ""),
|
|
|
+ "produce_plan_name": produce_plan_info.get("produce_plan_name", ""),
|
|
|
+ "is_success": "绑定成功" if not produce_plan_info.get("msg") else "绑定失败",
|
|
|
+ "msg": produce_plan_info.get("msg", "成功"),
|
|
|
+ }
|
|
|
+ for produce_plan_info in produce_plan_infos
|
|
|
+ ],
|
|
|
+ }
|
|
|
+ },
|
|
|
+ long_term_memory="Create crawler plan by DouYin Account ID",
|
|
|
+ ),
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(f"create douyin account crawler plan error: {str(e)}, account_id: {account_id} ")
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音账号ID创建爬取计划失败",
|
|
|
- output="",
|
|
|
- error=f"创建爬取计划错误:{str(e)}",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_ACCOUNT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音账号ID创建爬取计划失败",
|
|
|
+ output="",
|
|
|
+ error=f"创建爬取计划错误:{str(e)}",
|
|
|
+ ),
|
|
|
)
|
|
|
|
|
|
|
|
|
@@ -232,35 +268,44 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
Note:
|
|
|
- 建议从 metadata.result 获取结构化数据,而非解析 output 文本
|
|
|
"""
|
|
|
+ call_params: Dict[str, Any] = {"trace_id": trace_id}
|
|
|
# 先临时返回创建成功,不要真实创建
|
|
|
if USE_REAL_API == False:
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音内容创建爬取计划",
|
|
|
- output="",
|
|
|
- metadata={
|
|
|
- "result": {
|
|
|
- "crawler_info": {
|
|
|
- "crawler_plan_id": "1234567890",
|
|
|
- "crawler_plan_name": "抖音视频直接抓取",
|
|
|
- },
|
|
|
- "produce_plan_infos": [
|
|
|
- {
|
|
|
- "produce_plan_id": "1234567890",
|
|
|
- "produce_plan_name": "抖音视频直接抓取",
|
|
|
- "is_success": "绑定成功",
|
|
|
- "msg": "成功",
|
|
|
- }
|
|
|
- ]
|
|
|
- }
|
|
|
- },
|
|
|
- long_term_memory="Create crawler plan by DouYin Content IDs",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_CONTENT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音内容创建爬取计划",
|
|
|
+ output="",
|
|
|
+ metadata={
|
|
|
+ "result": {
|
|
|
+ "crawler_info": {
|
|
|
+ "crawler_plan_id": "1234567890",
|
|
|
+ "crawler_plan_name": "抖音视频直接抓取",
|
|
|
+ },
|
|
|
+ "produce_plan_infos": [
|
|
|
+ {
|
|
|
+ "produce_plan_id": "1234567890",
|
|
|
+ "produce_plan_name": "抖音视频直接抓取",
|
|
|
+ "is_success": "绑定成功",
|
|
|
+ "msg": "成功",
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ }
|
|
|
+ },
|
|
|
+ long_term_memory="Create crawler plan by DouYin Content IDs",
|
|
|
+ ),
|
|
|
)
|
|
|
if not trace_id or not isinstance(trace_id, str):
|
|
|
logger.error(f"create_crawler_plan_by_douyin_content_id invalid trace_id: {trace_id}")
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音内容创建爬取计划失败",
|
|
|
- output="",
|
|
|
- error="trace_id 参数无效: trace_id 必须是非空字符串",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_CONTENT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音内容创建爬取计划失败",
|
|
|
+ output="",
|
|
|
+ error="trace_id 参数无效: trace_id 必须是非空字符串",
|
|
|
+ ),
|
|
|
)
|
|
|
|
|
|
output_dir = os.getenv("OUTPUT_DIR", ".cache/output")
|
|
|
@@ -270,27 +315,40 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
except Exception as e:
|
|
|
msg = f"加载/解析 output.json 失败: {e}"
|
|
|
logger.error(msg, exc_info=True)
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音内容创建爬取计划失败",
|
|
|
- output="",
|
|
|
- error=msg,
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_CONTENT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音内容创建爬取计划失败",
|
|
|
+ output="",
|
|
|
+ error=msg,
|
|
|
+ ),
|
|
|
)
|
|
|
|
|
|
+ call_params["content_ids_count"] = len(content_ids)
|
|
|
if not content_ids:
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音内容创建爬取计划失败",
|
|
|
- output="",
|
|
|
- error="未在 output.json.contents 中找到有效 aweme_id",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_CONTENT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音内容创建爬取计划失败",
|
|
|
+ output="",
|
|
|
+ error="未在 output.json.contents 中找到有效 aweme_id",
|
|
|
+ ),
|
|
|
)
|
|
|
if len(content_ids) > 100:
|
|
|
logger.error(
|
|
|
"create_crawler_plan_by_douyin_content_id invalid content_ids length. "
|
|
|
f"content_ids.length: {len(content_ids)}"
|
|
|
)
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音内容创建爬取计划失败",
|
|
|
- output="",
|
|
|
- error=f"content_ids 长度异常: 期望1~100, 实际{len(content_ids)}",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_CONTENT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音内容创建爬取计划失败",
|
|
|
+ output="",
|
|
|
+ error=f"content_ids 长度异常: 期望1~100, 实际{len(content_ids)}",
|
|
|
+ ),
|
|
|
)
|
|
|
|
|
|
produce_plan_ids = _get_produce_plan_ids_from_env()
|
|
|
@@ -318,10 +376,14 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
|
|
|
response_json = post(CRAWLER_PLAN_CREATE_URL, params)
|
|
|
if response_json.get("code") != 0:
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音内容ID创建爬取计划失败",
|
|
|
- output=response_json.get("msg", "接口异常"),
|
|
|
- error=f"create crawler plan interface error",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_CONTENT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音内容ID创建爬取计划失败",
|
|
|
+ output=response_json.get("msg", "接口异常"),
|
|
|
+ error=f"create crawler plan interface error",
|
|
|
+ ),
|
|
|
)
|
|
|
|
|
|
crawler_plan_id = response_json.get("data", {}).get("id", "")
|
|
|
@@ -363,35 +425,43 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
except Exception as e:
|
|
|
logger.error(f"update content plan ids failed: {e}", exc_info=True)
|
|
|
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音内容ID创建爬取计划",
|
|
|
- output="\n".join(summary_lines),
|
|
|
- metadata={
|
|
|
- "result": {
|
|
|
- "crawler_info": {
|
|
|
- "crawler_plan_id": crawler_plan_id,
|
|
|
- "crawler_plan_name": crawler_plan_name,
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_CONTENT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音内容ID创建爬取计划",
|
|
|
+ output="\n".join(summary_lines),
|
|
|
+ metadata={
|
|
|
+ "result": {
|
|
|
+ "crawler_info": {
|
|
|
+ "crawler_plan_id": crawler_plan_id,
|
|
|
+ "crawler_plan_name": crawler_plan_name,
|
|
|
+ },
|
|
|
+ "produce_plan_infos": [
|
|
|
+ {
|
|
|
+ "produce_plan_id": produce_plan_info.get("produce_plan_id", ""),
|
|
|
+ "produce_plan_name": produce_plan_info.get("produce_plan_name", ""),
|
|
|
+ "is_success": "绑定成功" if not produce_plan_info.get("msg") else "绑定失败",
|
|
|
+ "msg": produce_plan_info.get("msg", "成功"),
|
|
|
+ }
|
|
|
+ for produce_plan_info in produce_plan_infos
|
|
|
+ ],
|
|
|
},
|
|
|
- "produce_plan_infos": [
|
|
|
- {
|
|
|
- "produce_plan_id": produce_plan_info.get("produce_plan_id", ""),
|
|
|
- "produce_plan_name": produce_plan_info.get("produce_plan_name", ""),
|
|
|
- "is_success": "绑定成功" if not produce_plan_info.get("msg") else "绑定失败",
|
|
|
- "msg": produce_plan_info.get("msg", "成功"),
|
|
|
- }
|
|
|
- for produce_plan_info in produce_plan_infos
|
|
|
- ]
|
|
|
+ "db": {"updated_rows": db_updated_rows},
|
|
|
},
|
|
|
- "db": {"updated_rows": db_updated_rows},
|
|
|
- },
|
|
|
- long_term_memory="Create crawler plan by DouYin Content IDs",
|
|
|
+ long_term_memory="Create crawler plan by DouYin Content IDs",
|
|
|
+ ),
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(f"create douyin content crawler plan error. content_ids: {content_ids}, error: {str(e)}")
|
|
|
- return ToolResult(
|
|
|
- title="根据抖音内容ID创建爬取计划失败",
|
|
|
- output="",
|
|
|
- error=f"创建爬取计划错误:{str(e)}",
|
|
|
+ return _log_aigc_return(
|
|
|
+ _LABEL_CONTENT,
|
|
|
+ call_params,
|
|
|
+ ToolResult(
|
|
|
+ title="根据抖音内容ID创建爬取计划失败",
|
|
|
+ output="",
|
|
|
+ error=f"创建爬取计划错误:{str(e)}",
|
|
|
+ ),
|
|
|
)
|
|
|
|
|
|
|