|
|
@@ -7,22 +7,42 @@ import logging
|
|
|
import os
|
|
|
from datetime import datetime
|
|
|
from pathlib import Path
|
|
|
-from typing import List, Dict, Union, Tuple, Any
|
|
|
+from typing import List, Dict, Union, Tuple, Any, Optional
|
|
|
|
|
|
import requests
|
|
|
from zoneinfo import ZoneInfo
|
|
|
|
|
|
from agent import ToolResult, tool
|
|
|
-from db import update_content_plan_ids
|
|
|
+from db import get_connection, fetch_demand_content_merge_leve2, update_content_plan_ids
|
|
|
from utils.tool_logging import format_tool_result_for_log, log_tool_call
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
+
|
|
|
# Mapping from a demand category (demand_content.merge_leve2) to the AIGC plan
# ids used for that category. "生成ID" is the produce-plan id, "发布ID" the
# publish-plan id. The "通用" entry is the fallback for unknown categories.
# NOTE(review): some categories intentionally share plan ids (e.g. "知识科普" /
# "生活技巧科普" and "国家科技力量" / "国家力量") — confirm that is deliberate.
AIGC_PLAN_ID_MAP = {
    "健康知识": {"生成ID": "20260408092313211598604", "发布ID": "20260408115944193153417"},
    "历史名人": {"生成ID": "20260408083251311809309", "发布ID": "20260408115139124511126"},
    "知识科普": {"生成ID": "20260408083824905654920", "发布ID": "20260408115231567509261"},
    "搞笑段子": {"生成ID": "20260408091536533918237", "发布ID": "20260408115842127748387"},
    "社会风气": {"生成ID": "20260408084318884115213", "发布ID": "20260408115315950129776"},
    "人生忠告": {"生成ID": "20260408085205791658566", "发布ID": "20260408115405410408001"},
    "国际时政": {"生成ID": "20260408090208237400605", "发布ID": "20260408115616925523989"},
    "生活技巧科普": {"生成ID": "20260408083824905654920", "发布ID": "20260408115231567509261"},
    "贪污腐败": {"生成ID": "20260408090309503416878", "发布ID": "20260408115653908856043"},
    "民生政策": {"生成ID": "20260408090721867506475", "发布ID": "20260408115727030928177"},
    "对口型表演": {"生成ID": "20260408092122328523262", "发布ID": "20260408115914659162376"},
    "中国战争史": {"生成ID": "20260408090950446586451", "发布ID": "20260408115804931772327"},
    "人财诈骗": {"生成ID": "20260408093140652233649", "发布ID": "20260408120019784463902"},
    "当代正能量人物": {"生成ID": "20260408083148399635274", "发布ID": "20260408115046382803287"},
    "国家科技力量": {"生成ID": "20260408085807674913378", "发布ID": "20260408115542550181196"},
    "国家力量": {"生成ID": "20260408085807674913378", "发布ID": "20260408115542550181196"},
    "通用": {"生成ID": "20260408085649635441036", "发布ID": "20260408115439581604474"},
}
|
|
|
+
|
|
|
+
|
|
|
_LABEL_ACCOUNT = "工具调用:create_crawler_plan_by_douyin_account_id -> 按抖音账号创建爬取计划"
|
|
|
_LABEL_CONTENT = "工具调用:create_crawler_plan_by_douyin_content_id -> 按抖音视频创建爬取计划"
|
|
|
|
|
|
-AIGC_DEMAND_DOUYIN_CONTENT_PUBLISH_PLAN_ID=20260320065232171836746
|
|
|
-
|
|
|
SHANGHAI_TZ = ZoneInfo("Asia/Shanghai")
|
|
|
|
|
|
|
|
|
@@ -79,13 +99,31 @@ def _extract_content_ids(data: Dict[str, Any]) -> List[str]:
|
|
|
return content_ids
|
|
|
|
|
|
|
|
|
-def _get_produce_plan_ids_from_env() -> List[str]:
|
|
|
- """Read AIGC_DEMAND_DOUYIN_CONTENT_PRODUCE_PLAN_ID from env."""
|
|
|
- raw = os.getenv("AIGC_DEMAND_DOUYIN_CONTENT_PRODUCE_PLAN_ID", "").strip()
|
|
|
- if not raw:
|
|
|
- return []
|
|
|
- # 接口需要 List[str],因此把 env 字段(字符串)包装成 list。
|
|
|
- return [raw]
|
|
|
+def _extract_content_demand_id(data: Dict[str, Any]) -> Optional[int]:
|
|
|
+ """
|
|
|
+ Extract content_demand_id (demand_content.id) from output json.
|
|
|
+
|
|
|
+ Compatible keys:
|
|
|
+ - content_demand_id
|
|
|
+ - demand_content_id
|
|
|
+ - demand_id (legacy)
|
|
|
+ """
|
|
|
+ if not isinstance(data, dict):
|
|
|
+ return None
|
|
|
+ raw = (
|
|
|
+ data.get("content_demand_id")
|
|
|
+ if data.get("content_demand_id") is not None
|
|
|
+ else data.get("demand_content_id")
|
|
|
+ if data.get("demand_content_id") is not None
|
|
|
+ else data.get("demand_id")
|
|
|
+ )
|
|
|
+ if raw is None:
|
|
|
+ return None
|
|
|
+ try:
|
|
|
+ v = int(raw)
|
|
|
+ except Exception:
|
|
|
+ return None
|
|
|
+ return v if v > 0 else None
|
|
|
|
|
|
|
|
|
@tool(description="根据抖音账号ID创建爬取计划")
|
|
|
@@ -292,7 +330,7 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
_LABEL_CONTENT,
|
|
|
call_params,
|
|
|
ToolResult(
|
|
|
- title="根据抖音内容创建爬取计划",
|
|
|
+ title="根据抖音内容创建爬取计划-本地环境跳过此步骤",
|
|
|
output="",
|
|
|
metadata={
|
|
|
"result": {
|
|
|
@@ -329,6 +367,7 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
try:
|
|
|
data = _load_output_json(trace_id=trace_id, output_dir=output_dir)
|
|
|
content_ids = _extract_content_ids(data)
|
|
|
+ content_demand_id = _extract_content_demand_id(data)
|
|
|
except Exception as e:
|
|
|
msg = f"加载/解析 output.json 失败: {e}"
|
|
|
logger.error(msg, exc_info=True)
|
|
|
@@ -343,6 +382,8 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
)
|
|
|
|
|
|
call_params["content_ids_count"] = len(content_ids)
|
|
|
+ if content_demand_id is not None:
|
|
|
+ call_params["content_demand_id"] = content_demand_id
|
|
|
if not content_ids:
|
|
|
return _log_aigc_return(
|
|
|
_LABEL_CONTENT,
|
|
|
@@ -368,7 +409,28 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
),
|
|
|
)
|
|
|
|
|
|
- produce_plan_ids = _get_produce_plan_ids_from_env()
|
|
|
+ merge_leve2 = ""
|
|
|
+ if content_demand_id is not None:
|
|
|
+ try:
|
|
|
+ conn = get_connection()
|
|
|
+ try:
|
|
|
+ merge_leve2 = fetch_demand_content_merge_leve2(conn, content_demand_id) or ""
|
|
|
+ finally:
|
|
|
+ conn.close()
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(
|
|
|
+ "fetch demand_content.merge_leve2 failed. demand_content_id=%s err=%s",
|
|
|
+ content_demand_id,
|
|
|
+ str(e),
|
|
|
+ exc_info=True,
|
|
|
+ )
|
|
|
+ merge_leve2 = ""
|
|
|
+
|
|
|
+ plan_key = merge_leve2.strip() if merge_leve2.strip() in AIGC_PLAN_ID_MAP else "通用"
|
|
|
+ plan_ids = AIGC_PLAN_ID_MAP.get(plan_key) or AIGC_PLAN_ID_MAP.get("通用") or {}
|
|
|
+ produce_plan_id_selected = str(plan_ids.get("生成ID") or "").strip()
|
|
|
+ publish_plan_id_selected = str(plan_ids.get("发布ID") or "").strip()
|
|
|
+ produce_plan_ids = [produce_plan_id_selected] if produce_plan_id_selected else []
|
|
|
dt = datetime.now(SHANGHAI_TZ).strftime("%Y%m%d%H%M%S")
|
|
|
crawler_plan_name = f"【内容寻找Agent自动创建】抖音视频直接抓取-{dt}-抖音"
|
|
|
params = {
|
|
|
@@ -390,6 +452,13 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
|
|
|
try:
|
|
|
summary_lines = [f"抖音视频爬取计划"]
|
|
|
+ if merge_leve2.strip():
|
|
|
+ summary_lines.append(f"需求品类(merge_leve2): {merge_leve2.strip()}")
|
|
|
+ summary_lines.append(f"计划匹配key: {plan_key}")
|
|
|
+ if produce_plan_id_selected:
|
|
|
+ summary_lines.append(f"生成计划ID(按品类匹配): {produce_plan_id_selected}")
|
|
|
+ if publish_plan_id_selected:
|
|
|
+ summary_lines.append(f"发布计划ID(按品类匹配): {publish_plan_id_selected}")
|
|
|
|
|
|
response_json = post(CRAWLER_PLAN_CREATE_URL, params)
|
|
|
if response_json.get("code") != 0:
|
|
|
@@ -409,7 +478,7 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
summary_lines.append(f" 爬取计划ID: {crawler_plan_id}")
|
|
|
produce_plan_infos: List[Dict[str, str]] = []
|
|
|
db_updated_rows = 0
|
|
|
- # 环境里的生成计划 ID(字符串);与是否执行绑定接口无关,用于写库
|
|
|
+ # 选中的生成计划 ID(字符串);与是否执行绑定接口无关,用于写库
|
|
|
env_produce_plan_id = (produce_plan_ids[0] if produce_plan_ids else "").strip()
|
|
|
|
|
|
if produce_plan_ids:
|
|
|
@@ -430,16 +499,15 @@ async def create_crawler_plan_by_douyin_content_id(
|
|
|
summary_lines.append(f" 绑定结果: {'绑定成功' if not produce_plan_info.get('msg') else '绑定失败'}")
|
|
|
summary_lines.append(f" 信息: {produce_plan_info.get('msg', '成功')}")
|
|
|
|
|
|
- publish_plan_id_str = str(AIGC_DEMAND_DOUYIN_CONTENT_PUBLISH_PLAN_ID).strip()
|
|
|
# 爬取 / 生成 / 发布计划 id 任一存在则写库(不依赖是否已配置 produce_plan_ids 去走绑定)
|
|
|
- if (crawler_plan_id or "").strip() or env_produce_plan_id or publish_plan_id_str:
|
|
|
+ if (crawler_plan_id or "").strip() or env_produce_plan_id or publish_plan_id_selected:
|
|
|
try:
|
|
|
db_updated_rows = update_content_plan_ids(
|
|
|
trace_id=trace_id,
|
|
|
aweme_ids=content_ids,
|
|
|
crawler_plan_id=crawler_plan_id or "",
|
|
|
produce_plan_id=env_produce_plan_id,
|
|
|
- publish_plan_id=publish_plan_id_str,
|
|
|
+ publish_plan_id=publish_plan_id_selected,
|
|
|
)
|
|
|
except Exception as e:
|
|
|
logger.error(f"update content plan ids failed: {e}", exc_info=True)
|