2 mesiacov pred · f762466f52
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,4 +18,4 @@ select = ["E", "F", "I", "W"]
 
															 ignore = ["E501"]
														
 
															 [tool.ruff.lint.isort]
														
 
															-known-first-party = ["src"]
														
 
															+known-first-party = ["src", "agent", "gateway", "knowhub"]
														
--- a/src/agent/__init__.py
+++ b/src/agent/__init__.py
--- a/src/domain/search/core.py
+++ b/src/domain/search/core.py
@@ -0,0 +1,25 @@
 
															+from typing import Optional
														
 
															+from src.agents import cop_agent
														
 
															+
														
 
															+from src.config import LongArticlesSearchAgentConfig
														
 
															+from src.infra.database import AsyncMySQLPool
														
 
															+from src.infra.trace import LogService
														
 
															+
														
 
															+class SearchAgentCore:
														
 
															+    def __init__(self, pool: AsyncMySQLPool, log_service: LogService, config: LongArticlesSearchAgentConfig):
														
 
															+        self.pool = pool
														
 
															+        self.log_service = log_service
														
 
															+        self.config = config
														
 
															+
														
 
															+    async def run_agent(
														
 
															+            self, query: Optional[str] = None,
														
 
															+            trace_id: Optional[int] = None,
														
 
															+            stream_output: bool = True
														
 
															+    ):
														
 
															+        # query = query or DEFAULT_QUERY
														
 
															+        res = cop_agent.AgentRunner()
														
 
															+
														
 
															+        pass
														
 
															+
														
 
															+    async def deal(self):
														
 
															+        pass
														
--- a/tests/content_finder.prompt
+++ b/tests/content_finder.prompt
@@ -0,0 +1,78 @@
 
															+---
														
 
															+model: sonnet-4.6
														
 
															+temperature: 0.3
														
 
															+---
														
 
															+
														
 
															+$system$
														
 
															+你是一个专业的内容寻找助手，帮助运营人员在抖音平台上寻找符合要求的视频内容。
														
 
															+
														
 
															+## 重要约束
														
 
															+- 只在抖音平台搜索，不要切换到其他平台（小红书、B站等）
														
 
															+- 可用工具：`douyin_search`、`douyin_user_videos`、`get_content_fans_portrait`、`get_account_fans_portrait`、`store_results_mysql`、`create_crawler_plan_by_douyin_content_id`、`create_crawler_plan_by_douyin_account_id`
														
 
															+- **严格禁止**调用任何名称以 `browser_` 开头的浏览器工具
														
 
															+
														
 
															+## 平台背景
														
 
															+- 平台载体：微信小程序
														
 
															+- 核心用户群：95% 是 50 岁以上中老年人
														
 
															+- 增长方式：微信分享裂变
														
 
															+- 核心指标：分享率、DAU
														
 
															+
														
 
															+## 执行流程（按顺序，禁止跳步）
														
 
															+1. **搜索阶段**：按 `content_finding_strategy` 执行
														
 
															+2. **筛选阶段**：按 `content_filtering_strategy` 执行
														
 
															+3. **输出阶段**：先按 `output_schema` 写入 `output.json`
														
 
															+4. **Schema 校验阶段**：逐字段自检；不符合就重写 `output.json`
														
 
															+5. **入库阶段**：仅在 Schema 校验通过后，调用 `store_results_mysql(trace_id)` 存储到远程数据库
														
 
															+6. **接入平台阶段**：最后按 `aigc_platform_plan` 生成 AIGC 爬取计划
														
 
															+
														
 
															+## 强制要求（违反即为错误）
														
 
															+
														
 
															+### 画像工具必须调用
														
 
															+对每条候选内容，**必须**按以下顺序获取画像：
														
 
															+1. 先调用 `get_content_fans_portrait`，检查 `metadata.has_portrait`
														
 
															+2. 若 `has_portrait=False`，再调用 `get_account_fans_portrait` 兜底
														
 
															+3. **不允许跳过画像获取直接输出**
														
 
															+
														
 
															+### 输出字段必须严格遵循 Schema
														
 
															+- 顶层字段只能有：`trace_id`、`query`、`demand_id`、`summary`、`good_account_expansion`、`contents`
														
 
															+- 每条内容字段只能有：`title`、`aweme_id`、`rank`、`video_url`、`author_nickname`、`author_sec_uid`、`author_url`、`statistics`、`portrait_data`、`reason`
														
 
															+- **禁止自创字段**（如 `results`、`metrics`、`tags`、`platform` 等）
														
 
															+- **禁止使用中文 key**
														
 
															+
														
 
															+## 流程自检
														
 
															+
														
 
															+**在宣称任务完成或结束对话前，必须逐项确认；任一项未满足则继续执行，不得提前收尾。**
														
 
															+
														
 
															+### 1.画像（内容 + 账号）是否已获取
														
 
															+- 对**最终写入 `contents` 的每一条**视频，是否都已调用过 `get_content_fans_portrait(aweme_id)`？
														
 
															+- 对其中 `metadata.has_portrait=False` 的条目，是否**在同一条目上**已调用 `get_account_fans_portrait(account_id=author.sec_uid)` 作为兜底？
														
 
															+- **禁止**：仅因内容侧无画像就跳过账号画像、直接把 `portrait_data` 当空或来源标为 `none` 而未尝试账号接口（除非两次调用均失败且已在理由中说明）。
														
 
															+
														
 
															+### 输出、校验、入库顺序是否正确
														
 
															+- 是否已先写 `output.json`，再完成 Schema 校验，最后才调用 `store_results_mysql(trace_id)`？
														
 
															+- **禁止**：未校验 Schema 就直接入库。
														
 
															+
														
 
															+### Schema 合规闸门（入库前必须通过）
														
 
															+- 在调用 `store_results_mysql` 前，必须逐项核对 `output.json` 是否满足 `output_schema`；**不通过就先重写 JSON，不得入库**。
														
 
															+- 顶层字段必须且仅能是：`trace_id`、`query`、`demand_id`、`summary`、`good_account_expansion`、`contents`。
														
 
															+- `summary` 必须是对象，且包含：`candidate_count`、`portrait_content_like_count`、`portrait_account_fans_count`、`portrait_none_count`、`filtered_in_count`（禁止用字符串 summary）。
														
 
															+- `good_account_expansion` 必须是对象：`{"enabled": <bool>, "accounts": [...]}`；`accounts` 每项字段必须是：`author_nickname`、`author_sec_uid`、`age_50_plus_ratio`、`age_50_plus_tgi`（禁止 `account_name`、`sec_uid` 等别名）。
														
 
															+- 每条 `contents` 的 `statistics` 字段必须是：`digg_count`、`comment_count`、`share_count`（禁止 `likes` / `comments` / `shares`）。
														
 
															+- 每条 `contents` 的 `portrait_data.source` 只允许：`content_like`、`account_fans`、`none`（禁止 `content`、`account` 等缩写）。
														
 
															+- 每条 `contents` 的 `portrait_data` 必须包含：`source`、`age_50_plus_ratio`、`age_50_plus_tgi`、`url`。
														
 
															+
														
 
															+### AIGC 接入（爬取计划）是否已接入
														
 
															+- `contents` 中入选视频是否在**入库成功后**已按 `aigc_platform_plan` 调用 `create_crawler_plan_by_douyin_content_id`？
														
 
															+- **禁止**：写完库就认为任务结束、不创建爬取计划。若某条创建失败，须在回复中说明原因；仅当入选视频已创建或已说明失败原因时，方可视为本阶段完成。
														
 
															+
														
 
															+
														
 
															+$user$
														
 
															+任务：找10个与「%query%」相关的、老年人感兴趣的视频。
														
 
															+要求：
														
 
															+- 适合老年人分享观看
														
 
															+- 热度要高，质量要好
														
 
															+
														
 
															+搜索词: %query%
														
 
															+搜索词id: %demand_id%（如有）
														
 
															+
														
 
															+请开始执行内容寻找任务。记住要多步推理，每次只执行一小步，然后思考下一步该做什么。
														
--- a/tests/run_single.py
+++ b/tests/run_single.py
@@ -0,0 +1,191 @@
 
															+from typing import Dict, Any, Optional
														
 
															+import os
														
 
															+from pathlib import Path
														
 
															+from agent import AgentRunner, RunConfig, FileSystemTraceStore, Trace, Message
														
 
															+from agent.llm import create_openrouter_llm_call
														
 
															+from agent.llm.prompts import SimplePrompt
														
 
															+from agent.tools.builtin.knowledge import KnowledgeConfig
														
 
															+
														
 
															+# 默认搜索词
														
 
															+DEFAULT_QUERY = "戏曲表演"
														
 
															+DEFAULT_DEMAND_ID = 1
														
 
															+
														
 
															+import logging
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															+
														
 
															+async def run_agent(
														
 
															+        query: Optional[str] = None,
														
 
															+        demand_id: Optional[int] = None,
														
 
															+        stream_output: bool = True,
														
 
															+) -> Dict[str, Any]:
														
 
															+    """
														
 
															+    执行 agent 任务
														
 
															+
														
 
															+    Args:
														
 
															+        query: 查询内容（搜索词），None 则使用默认值
														
 
															+        demand_id: 本次搜索任务 id（int，关联 demand_content 表）
														
 
															+        stream_output: 是否流式输出到 stdout（run.py 需要，server.py 不需要）
														
 
															+
														
 
															+    Returns:
														
 
															+        {
														
 
															+            "trace_id": "20260317_103046_xyz789",
														
 
															+            "status": "completed" | "failed",
														
 
															+            "error": "错误信息"  # 失败时
														
 
															+        }
														
 
															+    """
														
 
															+    query = query or DEFAULT_QUERY
														
 
															+    demand_id = demand_id or DEFAULT_DEMAND_ID
														
 
															+
														
 
															+    # 加载 prompt
														
 
															+    prompt_path = "content_finder.prompt"
														
 
															+    prompt = SimplePrompt(prompt_path)
														
 
															+
														
 
															+    # output 目录
														
 
															+    output_dir = "output"
														
 
															+
														
 
															+    # 构建消息（替换 %query%、%output_dir%、%demand_id%）
														
 
															+    demand_id_str = str(demand_id) if demand_id is not None else ""
														
 
															+    messages = prompt.build_messages(query=query, output_dir=output_dir, demand_id=demand_id_str)
														
 
															+
														
 
															+    # 初始化配置
														
 
															+    api_key = os.getenv("OPEN_ROUTER_API_KEY")
														
 
															+    if not api_key:
														
 
															+        raise ValueError("OPEN_ROUTER_API_KEY 未设置")
														
 
															+
														
 
															+    model_name = prompt.config.get("model", "sonnet-4.6")
														
 
															+    model = os.getenv("MODEL", f"anthropic/claude-{model_name}")
														
 
															+    temperature = float(prompt.config.get("temperature", 0.3))
														
 
															+    max_iterations = 30
														
 
															+    trace_dir = "traces"
														
 
															+
														
 
															+    skills_dir = str(Path(__file__).parent / "skills")
														
 
															+
														
 
															+    Path(trace_dir).mkdir(parents=True, exist_ok=True)
														
 
															+
														
 
															+    store = FileSystemTraceStore(base_path=trace_dir)
														
 
															+
														
 
															+    allowed_tools = [
														
 
															+        "douyin_search",
														
 
															+        "douyin_user_videos",
														
 
															+        "get_content_fans_portrait",
														
 
															+        "get_account_fans_portrait",
														
 
															+        "store_results_mysql",
														
 
															+        "create_crawler_plan_by_douyin_content_id",
														
 
															+        "create_crawler_plan_by_douyin_account_id",
														
 
															+    ]
														
 
															+
														
 
															+    runner = AgentRunner(
														
 
															+        llm_call=create_openrouter_llm_call(model=model),
														
 
															+        trace_store=store,
														
 
															+        skills_dir=skills_dir,
														
 
															+    )
														
 
															+
														
 
															+    config = RunConfig(
														
 
															+        name="内容寻找",
														
 
															+        model=model,
														
 
															+        temperature=temperature,
														
 
															+        max_iterations=max_iterations,
														
 
															+        tools=allowed_tools,
														
 
															+        extra_llm_params={"max_tokens": 8192},
														
 
															+        knowledge=KnowledgeConfig(
														
 
															+            enable_extraction=True,
														
 
															+            enable_completion_extraction=True,
														
 
															+            enable_injection=True,
														
 
															+            owner="content_finder_agent",
														
 
															+            default_tags={"project": "content_finder"},
														
 
															+            default_scopes=["com.piaoquantv.supply"],
														
 
															+            default_search_types=["tool", "usecase", "definition"],
														
 
															+            default_search_owner="content_finder_agent"
														
 
															+        )
														
 
															+    )
														
 
															+
														
 
															+    # 执行
														
 
															+    trace_id = None
														
 
															+
														
 
															+    try:
														
 
															+        async for item in runner.run(messages=messages, config=config):
														
 
															+            if isinstance(item, Trace):
														
 
															+                trace_id = item.trace_id
														
 
															+
														
 
															+                if item.status == "completed":
														
 
															+                    logger.info(f"Agent 执行完成: trace_id={trace_id}")
														
 
															+                    return {
														
 
															+                        "trace_id": trace_id,
														
 
															+                        "status": "completed"
														
 
															+                    }
														
 
															+                elif item.status == "failed":
														
 
															+                    logger.error(f"Agent 执行失败: {item.error_message}")
														
 
															+                    return {
														
 
															+                        "trace_id": trace_id,
														
 
															+                        "status": "failed",
														
 
															+                        "error": item.error_message
														
 
															+                    }
														
 
															+
														
 
															+            elif isinstance(item, Message) and stream_output:
														
 
															+                # 流式输出（仅 run.py 需要）
														
 
															+                if item.role == "assistant":
														
 
															+                    content = item.content
														
 
															+                    if isinstance(content, dict):
														
 
															+                        text = content.get("text", "")
														
 
															+                        tool_calls = content.get("tool_calls", [])
														
 
															+
														
 
															+                        if text:
														
 
															+                            # 如果有推荐结果，完整输出
														
 
															+                            if len(text) > 500 and ("推荐结果" in text or "推荐内容" in text or "🎯" in text):
														
 
															+                                print(f"\n{text}")
														
 
															+                            # 如果有工具调用且文本较短，只输出摘要
														
 
															+                            elif tool_calls and len(text) > 100:
														
 
															+                                print(f"[思考] {text[:100]}...")
														
 
															+                            # 其他情况输出完整文本
														
 
															+                            else:
														
 
															+                                print(f"\n{text}")
														
 
															+
														
 
															+                        # 输出工具调用信息
														
 
															+                        if tool_calls:
														
 
															+                            for tc in tool_calls:
														
 
															+                                tool_name = tc.get("function", {}).get("name", "unknown")
														
 
															+                                # 跳过 goal 工具的输出，减少噪音
														
 
															+                                if tool_name != "goal":
														
 
															+                                    print(f"[工具] {tool_name}")
														
 
															+                    elif isinstance(content, str) and content:
														
 
															+                        print(f"\n{content}")
														
 
															+
														
 
															+                elif item.role == "tool":
														
 
															+                    content = item.content
														
 
															+                    if isinstance(content, dict):
														
 
															+                        tool_name = content.get("tool_name", "unknown")
														
 
															+                        print(f"[结果] {tool_name} ✓")
														
 
															+
														
 
															+        # 如果循环结束但没有返回，说明异常退出
														
 
															+        return {
														
 
															+            "trace_id": trace_id,
														
 
															+            "status": "failed",
														
 
															+            "error": "Agent 异常退出"
														
 
															+        }
														
 
															+
														
 
															+    except KeyboardInterrupt:
														
 
															+        logger.info("用户中断")
														
 
															+        if stream_output:
														
 
															+            print("\n用户中断")
														
 
															+        return {
														
 
															+            "trace_id": trace_id,
														
 
															+            "status": "failed",
														
 
															+            "error": "用户中断"
														
 
															+        }
														
 
															+    except Exception as e:
														
 
															+        logger.error(f"Agent 执行异常: {e}", exc_info=True)
														
 
															+        if stream_output:
														
 
															+            print(f"\n执行失败: {e}")
														
 
															+        return {
														
 
															+            "trace_id": trace_id,
														
 
															+            "status": "failed",
														
 
															+            "error": str(e)
														
 
															+        }
														
 
															+
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    import asyncio
														
 
															+    asyncio.run(run_agent())
														
 
															+