# howard / Agent
"""
Sub-Agent 工具 - agent / evaluate

agent: 创建 Agent 执行任务（单任务 delegate 或多任务并行 explore）
evaluate: 评估目标执行结果是否满足要求
"""

import asyncio
from datetime import datetime
from typing import Any, Dict, List, Optional, Union

from agent.tools import tool
from agent.trace.models import Trace, Messages
from agent.trace.trace_id import generate_sub_trace_id
from agent.trace.goal_models import GoalTree
from agent.trace.websocket import broadcast_sub_trace_started, broadcast_sub_trace_completed


# ===== prompts =====

# ===== Evaluation task =====

# Prompt template for the evaluator sub-agent. build_evaluate_prompt() fills
# the {goal_description} and {result_text} placeholders via str.format().
EVALUATE_PROMPT_TEMPLATE = """# 评估任务

请评估以下任务的执行结果是否满足要求。

## 目标描述

{goal_description}

## 执行结果

{result_text}

## 输出格式

## 评估结论
[通过/不通过]

## 评估理由
[详细说明通过或不通过原因]

## 修改建议（如果不通过）
1. [建议1]
2. [建议2]
"""

# ===== Result formatting =====

# First line of the summary built by _format_single_result().
DELEGATE_RESULT_HEADER = "## 委托任务完成\n"

# Heading of the saved-knowledge ID list; {count} receives the number of IDs.
DELEGATE_SAVED_KNOWLEDGE_HEADER = "**保存的知识** ({count} 条):"

# Heading of the statistics section of a single-task summary.
DELEGATE_STATS_HEADER = "**执行统计**:"

# First line of the aggregated summary built by _format_multi_result().
EXPLORE_RESULT_HEADER = "## 探索结果\n"

# Per-branch heading; {branch_name} is the branch label, {task} the task text.
EXPLORE_BRANCH_TEMPLATE = "### 方案 {branch_name}: {task}"

# Status line for a branch whose result dict has status "completed".
EXPLORE_STATUS_SUCCESS = "**状态**: ✓ 完成"

# Status line for a branch whose result dict has any other status.
EXPLORE_STATUS_FAILED = "**状态**: ✗ 失败"

# Status line for a branch whose result is not a dict.
EXPLORE_STATUS_ERROR = "**状态**: ✗ 异常"

# Heading of the closing totals section of the aggregated summary.
EXPLORE_SUMMARY_HEADER = "## 总结"

def build_evaluate_prompt(goal_description: str, result_text: str) -> str:
    """Render EVALUATE_PROMPT_TEMPLATE for one goal and its execution result.

    Args:
        goal_description: Text substituted for the ``{goal_description}``
            placeholder.
        result_text: Text substituted for the ``{result_text}`` placeholder.
            When it is empty (or otherwise falsy) a fixed "no execution
            result" notice is substituted instead.

    Returns:
        The rendered evaluation prompt.
    """
    shown_result = result_text if result_text else "（无执行结果）"
    fields = {"goal_description": goal_description, "result_text": shown_result}
    return EVALUATE_PROMPT_TEMPLATE.format(**fields)


def _make_run_config(**kwargs):
    """Build a RunConfig from keyword arguments.

    RunConfig is imported inside the function rather than at module level to
    avoid a circular import with agent.core.runner.

    Args:
        **kwargs: Forwarded unchanged to the RunConfig constructor.

    Returns:
        The newly constructed RunConfig.
    """
    from agent.core.runner import RunConfig as _RunConfig

    config = _RunConfig(**kwargs)
    return config


# ===== 辅助函数 =====

async def _update_collaborator(
    store, trace_id: str,
    name: str, sub_trace_id: str,
    status: str, summary: str = "",
) -> None:
    """
    Record or refresh a collaborator entry in trace.context["collaborators"].

    Entries are matched on their "trace_id" field. A matching entry gets its
    status overwritten and, only when ``summary`` is non-empty, its summary;
    its name is left as it was. Without a match a new entry of type "agent"
    is appended. Nothing happens when the parent trace cannot be loaded.

    Args:
        store: Object providing awaitable get_trace() and update_trace().
        trace_id: ID of the parent trace whose context is updated.
        name: Display name used when a new entry is created.
        sub_trace_id: Trace ID of the collaborator; the lookup key.
        status: Status to store on the entry.
        summary: Optional short description of what the collaborator does.
    """
    parent = await store.get_trace(trace_id)
    if not parent:
        return

    roster = parent.context.get("collaborators", [])

    # First entry that already tracks this sub-trace, if any.
    match = next(
        (entry for entry in roster if entry.get("trace_id") == sub_trace_id),
        None,
    )

    if not match:
        roster.append(dict(
            name=name,
            type="agent",
            trace_id=sub_trace_id,
            status=status,
            summary=summary,
        ))
    else:
        match["status"] = status
        if summary:
            match["summary"] = summary

    # Write the list back in case the "collaborators" key did not exist yet.
    parent.context["collaborators"] = roster
    await store.update_trace(trace_id, context=parent.context)


async def _update_goal_start(
    store, trace_id: str, goal_id: str, mode: str, sub_trace_ids: List[str]
) -> None:
    """Mark a goal as an in-progress agent call.

    Does nothing when ``goal_id`` is empty.

    Args:
        store: Object providing an awaitable update_goal().
        trace_id: ID of the trace that owns the goal.
        goal_id: ID of the goal to update.
        mode: Value stored as the goal's ``agent_call_mode``.
        sub_trace_ids: Sub-trace records to attach to the goal.
    """
    if goal_id:
        changes = {
            "type": "agent_call",
            "agent_call_mode": mode,
            "status": "in_progress",
            "sub_trace_ids": sub_trace_ids,
        }
        await store.update_goal(trace_id, goal_id, **changes)


async def _update_goal_complete(
    store, trace_id: str, goal_id: str,
    status: str, summary: str, sub_trace_ids: List[str]
) -> None:
    """Store the final status and summary of a goal.

    Does nothing when ``goal_id`` is empty.

    Args:
        store: Object providing an awaitable update_goal().
        trace_id: ID of the trace that owns the goal.
        goal_id: ID of the goal to update.
        status: Final status to record.
        summary: Summary text to record.
        sub_trace_ids: Sub-trace records to attach to the goal.
    """
    if goal_id:
        outcome = {
            "status": status,
            "summary": summary,
            "sub_trace_ids": sub_trace_ids,
        }
        await store.update_goal(trace_id, goal_id, **outcome)


def _aggregate_stats(results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Sum the per-result statistics of several agent runs.

    Only entries that are dicts containing a "stats" key contribute; anything
    else (for example an exception object) is skipped.

    Args:
        results: Result objects, each optionally carrying a "stats" dict.

    Returns:
        A dict with the summed "total_messages", "total_tokens" and
        "total_cost"; counters missing from a stats dict count as zero.
    """
    counters = (("total_messages", 0), ("total_tokens", 0), ("total_cost", 0.0))
    totals = {key: zero for key, zero in counters}

    for entry in results:
        if not isinstance(entry, dict) or "stats" not in entry:
            continue
        entry_stats = entry["stats"]
        for key, zero in counters:
            totals[key] += entry_stats.get(key, zero)

    return totals


def _get_allowed_tools(single: bool, context: dict) -> Optional[List[str]]:
    """Choose the tool names a sub-agent may use.

    Args:
        single: True for a single (delegate) task, False for a multi-task
            (explore) run.
        context: Tool context; its "runner" entry is consulted when
            ``single`` is true.

    Returns:
        For ``single=False`` a fixed read-only tool list. For ``single=True``
        every name in ``runner.tools.registry`` except "agent" and
        "evaluate", or None when no such registry is reachable.
    """
    if single:
        runner = context.get("runner")
        if runner and hasattr(runner, "tools") and hasattr(runner.tools, "registry"):
            return [
                tool_name
                for tool_name in runner.tools.registry.keys()
                if tool_name not in ("agent", "evaluate")
            ]
        return None

    # Multi-task (explore) runs get read-only tools plus "goal".
    return ["read_file", "grep_content", "glob_files", "goal"]


def _format_single_result(result: Dict[str, Any], sub_trace_id: str, continued: bool) -> Dict[str, Any]:
    """Turn the raw result of a single (delegate) run into the tool's return value.

    The returned "summary" is a text report made of the delegate header, the
    run's own summary (if any), the saved knowledge IDs (if any) and the run
    statistics (if any).

    Args:
        result: Raw result dict of the sub-agent run.
        sub_trace_id: Trace ID of the sub-agent.
        continued: Whether the run continued an existing trace.

    Returns:
        A dict holding "mode", "sub_trace_id", "continue_from" and
        "saved_knowledge_ids", overlaid with every key of ``result``, with
        "summary" replaced by the formatted report.
    """
    body = result.get("summary", "")
    knowledge_ids = result.get("saved_knowledge_ids", [])
    run_stats = result.get("stats", {})

    report = [DELEGATE_RESULT_HEADER]
    if body:
        report += [body, ""]

    # IDs of knowledge items saved during the run
    if knowledge_ids:
        report += [
            "---\n",
            DELEGATE_SAVED_KNOWLEDGE_HEADER.format(count=len(knowledge_ids)),
        ]
        report += [f"- {kid}" for kid in knowledge_ids]
        report.append("")

    report += ["---\n", DELEGATE_STATS_HEADER]
    if run_stats:
        report += [
            f"- 消息数: {run_stats.get('total_messages', 0)}",
            f"- Tokens: {run_stats.get('total_tokens', 0)}",
            f"- 成本: ${run_stats.get('total_cost', 0.0):.4f}",
        ]
    report_text = "\n".join(report)

    formatted = {
        "mode": "delegate",
        "sub_trace_id": sub_trace_id,
        "continue_from": continued,
        "saved_knowledge_ids": knowledge_ids,  # handed on to the parent agent
    }
    # Keys of the raw result win over the defaults above ...
    formatted.update(result)
    # ... except the summary, which is always the formatted report.
    formatted["summary"] = report_text
    return formatted


def _branch_label(index: int) -> str:
    """Return the label of the index-th branch: A..Z, then AA, AB, ..."""
    label = ""
    remaining = index + 1
    while remaining > 0:
        remaining, offset = divmod(remaining - 1, 26)
        label = chr(ord("A") + offset) + label
    return label


def _format_multi_result(
    tasks: List[str], results: List[Dict[str, Any]], sub_trace_ids: List[Dict]
) -> Dict[str, Any]:
    """Aggregate the results of a multi-task (explore) run into one report.

    Each task/result pair becomes one labelled branch section showing its
    status, a preview of its summary and its statistics, followed by overall
    totals.

    Args:
        tasks: Task descriptions, one per branch.
        results: Per-branch results, paired with ``tasks`` by position. A
            dict whose status is "completed" counts as success, any other
            dict as failure, and a non-dict value as an error.
        sub_trace_ids: Sub-trace records, returned unchanged.

    Returns:
        A dict with "mode" ("explore"), "status" ("completed" when at least
        one branch succeeded, otherwise "failed"), the report text as
        "summary", plus "sub_trace_ids", "tasks" and the aggregated "stats".
    """
    lines = [EXPLORE_RESULT_HEADER]
    successful = 0
    failed = 0
    total_tokens = 0
    total_cost = 0.0

    for i, (task_item, result) in enumerate(zip(tasks, results)):
        # Letters alone run out after 26 branches; _branch_label keeps going.
        branch_name = _branch_label(i)
        lines.append(EXPLORE_BRANCH_TEMPLATE.format(branch_name=branch_name, task=task_item))

        if isinstance(result, dict):
            if result.get("status", "unknown") == "completed":
                lines.append(EXPLORE_STATUS_SUCCESS)
                successful += 1
            else:
                lines.append(EXPLORE_STATUS_FAILED)
                failed += 1

            summary = result.get("summary", "")
            if summary:
                # Only mark the preview as truncated when text was actually cut.
                preview = summary[:200] + "..." if len(summary) > 200 else summary
                lines.append(f"**摘要**: {preview}")

            stats = result.get("stats", {})
            if stats:
                messages = stats.get("total_messages", 0)
                tokens = stats.get("total_tokens", 0)
                cost = stats.get("total_cost", 0.0)
                lines.append(f"**统计**: {messages} messages, {tokens} tokens, ${cost:.4f}")
                total_tokens += tokens
                total_cost += cost
        else:
            lines.append(EXPLORE_STATUS_ERROR)
            failed += 1

        lines.append("")

    lines.append("---\n")
    lines.append(EXPLORE_SUMMARY_HEADER)
    lines.append(f"- 总分支数: {len(tasks)}")
    lines.append(f"- 成功: {successful}")
    lines.append(f"- 失败: {failed}")
    lines.append(f"- 总 tokens: {total_tokens}")
    lines.append(f"- 总成本: ${total_cost:.4f}")

    aggregated_summary = "\n".join(lines)
    # One successful branch is enough for the whole exploration to count.
    overall_status = "completed" if successful > 0 else "failed"

    return {
        "mode": "explore",
        "status": overall_status,
        "summary": aggregated_summary,
        "sub_trace_ids": sub_trace_ids,
        "tasks": tasks,
        "stats": _aggregate_stats(results),
    }


async def _get_goal_description(store, trace_id: str, goal_id: str) -> str:
    """Look up the description of a goal in the trace's goal tree.

    Args:
        store: Object providing an awaitable get_goal_tree().
        trace_id: ID of the trace whose goal tree is searched.
        goal_id: ID of the goal to describe.

    Returns:
        An empty string when ``goal_id`` is empty, the goal's description
        when the goal is found, otherwise the placeholder "Goal <goal_id>".
    """
    if not goal_id:
        return ""

    tree = await store.get_goal_tree(trace_id)
    node = tree.find(goal_id) if tree else None
    if node:
        return node.description
    return f"Goal {goal_id}"


def _build_evaluate_prompt(goal_description: str, messages: Optional[Messages]) -> str:
    """
    Build the evaluation prompt from a goal description and result messages.

    The text of every message is collected and joined with newlines to form
    the "execution result" part of the prompt. String contents are taken
    as-is; list (multimodal) contents contribute only their items of type
    "text". Any other content is ignored.

    Args:
        goal_description: Goal description injected by the code from the GoalTree.
        messages: Messages supplied by the model (execution result + context).

    Returns:
        The prompt produced by build_evaluate_prompt().
    """
    texts = []
    for message in messages or ():
        body = message.get("content", "")
        if isinstance(body, list):
            # Multimodal content: keep the text parts only.
            texts.extend(
                chunk.get("text", "")
                for chunk in body
                if isinstance(chunk, dict) and chunk.get("type") == "text"
            )
        elif isinstance(body, str):
            texts.append(body)

    return build_evaluate_prompt(goal_description, "\n".join(texts))


def _make_event_printer(label: str):
    """
    Create a callback that prints a sub-agent's progress to stdout.

    Callers pass the returned function as ``on_event`` to ``run_result``
    when the parent runner is in debug mode. It prints assistant text
    previews, tool-call names, tool results and the final trace status, each
    line prefixed with the given label.

    Args:
        label: Name shown in square brackets at the start of every line.

    Returns:
        A function taking one event (a Message or a Trace); other objects
        are ignored.
    """
    tag = f"  [{label}]"

    def _clip(value, limit):
        # Shorten to `limit` characters, marking the cut with an ellipsis.
        return value[:limit] + "..." if len(value) > limit else value

    def _print_assistant(message):
        body = message.content
        if not isinstance(body, dict):
            return
        text = body.get("text", "")
        calls = body.get("tool_calls")
        if text:
            print(f"{tag} {_clip(text, 120)}")
        for call in calls or ():
            tool_name = call.get("function", {}).get("name", "unknown")
            print(f"{tag} 🛠️  {tool_name}")

    def _print_tool(message):
        body = message.content
        if not isinstance(body, dict):
            return
        tool_name = body.get("tool_name", "unknown")
        detail = _clip(message.description or "", 60)
        tail = f": {detail}" if detail else ""
        print(f"{tag} ✅ {tool_name}{tail}")

    def _print_trace(trace):
        if trace.status == "completed":
            print(f"{tag} ✓ 完成")
        elif trace.status == "failed":
            reason = (trace.error_message or "")[:80]
            print(f"{tag} ✗ 失败: {reason}")

    def on_event(item):
        from agent.trace.models import Trace, Message

        if isinstance(item, Message):
            if item.role == "assistant":
                _print_assistant(item)
            elif item.role == "tool":
                _print_tool(item)
            return
        if isinstance(item, Trace):
            _print_trace(item)

    return on_event


# ===== 统一内部执行函数 =====

async def _finish_sub_trace(
    store, trace_id: str, sub_trace_id: str, collab_name: str,
    status: str, summary: str, stats: Dict[str, Any],
) -> None:
    """Broadcast that a sub-trace finished and store the outcome on its collaborator entry."""
    await broadcast_sub_trace_completed(trace_id, sub_trace_id, status, summary, stats)
    await _update_collaborator(
        store, trace_id,
        name=collab_name, sub_trace_id=sub_trace_id,
        # The collaborator entry only keeps a short preview of the summary.
        status=status, summary=summary[:80],
    )


async def _run_agents(
    tasks: List[str],
    per_agent_msgs: List[Messages],
    continue_from: Optional[str],
    store, trace_id: str, goal_id: str, runner, context: dict,
    agent_type: Optional[str] = None,
    skills: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Shared execution logic behind the ``agent`` tool.

    single (len(tasks) == 1): delegate mode, all tools except agent/evaluate.
    multi (len(tasks) > 1): explore mode, read-only tools, run in parallel.

    For every task a sub-trace is created (or, with ``continue_from``, an
    existing one is reused), the sub-agent is registered as a collaborator
    of the parent trace and the parent goal is marked in progress. After
    execution the outcome is broadcast, the collaborator entry and the goal
    are updated, and a formatted result is returned.

    Args:
        tasks: Task descriptions, one per sub-agent.
        per_agent_msgs: Preset messages, one list per task (same order).
        continue_from: ID of an existing trace to continue; only honoured
            for a single task.
        store: Trace store used for all trace/goal reads and writes.
        trace_id: ID of the parent trace.
        goal_id: ID of the parent goal (may be empty).
        runner: Runner whose ``run_result`` executes each sub-agent.
        context: Tool context, used to determine the allowed tools.
        agent_type: Optional sub-agent type; defaults to "delegate" or
            "explore" depending on the mode.
        skills: Optional skill names forwarded to the run configuration.

    Returns:
        The dict built by ``_format_single_result`` / ``_format_multi_result``,
        or a dict with status "failed" and an "error" message when the input
        is inconsistent, the trace to continue does not exist, or the single
        task raised.
    """
    # zip() below would otherwise silently drop every task without messages.
    if len(per_agent_msgs) < len(tasks):
        return {
            "status": "failed",
            "error": (
                "messages must provide one list per task: "
                f"got {len(per_agent_msgs)} for {len(tasks)} task(s)"
            ),
        }

    single = len(tasks) == 1
    parent_trace = await store.get_trace(trace_id)

    # continue_from: reuse an existing trace (single task only)
    sub_trace_id = None
    continued = False
    if single and continue_from:
        existing = await store.get_trace(continue_from)
        if not existing:
            return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
        sub_trace_id = continue_from
        continued = True
        goal_tree = await store.get_goal_tree(continue_from)
        mission = goal_tree.mission if goal_tree else tasks[0]
        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
    else:
        sub_trace_ids = []

    # Values that are the same for every sub-agent of this call.
    mode = "delegate" if single else "explore"
    resolved_agent_type = agent_type or mode
    debug = getattr(runner, 'debug', False)

    # Create the sub-traces and the coroutines that will run them
    coros = []
    all_sub_trace_ids = list(sub_trace_ids)  # copy for the continue_from case

    for i, (task_item, msgs) in enumerate(zip(tasks, per_agent_msgs)):
        # With continue_from the sub-trace already exists; otherwise create it.
        if not (single and continued):
            suffix = "delegate" if single else f"explore-{i+1:03d}"
            stid = generate_sub_trace_id(trace_id, suffix)

            sub_trace = Trace(
                trace_id=stid,
                mode="agent",
                task=task_item,
                parent_trace_id=trace_id,
                parent_goal_id=goal_id,
                agent_type=resolved_agent_type,
                uid=parent_trace.uid if parent_trace else None,
                model=parent_trace.model if parent_trace else None,
                status="running",
                context={"created_by_tool": "agent"},
                created_at=datetime.now(),
            )
            await store.create_trace(sub_trace)
            await store.update_goal_tree(stid, GoalTree(mission=task_item))

            all_sub_trace_ids.append({"trace_id": stid, "mission": task_item})

            # Broadcast sub_trace_started
            await broadcast_sub_trace_started(
                trace_id, stid, goal_id or "",
                resolved_agent_type, task_item,
            )

            if single:
                sub_trace_id = stid

        # Register the sub-agent as an active collaborator
        cur_stid = sub_trace_id if single else all_sub_trace_ids[-1]["trace_id"]
        if not single:
            collab_name = f"explore-{i+1}"
        elif continued:
            collab_name = f"delegate-{cur_stid[:8]}"
        else:
            collab_name = task_item[:30]
        await _update_collaborator(
            store, trace_id,
            name=collab_name, sub_trace_id=cur_stid,
            status="running", summary=task_item[:80],
        )

        # Build the messages: preset messages followed by the task itself
        agent_msgs = list(msgs) + [{"role": "user", "content": task_item}]
        allowed_tools = _get_allowed_tools(single, context)

        agent_label = (agent_type or ("delegate" if single else f"explore-{i+1}"))
        on_event = _make_event_printer(agent_label) if debug else None

        coro = runner.run_result(
            messages=agent_msgs,
            config=_make_run_config(
                trace_id=cur_stid,
                agent_type=resolved_agent_type,
                model=parent_trace.model if parent_trace else "gpt-4o",
                uid=parent_trace.uid if parent_trace else None,
                tools=allowed_tools,
                name=task_item[:50],
                skills=skills,
            ),
            on_event=on_event,
        )
        coros.append((i, cur_stid, collab_name, coro))

    # Mark the parent goal as in_progress
    await _update_goal_start(
        store, trace_id, goal_id,
        mode,
        all_sub_trace_ids,
    )

    # Execute
    if single:
        # A single task is awaited directly, with error handling
        _, stid, collab_name, coro = coros[0]
        try:
            result = await coro

            await _finish_sub_trace(
                store, trace_id, stid, collab_name,
                result.get("status", "completed"),
                result.get("summary", ""),
                result.get("stats", {}),
            )

            formatted = _format_single_result(result, stid, continued)

            await _update_goal_complete(
                store, trace_id, goal_id,
                result.get("status", "completed"),
                formatted["summary"],
                all_sub_trace_ids,
            )
            return formatted

        except Exception as e:
            # Some exceptions carry no message; fall back to the type name so
            # the failure is never reported with an empty error.
            error_msg = str(e) or type(e).__name__
            await _finish_sub_trace(
                store, trace_id, stid, collab_name,
                "failed", error_msg, {},
            )
            await _update_goal_complete(
                store, trace_id, goal_id,
                "failed", f"委托任务失败: {error_msg}",
                all_sub_trace_ids,
            )
            return {
                "mode": "delegate",
                "status": "failed",
                "error": error_msg,
                "sub_trace_id": stid,
            }
    else:
        # Multiple tasks run in parallel
        raw_results = await asyncio.gather(
            *(coro for _, _, _, coro in coros),
            return_exceptions=True,
        )

        processed_results = []
        for (_, stid, collab_name, _), raw in zip(coros, raw_results):
            # gather(return_exceptions=True) can also hand back
            # asyncio.CancelledError, which derives from BaseException rather
            # than Exception, so the broader check is required here.
            if isinstance(raw, BaseException):
                error_msg = str(raw) or type(raw).__name__
                processed_results.append({
                    "status": "failed",
                    "summary": f"执行出错: {error_msg}",
                    "stats": {"total_messages": 0, "total_tokens": 0, "total_cost": 0.0},
                })
                await _finish_sub_trace(
                    store, trace_id, stid, collab_name,
                    "failed", error_msg, {},
                )
            else:
                processed_results.append(raw)
                await _finish_sub_trace(
                    store, trace_id, stid, collab_name,
                    raw.get("status", "completed"),
                    raw.get("summary", ""),
                    raw.get("stats", {}),
                )

        formatted = _format_multi_result(tasks, processed_results, all_sub_trace_ids)

        await _update_goal_complete(
            store, trace_id, goal_id,
            formatted["status"],
            formatted["summary"],
            all_sub_trace_ids,
        )
        return formatted


# ===== 工具定义 =====

# NOTE(review): the docstring of this tool is kept verbatim (in Chinese)
# because the @tool decorator may expose it to the model - confirm before
# translating it.
@tool(description="创建 Agent 执行任务", hidden_params=["context"])
async def agent(
    task: Union[str, List[str]],
    messages: Optional[Union[Messages, List[Messages]]] = None,
    continue_from: Optional[str] = None,
    agent_type: Optional[str] = None,
    skills: Optional[List[str]] = None,
    context: Optional[dict] = None,
) -> Dict[str, Any]:
    """
    创建 Agent 执行任务。

    单任务 (task: str): delegate 模式，全量工具
    多任务 (task: List[str]): explore 模式，只读工具，并行执行

    Args:
        task: 任务描述。字符串=单任务，列表=多任务并行
        messages: 预置消息。1D 列表=所有 agent 共享；2D 列表=per-agent
        continue_from: 继续已有 trace（仅单任务）
        agent_type: 子 Agent 类型，决定 preset 和默认 skills（如 "deconstruct"）
        skills: 附加到 system prompt 的 skill 名称列表，覆盖 preset 默认值
        context: 框架自动注入的上下文
    """
    # Tool entry point: validates the injected context and the arguments,
    # normalises ``task`` / ``messages`` to per-agent lists and hands over to
    # _run_agents().
    #
    #   task          - a string runs one task; a list runs one sub-agent per
    #                   entry.
    #   messages      - preset messages: a flat list is shared by all agents,
    #                   a list of lists supplies one list per task.
    #   continue_from - ID of an existing trace to continue; only accepted
    #                   when ``task`` is a string.
    #   agent_type    - optional sub-agent type, forwarded to _run_agents().
    #   skills        - optional skill names, forwarded to _run_agents().
    #   context       - must provide "store", "trace_id" and "runner";
    #                   "goal_id" is optional.
    #
    # Returns the result dict of _run_agents(), or a dict with status
    # "failed" and an "error" message when validation fails.
    if not context:
        return {"status": "failed", "error": "context is required"}

    store = context.get("store")
    trace_id = context.get("trace_id")
    goal_id = context.get("goal_id")
    runner = context.get("runner")

    required = {"store": store, "trace_id": trace_id, "runner": runner}
    missing = [key for key, value in required.items() if not value]
    if missing:
        return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}

    # Normalise task -> list
    single = isinstance(task, str)
    tasks = [task] if single else task

    if not tasks:
        return {"status": "failed", "error": "task is required"}
    # An empty string wrapped in a list is truthy, so blank tasks need their
    # own check; non-string entries would only fail later inside the run.
    if any(not isinstance(item, str) or not item.strip() for item in tasks):
        return {"status": "failed", "error": "each task must be a non-empty string"}

    # Normalise messages -> List[Messages] (one list per agent)
    if messages is None:
        per_agent_msgs: List[Messages] = [[] for _ in tasks]
    elif messages and isinstance(messages[0], list):
        per_agent_msgs = messages  # 2D: per-agent
        # Fewer message lists than tasks would silently drop the extra tasks.
        if len(per_agent_msgs) < len(tasks):
            return {
                "status": "failed",
                "error": (
                    "messages must provide one list per task: "
                    f"got {len(per_agent_msgs)} for {len(tasks)} task(s)"
                ),
            }
    else:
        per_agent_msgs = [messages] * len(tasks)  # 1D: shared by all agents

    if continue_from and not single:
        return {"status": "failed", "error": "continue_from requires single task"}

    return await _run_agents(
        tasks, per_agent_msgs, continue_from,
        store, trace_id, goal_id, runner, context,
        agent_type=agent_type,
        skills=skills,
    )


# NOTE(review): the docstring of this tool is kept verbatim (in Chinese)
# because the @tool decorator may expose it to the model - confirm before
# translating it.
@tool(description="评估目标执行结果是否满足要求", hidden_params=["context"])
async def evaluate(
    messages: Optional[Messages] = None,
    target_goal_id: Optional[str] = None,
    continue_from: Optional[str] = None,
    context: Optional[dict] = None,
) -> Dict[str, Any]:
    """
    评估目标执行结果是否满足要求。

    代码自动从 GoalTree 注入目标描述。模型把执行结果和上下文放在 messages 中。

    Args:
        messages: 执行结果和上下文消息（OpenAI 格式）
        target_goal_id: 要评估的目标 ID（默认当前 goal_id）
        continue_from: 继续已有评估 trace
        context: 框架自动注入的上下文
    """
    # Tool entry point: runs an "evaluate" sub-agent that judges whether the
    # result contained in ``messages`` satisfies a goal.
    #
    #   messages       - messages holding the execution result and context.
    #   target_goal_id - goal to evaluate; defaults to context["goal_id"].
    #   continue_from  - ID of an existing evaluation trace to continue.
    #   context        - must provide "store", "trace_id" and "runner";
    #                    "goal_id" is optional.
    #
    # Returns the sub-agent's result dict extended with "mode",
    # "sub_trace_id" and "continue_from", or a dict with status "failed" and
    # an "error" message when validation or the run fails.
    if not context:
        return {"status": "failed", "error": "context is required"}

    store = context.get("store")
    trace_id = context.get("trace_id")
    current_goal_id = context.get("goal_id")
    runner = context.get("runner")

    missing = []
    if not store:
        missing.append("store")
    if not trace_id:
        missing.append("trace_id")
    if not runner:
        missing.append("runner")
    if missing:
        return {"status": "failed", "error": f"Missing required context: {', '.join(missing)}"}

    # target_goal_id defaults to context["goal_id"]
    goal_id = target_goal_id or current_goal_id

    # Fetch the goal description from the GoalTree
    goal_desc = await _get_goal_description(store, trace_id, goal_id)

    # Build the evaluator prompt
    eval_prompt = _build_evaluate_prompt(goal_desc, messages)

    # Load the parent trace (source of uid and model for the sub-trace)
    parent_trace = await store.get_trace(trace_id)

    # Either continue an existing sub-trace or create a new one
    if continue_from:
        existing_trace = await store.get_trace(continue_from)
        if not existing_trace:
            return {"status": "failed", "error": f"Continue-from trace not found: {continue_from}"}
        sub_trace_id = continue_from
        goal_tree = await store.get_goal_tree(continue_from)
        mission = goal_tree.mission if goal_tree else eval_prompt
        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": mission}]
    else:
        sub_trace_id = generate_sub_trace_id(trace_id, "evaluate")
        sub_trace = Trace(
            trace_id=sub_trace_id,
            mode="agent",
            task=eval_prompt,
            parent_trace_id=trace_id,
            parent_goal_id=current_goal_id,
            agent_type="evaluate",
            uid=parent_trace.uid if parent_trace else None,
            model=parent_trace.model if parent_trace else None,
            status="running",
            context={"created_by_tool": "evaluate"},
            created_at=datetime.now(),
        )
        await store.create_trace(sub_trace)
        await store.update_goal_tree(sub_trace_id, GoalTree(mission=eval_prompt))
        sub_trace_ids = [{"trace_id": sub_trace_id, "mission": eval_prompt}]

        # Broadcast sub_trace_started
        await broadcast_sub_trace_started(
            trace_id, sub_trace_id, current_goal_id or "",
            "evaluate", eval_prompt,
        )

    # Mark the parent goal as in_progress
    # NOTE(review): goal status updates use current_goal_id, while the
    # evaluated description comes from goal_id (possibly target_goal_id) -
    # confirm this split is intended.
    await _update_goal_start(store, trace_id, current_goal_id, "evaluate", sub_trace_ids)

    # Register the evaluator as an active collaborator
    eval_name = f"评估: {(goal_id or 'unknown')[:20]}"
    await _update_collaborator(
        store, trace_id,
        name=eval_name, sub_trace_id=sub_trace_id,
        status="running", summary=f"评估 Goal {goal_id}",
    )

    # Run the evaluation
    try:
        # evaluate uses the read-only tools plus goal
        allowed_tools = ["read_file", "grep_content", "glob_files", "goal"]
        result = await runner.run_result(
            messages=[{"role": "user", "content": eval_prompt}],
            config=_make_run_config(
                trace_id=sub_trace_id,
                agent_type="evaluate",
                model=parent_trace.model if parent_trace else "gpt-4o",
                uid=parent_trace.uid if parent_trace else None,
                tools=allowed_tools,
                name=f"评估: {goal_id}",
            ),
            # Progress is printed only when the runner is in debug mode
            on_event=_make_event_printer("evaluate") if getattr(runner, 'debug', False) else None,
        )

        await broadcast_sub_trace_completed(
            trace_id, sub_trace_id,
            result.get("status", "completed"),
            result.get("summary", ""),
            result.get("stats", {}),
        )
        # The collaborator entry keeps only the first 80 characters of the summary
        await _update_collaborator(
            store, trace_id,
            name=eval_name, sub_trace_id=sub_trace_id,
            status=result.get("status", "completed"),
            summary=result.get("summary", "")[:80],
        )

        # The evaluator's summary is passed on without extra formatting
        formatted_summary = result.get("summary", "")

        await _update_goal_complete(
            store, trace_id, current_goal_id,
            result.get("status", "completed"),
            formatted_summary,
            sub_trace_ids,
        )

        # Keys of ``result`` override the defaults listed before it
        return {
            "mode": "evaluate",
            "sub_trace_id": sub_trace_id,
            "continue_from": bool(continue_from),
            **result,
            "summary": formatted_summary,
        }

    except Exception as e:
        # Any failure above is reported as a failed sub-trace and a failed goal
        error_msg = str(e)
        await broadcast_sub_trace_completed(
            trace_id, sub_trace_id, "failed", error_msg, {},
        )
        await _update_collaborator(
            store, trace_id,
            name=eval_name, sub_trace_id=sub_trace_id,
            status="failed", summary=error_msg[:80],
        )
        await _update_goal_complete(
            store, trace_id, current_goal_id,
            "failed", f"评估任务失败: {error_msg}",
            sub_trace_ids,
        )
        return {
            "mode": "evaluate",
            "status": "failed",
            "error": error_msg,
            "sub_trace_id": sub_trace_id,
        }