Просмотр исходного кода

feat:删除了不必要的测试用例

elksmmx 4 недели назад
Родитель
Commit
f3fff2bd11

+ 0 - 23
examples/integration_real_env/README.md

@@ -1,23 +0,0 @@
-# 真实环境集成测试(Agent-main)
-
-该测试用于在真实 `conda` 环境中验证 `Agent-main` 的核心功能链路是否正常。
-
-## 覆盖范围
-
-1. 工具注册链路(含 browser 工具注册可见性)
-2. 文件工具链:`write_file` / `read_file` / `edit_file` / `glob_files` / `grep_content`
-3. 命令工具:`bash_command`
-4. `AgentRunner.call`
-5. `AgentRunner.run`
-6. 统一 `subagent`:`delegate` / `explore` / `evaluate` / `continue_from`
-
-## 运行
-
-```bash
-BROWSER_USE_CONFIG_DIR=/tmp/browseruse-test conda run -n Agent \
-  python Agent-main/examples/integration_real_env/run.py
-```
-
-说明:
-- 测试默认使用内置 mock LLM,不依赖外部 API Key。
-- 目的是验证真实环境中的框架链路、工具执行与 trace 行为。

+ 0 - 320
examples/integration_real_env/run.py

@@ -1,320 +0,0 @@
-"""
-真实环境集成测试(Agent-main)。
-"""
-
-import asyncio
-import json
-import os
-import sys
-from dataclasses import dataclass
-from pathlib import Path
-from tempfile import TemporaryDirectory
-from typing import Any, Dict, List
-
-
-# 避免 browser_use 在受限环境写 ~/.config 触发权限错误
-os.environ.setdefault("BROWSER_USE_CONFIG_DIR", "/tmp/browseruse-test")
-
-PROJECT_ROOT = Path(__file__).resolve().parents[2]
-sys.path.insert(0, str(PROJECT_ROOT))
-
-
-@dataclass
-class CheckResult:
-    name: str
-    ok: bool
-    detail: str
-
-
-def record(results: List[CheckResult], name: str, ok: bool, detail: str) -> None:
-    results.append(CheckResult(name=name, ok=ok, detail=detail))
-    mark = "PASS" if ok else "FAIL"
-    print(f"[{mark}] {name}: {detail}")
-
-
-async def mock_llm_call(messages, model="gpt-4o", tools=None, **kwargs):
-    """
-    测试专用 mock LLM:
-    - 当有工具可用时:第一轮触发 bash_command,第二轮返回文本结论
-    - 当是 subagent 任务时:按 prompt 类型返回固定文本
-    """
-    state = kwargs.get("_test_state")
-    if isinstance(state, dict):
-        call_no = state.get("call_no", 0)
-        state["call_no"] = call_no + 1
-    else:
-        call_no = 0
-
-    last_user = ""
-    for msg in reversed(messages):
-        if msg.get("role") == "user":
-            last_user = str(msg.get("content", ""))
-            break
-
-    if "# 评估任务" in last_user:
-        return {
-            "content": "## 评估结论\n通过\n\n## 评估理由\n结果满足要求。",
-            "tool_calls": None,
-            "prompt_tokens": 10,
-            "completion_tokens": 10,
-            "finish_reason": "stop",
-            "cost": 0.0,
-        }
-
-    if "# 探索任务" in last_user:
-        return {
-            "content": "探索结论:优先采用方案 1。",
-            "tool_calls": None,
-            "prompt_tokens": 10,
-            "completion_tokens": 10,
-            "finish_reason": "stop",
-            "cost": 0.0,
-        }
-
-    if "委托" in last_user or "实现" in last_user or "继续" in last_user or "优化" in last_user:
-        return {
-            "content": "委托任务执行完成。",
-            "tool_calls": None,
-            "prompt_tokens": 10,
-            "completion_tokens": 10,
-            "finish_reason": "stop",
-            "cost": 0.0,
-        }
-
-    if call_no == 0 and tools:
-        return {
-            "content": "",
-            "tool_calls": [
-                {
-                    "id": "tc_1",
-                    "type": "function",
-                    "function": {
-                        "name": "bash_command",
-                        "arguments": json.dumps(
-                            {
-                                "command": "echo runner_run_ok",
-                                "description": "integration",
-                            }
-                        ),
-                    },
-                }
-            ],
-            "prompt_tokens": 12,
-            "completion_tokens": 8,
-            "finish_reason": "tool_calls",
-            "cost": 0.0,
-        }
-
-    return {
-        "content": "run_fallback_ok",
-        "tool_calls": None,
-        "prompt_tokens": 8,
-        "completion_tokens": 6,
-        "finish_reason": "stop",
-        "cost": 0.0,
-    }
-
-
-def check_tool_registry(results: List[CheckResult]) -> None:
-    from agent.tools import get_tool_registry
-
-    registry = get_tool_registry()
-    names = set(registry.get_tool_names())
-
-    core_required = {
-        "read_file",
-        "edit_file",
-        "write_file",
-        "glob_files",
-        "grep_content",
-        "bash_command",
-        "skill",
-        "list_skills",
-        "subagent",
-    }
-
-    core_missing = sorted(core_required - names)
-    record(
-        results,
-        "tool_registry_core",
-        len(core_missing) == 0,
-        "all core tools registered" if not core_missing else f"missing: {core_missing}",
-    )
-
-    browser_subset = {
-        "browser_search_web",
-        "browser_navigate_to_url",
-        "browser_screenshot",
-    }
-    browser_missing = sorted(browser_subset - names)
-    record(
-        results,
-        "tool_registry_browser",
-        len(browser_missing) == 0,
-        "browser tools visible" if not browser_missing else f"missing: {browser_missing}",
-    )
-
-
-async def check_file_tools(results: List[CheckResult]) -> None:
-    from agent.tools.builtin.file.write import write_file
-    from agent.tools.builtin.file.read import read_file
-    from agent.tools.builtin.file.edit import edit_file
-    from agent.tools.builtin.file.glob import glob_files
-    from agent.tools.builtin.file.grep import grep_content
-    from agent.tools.builtin.bash import bash_command
-
-    with TemporaryDirectory(prefix="agent-main-int-") as tmp:
-        tmp_path = Path(tmp)
-        target = tmp_path / "notes.txt"
-
-        wr = await write_file(file_path=str(target), content="hello\npython\nagent\n")
-        record(results, "write_file", wr.error is None, wr.error or "write success")
-
-        rd = await read_file(file_path=str(target))
-        read_ok = (rd.error is None) and ("python" in rd.output)
-        record(results, "read_file", read_ok, rd.error or "content contains python")
-
-        ed = await edit_file(file_path=str(target), old_string="python", new_string="python3")
-        record(results, "edit_file", ed.error is None, ed.error or "edit success")
-
-        gp = await grep_content(pattern="python3", path=str(tmp_path))
-        grep_ok = gp.error is None and "notes.txt" in gp.output
-        record(results, "grep_content", grep_ok, gp.error or "pattern found")
-
-        gb = await glob_files(pattern="**/*.txt", path=str(tmp_path))
-        glob_ok = gb.error is None and "notes.txt" in gb.output
-        record(results, "glob_files", glob_ok, gb.error or "glob matched")
-
-        bs = await bash_command(
-            command="echo integration_ok",
-            description="integration test",
-            workdir=str(tmp_path),
-        )
-        bash_ok = bs.error is None and "integration_ok" in bs.output
-        record(results, "bash_command", bash_ok, bs.error or "command output ok")
-
-
-async def check_runner(results: List[CheckResult]) -> None:
-    from agent.core.runner import AgentRunner
-    from agent.trace.store import FileSystemTraceStore
-    from agent.trace.models import Trace, Message
-
-    with TemporaryDirectory(prefix="agent-main-runner-") as tmp:
-        store = FileSystemTraceStore(base_path=tmp)
-
-        # call 模式
-        runner_call = AgentRunner(trace_store=store, llm_call=mock_llm_call)
-        call_result = await runner_call.call(messages=[{"role": "user", "content": "ping"}], trace=True)
-        call_ok = bool(call_result.trace_id) and isinstance(call_result.reply, str)
-        record(results, "runner_call", call_ok, f"trace_id={call_result.trace_id}, reply={call_result.reply}")
-
-        # run 模式(含工具调用)
-        state = {"call_no": 0}
-
-        async def llm_with_state(messages, model="gpt-4o", tools=None, **kwargs):
-            kwargs["_test_state"] = state
-            return await mock_llm_call(messages=messages, model=model, tools=tools, **kwargs)
-
-        runner_run = AgentRunner(trace_store=store, llm_call=llm_with_state)
-        events: List[Any] = []
-        async for item in runner_run.run(
-            task="请执行一次bash并给出结果",
-            system_prompt="你是测试助手",
-            model="gpt-4o-mini",
-        ):
-            events.append(item)
-
-        final_trace = None
-        assistant_texts = []
-        for item in events:
-            if isinstance(item, Trace):
-                final_trace = item
-            if isinstance(item, Message) and item.role == "assistant":
-                content = item.content
-                text = content.get("text", "") if isinstance(content, dict) else str(content)
-                if text:
-                    assistant_texts.append(text)
-
-        run_ok = bool(final_trace) and final_trace.status == "completed" and "run_fallback_ok" in assistant_texts
-        record(
-            results,
-            "runner_run",
-            run_ok,
-            f"status={getattr(final_trace, 'status', 'n/a')}, assistant_count={len(assistant_texts)}",
-        )
-
-
-async def check_subagent(results: List[CheckResult]) -> None:
-    from agent.core.runner import AgentRunner
-    from agent.trace.store import FileSystemTraceStore
-    from agent.trace.models import Trace
-    from agent.trace.goal_models import GoalTree
-    from agent.tools.builtin.subagent import subagent
-
-    with TemporaryDirectory(prefix="agent-main-subagent-") as tmp:
-        store = FileSystemTraceStore(base_path=tmp)
-        runner = AgentRunner(trace_store=store, llm_call=mock_llm_call)
-
-        main_trace = Trace(
-            trace_id="main-trace",
-            mode="agent",
-            task="主任务",
-            agent_type="default",
-            status="running",
-        )
-        await store.create_trace(main_trace)
-        goal_tree = GoalTree(mission="主任务")
-        goals = goal_tree.add_goals(["验证 subagent 功能"])
-        goal_tree.focus(goals[0].id)
-        await store.update_goal_tree(main_trace.trace_id, goal_tree)
-
-        ctx = {"store": store, "trace_id": main_trace.trace_id, "goal_id": goals[0].id, "runner": runner}
-
-        r1 = await subagent(mode="delegate", task="实现登录", context=ctx)
-        r2 = await subagent(mode="explore", branches=["方案A", "方案B"], background="请比较", context=ctx)
-        r3 = await subagent(
-            mode="evaluate",
-            target_goal_id=goals[0].id,
-            evaluation_input={"actual_result": "实现完成"},
-            requirements="给出是否通过",
-            context=ctx,
-        )
-        r4 = await subagent(mode="delegate", task="继续优化", continue_from=r1["sub_trace_id"], context=ctx)
-
-        s1 = str(r1.get("status", "")).strip()
-        s2 = str(r2.get("status", "")).strip()
-        s3 = str(r3.get("status", "")).strip()
-        s4 = str(r4.get("status", "")).strip()
-        same_trace = str(r4.get("sub_trace_id", "")).strip() == str(r1.get("sub_trace_id", "")).strip()
-        ok = (s1 == "completed" and s2 == "completed" and s3 == "completed" and s4 == "completed" and same_trace)
-        detail = (
-            f"delegate={s1}, explore={s2}, evaluate={s3}, continue={s4}, continue_same={same_trace}"
-        )
-        record(results, "subagent_unified", ok, detail)
-
-
-async def main() -> int:
-    results: List[CheckResult] = []
-
-    try:
-        check_tool_registry(results)
-        await check_file_tools(results)
-        await check_runner(results)
-        await check_subagent(results)
-    except Exception as exc:
-        record(results, "unexpected_exception", False, repr(exc))
-
-    total = len(results)
-    passed = sum(1 for r in results if r.ok)
-    failed = total - passed
-
-    print("\n=== Integration Summary ===")
-    print(f"Total: {total}")
-    print(f"Passed: {passed}")
-    print(f"Failed: {failed}")
-
-    return 0 if failed == 0 else 1
-
-
-if __name__ == "__main__":
-    raise SystemExit(asyncio.run(main()))

+ 0 - 16
examples/subagent_unified/README.md

@@ -1,16 +0,0 @@
-# Unified Subagent 测试
-
-本目录用于验证 main 分支新增的统一 `subagent` 工具能力:
-
-1. `mode="delegate"`
-2. `mode="explore"`
-3. `mode="evaluate"`
-4. `continue_from`
-
-## 运行
-
-```bash
-python examples/subagent_unified/run.py
-```
-
-脚本使用 mock LLM,不依赖外部 API Key。

+ 0 - 126
examples/subagent_unified/run.py

@@ -1,126 +0,0 @@
-"""
-统一 subagent 工具集成测试(mock LLM)。
-"""
-
-import asyncio
-import os
-import sys
-from pathlib import Path
-from tempfile import TemporaryDirectory
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-
-from agent.core.runner import AgentRunner
-from agent.trace.store import FileSystemTraceStore
-from agent.trace.models import Trace
-from agent.trace.goal_models import GoalTree
-from agent.tools.builtin.subagent import subagent
-
-
-async def mock_llm_call(messages, model="gpt-4o", tools=None, **kwargs):
-    last_user = ""
-    for msg in reversed(messages):
-        if msg.get("role") == "user":
-            last_user = str(msg.get("content", ""))
-            break
-
-    if "# 评估任务" in last_user:
-        content = "## 评估结论\n通过\n\n## 评估理由\n满足需求。"
-    elif "# 探索任务" in last_user:
-        content = "探索完成:建议优先采用方案 1。"
-    else:
-        content = "委托任务已完成。"
-
-    return {
-        "content": content,
-        "tool_calls": None,
-        "finish_reason": "stop",
-        "prompt_tokens": 10,
-        "completion_tokens": 10,
-        "cost": 0.0,
-    }
-
-
-async def run_case():
-    with TemporaryDirectory(prefix="subagent-unified-") as tmp_dir:
-        store = FileSystemTraceStore(base_path=tmp_dir)
-        runner = AgentRunner(trace_store=store, llm_call=mock_llm_call)
-
-        # 创建主 Trace 与 GoalTree(供 subagent 作为父上下文)
-        main_trace = Trace(
-            trace_id="main-trace",
-            mode="agent",
-            task="主任务",
-            agent_type="default",
-            status="running",
-        )
-        await store.create_trace(main_trace)
-        goal_tree = GoalTree(mission="主任务")
-        new_goals = goal_tree.add_goals(["实现主流程"])
-        goal_tree.focus(new_goals[0].id)
-        await store.update_goal_tree(main_trace.trace_id, goal_tree)
-
-        context = {
-            "store": store,
-            "trace_id": main_trace.trace_id,
-            "goal_id": new_goals[0].id,
-            "runner": runner,
-        }
-
-        # 1) delegate
-        delegate_result = await subagent(
-            mode="delegate",
-            task="实现用户登录功能",
-            context=context,
-        )
-        assert delegate_result["status"] == "completed", delegate_result
-        assert delegate_result["summary"], delegate_result
-        delegate_trace = await store.get_trace(delegate_result["sub_trace_id"])
-        assert delegate_trace is not None
-        assert delegate_trace.parent_trace_id == main_trace.trace_id
-        assert delegate_trace.parent_goal_id == new_goals[0].id
-
-        # 2) explore
-        explore_result = await subagent(
-            mode="explore",
-            branches=["JWT 方案", "Session 方案"],
-            background="请比较维护成本和安全性。",
-            context=context,
-        )
-        assert explore_result["status"] == "completed", explore_result
-        assert "探索" in explore_result["summary"], explore_result
-
-        # 3) evaluate
-        evaluate_result = await subagent(
-            mode="evaluate",
-            target_goal_id=new_goals[0].id,
-            evaluation_input={"actual_result": "已实现登录接口并通过单元测试"},
-            requirements="请评估是否满足安全和可维护性要求。",
-            context=context,
-        )
-        assert evaluate_result["status"] == "completed", evaluate_result
-        assert "评估结论" in evaluate_result["summary"], evaluate_result
-
-        # 4) continue_from
-        continue_result = await subagent(
-            mode="delegate",
-            task="继续补充边界条件处理",
-            continue_from=delegate_result["sub_trace_id"],
-            context=context,
-        )
-        assert continue_result["status"] == "completed", continue_result
-        assert continue_result["sub_trace_id"] == delegate_result["sub_trace_id"]
-        assert continue_result["continue_from"] is True
-
-        print("✅ unified subagent tests passed")
-        print(f"delegate: {delegate_result['sub_trace_id']}")
-        print(f"explore : {explore_result['sub_trace_id']}")
-        print(f"evaluate: {evaluate_result['sub_trace_id']}")
-
-
-def main():
-    asyncio.run(run_case())
-
-
-if __name__ == "__main__":
-    main()