| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249 |
- from dotenv import load_dotenv
- load_dotenv()
- from typing import Dict, Any, Optional
- import os
- from pathlib import Path
- import json
- from tools import fetch_account_article_list, fetch_weixin_account, weixin_search
- from agent import AgentRunner, RunConfig, FileSystemTraceStore, Trace, Message
- from agent.llm import create_openrouter_llm_call
- from agent.llm.prompts import SimplePrompt
- from agent.tools.builtin.knowledge import KnowledgeConfig
# Default search query, used by run_agent when the caller passes no query.
DEFAULT_QUERY = "伊朗、以色列、和平是永恒的主题"
# Default demand id, used by run_agent when the caller passes no demand_id.
DEFAULT_DEMAND_ID = 1
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Directory containing this file; the prompt file and the output/, traces/
# and skills/ directories are all resolved relative to it.
PROJECT_ROOT = Path(__file__).resolve().parent
- def _normalize_ascii_double_quotes(text: str) -> str:
- """将字符串中的 ASCII 双引号 `"` 规范化为中文双引号 `“`、`”`。"""
- if '"' not in text:
- return text
- chars: list[str] = []
- open_quote = True
- for ch in text:
- if ch == '"':
- chars.append("“" if open_quote else "”")
- open_quote = not open_quote
- else:
- chars.append(ch)
- return "".join(chars)
- def _sanitize_json_strings(value: Any) -> Any:
- if isinstance(value, str):
- return _normalize_ascii_double_quotes(value)
- if isinstance(value, list):
- return [_sanitize_json_strings(v) for v in value]
- if isinstance(value, dict):
- return {k: _sanitize_json_strings(v) for k, v in value.items()}
- return value
def _sanitize_output_json(output_json_path: Path) -> None:
    """Post-process output.json after a task finishes.

    Reads the file as UTF-8 JSON, recursively replaces ASCII double quotes
    inside all string values, and writes the result back as valid JSON
    (non-ASCII characters kept, 2-space indent).

    The cleanup is skipped with a warning when the file does not exist or
    cannot be read / parsed. Errors raised while writing are not caught here.
    """
    if not output_json_path.exists():
        logger.warning(f"未找到 output.json,跳过清洗: {output_json_path}")
        return

    try:
        raw_text = output_json_path.read_text(encoding="utf-8")
        parsed = json.loads(raw_text)
    except Exception as e:
        logger.warning(f"output.json 解析失败,跳过清洗: {e}")
        return

    # Serialize fully before touching the file so a serialization error
    # cannot leave a truncated output.json behind.
    serialized = json.dumps(
        _sanitize_json_strings(parsed),
        ensure_ascii=False,
        indent=2,
    )
    output_json_path.write_text(serialized, encoding="utf-8")
    logger.info(f"已完成 output.json 引号清洗: {output_json_path}")
def _print_stream_message(item: "Message") -> None:
    """Print a condensed view of one streamed agent message to stdout.

    Assistant messages with dict content print their text and the names of
    any tool calls; assistant messages with non-empty string content print
    that string. Tool messages with dict content print a one-line marker with
    the tool name. Anything else prints nothing.
    """
    if item.role == "assistant":
        content = item.content
        if isinstance(content, dict):
            text = content.get("text", "")
            tool_calls = content.get("tool_calls", [])
            if text:
                # Long text that carries a recommendation marker is always
                # shown in full.
                is_recommendation = len(text) > 500 and (
                    "推荐结果" in text or "推荐内容" in text or "🎯" in text
                )
                if tool_calls and len(text) > 100 and not is_recommendation:
                    # Text longer than 100 characters that accompanies tool
                    # calls is cut down to a short preview.
                    print(f"[思考] {text[:100]}...")
                else:
                    print(f"\n{text}")
            # Report which tools were invoked.
            if tool_calls:
                for tc in tool_calls:
                    tool_name = tc.get("function", {}).get("name", "unknown")
                    # Skip the "goal" tool to reduce noise.
                    if tool_name != "goal":
                        print(f"[工具] {tool_name}")
        elif isinstance(content, str) and content:
            print(f"\n{content}")
    elif item.role == "tool":
        content = item.content
        if isinstance(content, dict):
            tool_name = content.get("tool_name", "unknown")
            print(f"[结果] {tool_name} ✓")


async def run_agent(
    query: Optional[str] = None,
    demand_id: Optional[int] = None,
    stream_output: bool = True,
) -> Dict[str, Any]:
    """Run one content-finder agent task.

    Args:
        query: Search query. A falsy value (None or "") falls back to
            DEFAULT_QUERY.
        demand_id: Id of this search task. A falsy value (None or 0) falls
            back to DEFAULT_DEMAND_ID.
        stream_output: Whether to echo streamed messages to stdout.

    Returns:
        A dict with "trace_id" and "status" ("completed" or "failed"); on
        failure it also carries "error" with the error message, e.g.
        {"trace_id": "20260317_103046_xyz789", "status": "completed"}.

    Raises:
        ValueError: If the OPEN_ROUTER_API_KEY environment variable is unset
            or empty. This is raised before the run starts and is not turned
            into a "failed" result.
    """
    query = query or DEFAULT_QUERY
    demand_id = demand_id or DEFAULT_DEMAND_ID

    # Load the prompt definition.
    prompt_path = PROJECT_ROOT / "content_finder.prompt"
    prompt = SimplePrompt(prompt_path)

    # Output directory handed to the prompt; also where output.json is read
    # back from after a completed run.
    output_dir = str(PROJECT_ROOT / "output")

    # Build the messages from the query, output_dir and demand_id values.
    # The None guard is defensive: after the fallback above it can only
    # trigger if DEFAULT_DEMAND_ID itself were None.
    demand_id_str = str(demand_id) if demand_id is not None else ""
    messages = prompt.build_messages(query=query, output_dir=output_dir, demand_id=demand_id_str)

    # Resolve runtime configuration.
    api_key = os.getenv("OPEN_ROUTER_API_KEY")
    if not api_key:
        raise ValueError("OPEN_ROUTER_API_KEY 未设置")

    # The MODEL environment variable, when set, overrides the model named in
    # the prompt config.
    model_name = prompt.config.get("model", "sonnet-4.6")
    model = os.getenv("MODEL", f"anthropic/claude-{model_name}")
    temperature = float(prompt.config.get("temperature", 0.3))
    max_iterations = 30

    trace_dir = str(PROJECT_ROOT / "traces")
    skills_dir = str(PROJECT_ROOT / "skills")

    Path(trace_dir).mkdir(parents=True, exist_ok=True)

    store = FileSystemTraceStore(base_path=trace_dir)

    # NOTE(review): this module's `from tools import ...` line does not name
    # fetch_article_detail; confirm that tool is registered with the runner.
    allowed_tools = [
        "weixin_search",
        "fetch_weixin_account",
        "fetch_account_article_list",
        "fetch_article_detail",
    ]

    runner = AgentRunner(
        llm_call=create_openrouter_llm_call(model=model),
        trace_store=store,
        skills_dir=skills_dir,
    )

    config = RunConfig(
        name="内容寻找",
        model=model,
        temperature=temperature,
        max_iterations=max_iterations,
        tools=allowed_tools,
        extra_llm_params={"max_tokens": 8192},
        knowledge=KnowledgeConfig(
            enable_extraction=False,
            enable_completion_extraction=False,
            enable_injection=False,
            # owner="content_finder_agent",
            # default_tags={"project": "content_finder"},
            # default_scopes=["com.piaoquantv.supply"],
            # default_search_types=["tool", "usecase", "definition"],
            # default_search_owner="content_finder_agent"
        )
    )

    # Execute the run and translate the stream into a result dict.
    trace_id = None

    try:
        async for item in runner.run(messages=messages, config=config):
            if isinstance(item, Trace):
                trace_id = item.trace_id

                if item.status == "completed":
                    if trace_id:
                        # Clean quotes in this run's output.json, if any.
                        output_json_path = Path(output_dir) / trace_id / "output.json"
                        _sanitize_output_json(output_json_path)
                    logger.info(f"Agent 执行完成: trace_id={trace_id}")
                    return {
                        "trace_id": trace_id,
                        "status": "completed"
                    }
                elif item.status == "failed":
                    logger.error(f"Agent 执行失败: {item.error_message}")
                    return {
                        "trace_id": trace_id,
                        "status": "failed",
                        "error": item.error_message
                    }

            elif isinstance(item, Message) and stream_output:
                _print_stream_message(item)

        # The stream ended without a completed/failed trace: report it as an
        # abnormal exit.
        return {
            "trace_id": trace_id,
            "status": "failed",
            "error": "Agent 异常退出"
        }

    except KeyboardInterrupt:
        logger.info("用户中断")
        if stream_output:
            print("\n用户中断")
        return {
            "trace_id": trace_id,
            "status": "failed",
            "error": "用户中断"
        }
    except Exception as e:
        # Top-level boundary: log with traceback and report the failure to
        # the caller instead of raising.
        logger.error(f"Agent 执行异常: {e}", exc_info=True)
        if stream_output:
            print(f"\n执行失败: {e}")
        return {
            "trace_id": trace_id,
            "status": "failed",
            "error": str(e)
        }
# Script entry point: run a single agent task with run_agent's defaults
# (default query, default demand id, streaming output enabled).
if __name__ == "__main__":
    import asyncio
    asyncio.run(run_agent())
|