13 ساعت پیش · 4c0c76b2d0
--- a/agent/docs/browser.md
+++ b/agent/docs/browser.md
@@ -0,0 +1,334 @@
 
				+# 浏览器自动化技术文档
			
 
				+
			
 
				+> agent 框架的浏览器操作模块：会话管理、工具体系、内容提取、Cookie 管理。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 目录
			
 
				+
			
 
				+1. [整体架构](#整体架构)
			
 
				+2. [浏览器会话管理](#浏览器会话管理)
			
 
				+3. [工具体系](#工具体系)
			
 
				+4. [结果转换机制](#结果转换机制)
			
 
				+5. [URL 清洗](#url-清洗)
			
 
				+6. [文件存储](#文件存储)
			
 
				+7. [Skill 集成](#skill-集成)
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 整体架构
			
 
				+
			
 
				+浏览器操作的核心实现位于 `agent/tools/builtin/browser/`，采用适配器模式将第三方库 [browser-use](https://github.com/browser-use/browser-use) 的能力封装为 agent 框架的标准工具。
			
 
				+
			
 
				+```
			
 
				+agent/tools/builtin/browser/
			
 
				+├── __init__.py          # 统一导出所有浏览器工具
			
 
				+├── baseClass.py         # 核心实现（~2200行）：会话管理 + 27个工具函数
			
 
				+└── sync_mysql_help.py   # 同步 MySQL 辅助类（Cookie 查询）
			
 
				+
			
 
				+agent/skill/skills/
			
 
				+└── browser.md           # 浏览器工具使用指南（Skill 注入到 LLM system prompt）
			
 
				+```
			
 
				+
			
 
				+关键依赖关系：
			
 
				+
			
 
				+```
			
 
				+agent 框架 (@tool 装饰器, ToolResult)
			
 
				+    ↓ 适配
			
 
				+browser-use (BrowserSession, Tools, ActionResult)
			
 
				+    ↓ 底层
			
 
				+Chrome DevTools Protocol (CDP)
			
 
				+```
			
 
				+
			
 
				+不直接依赖 Playwright，完全基于 CDP 协议与浏览器通信。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 浏览器会话管理
			
 
				+
			
 
				+### 全局单例模式
			
 
				+
			
 
				+使用模块级全局变量维护唯一的浏览器会话，避免重复创建/销毁浏览器实例：
			
 
				+
			
 
				+```python
			
 
				+# baseClass.py:98-101
			
 
				+_browser_session: Optional[BrowserSession] = None
			
 
				+_browser_tools: Optional[Tools] = None
			
 
				+_file_system: Optional[FileSystem] = None
			
 
				+_last_browser_type: str = "local"
			
 
				+_last_headless: bool = True
			
 
				+_live_url: Optional[str] = None
			
 
				+```
			
 
				+
			
 
				+### 三种浏览器模式
			
 
				+
			
 
				+通过 `init_browser_session(browser_type=...)` 初始化，支持三种运行模式：
			
 
				+
			
 
				+| 模式 | browser_type | 底层实现 | 适用场景 |
			
 
				+|------|-------------|---------|---------|
			
 
				+| 本地浏览器 | `"local"` | 启动本地 Chrome，通过 `user_data_dir` 持久化 profile | 开发调试，速度最快 |
			
 
				+| 云浏览器 | `"cloud"` | 连接 browser-use 云服务，通过 `cdp_url` 远程控制 | 生产环境，不占本地资源 |
			
 
				+| 容器浏览器 | `"container"` | 调用远程 API 创建 Docker 容器，内含 Chrome，通过 CDP 连接 | 隔离性好，支持预配置账户 |
			
 
				+
			
 
				+初始化流程（`baseClass.py:230-322`）：
			
 
				+
			
 
				+```
			
 
				+init_browser_session()
			
 
				+  ├─ local:  检测 macOS Chrome 路径 → 创建 user_data_dir → BrowserSession(is_local=True)
			
 
				+  ├─ cloud:  BrowserSession(use_cloud=True) → 解析 cdp_url 生成 live_url
			
 
				+  └─ container: create_container() API → 等待浏览器启动 → BrowserSession(cdp_url=...)
			
 
				+```
			
 
				+
			
 
				+### 容器创建流程
			
 
				+
			
 
				+容器模式通过 HTTP API 与远程容器管理服务交互（`baseClass.py:105-228`）：
			
 
				+
			
 
				+```
			
 
				+步骤 1.1: POST /api/v1/container/create
			
 
				+  → 返回 container_id, vnc, cdp 地址
			
 
				+  → 等待 5 秒让容器内浏览器启动
			
 
				+
			
 
				+步骤 1.2: POST /api/v1/browser/page/create
			
 
				+  → 传入 container_id, url, account_name
			
 
				+  → 返回 connection_id
			
 
				+  → 内置重试机制（最多 3 次）
			
 
				+```
			
 
				+
			
 
				+### 会话健康检查与自动恢复
			
 
				+
			
 
				+`get_browser_session()`（`baseClass.py:330-372`）在每次工具调用前检查 CDP 连接是否存活：
			
 
				+
			
 
				+```python
			
 
				+# 通过 CDP 执行 Runtime.evaluate('1+1') 探测连接
			
 
				+cdp_session = await _browser_session.get_or_create_cdp_session()
			
 
				+await asyncio.wait_for(
			
 
				+    cdp_session.cdp_client.send.Runtime.evaluate(
			
 
				+        params={'expression': '1+1'},
			
 
				+        session_id=cdp_session.session_id
			
 
				+    ),
			
 
				+    timeout=3.0,
			
 
				+)
			
 
				+```
			
 
				+
			
 
				+如果连接断开（WebSocket 超时等），自动 cleanup 并重新初始化。
			
 
				+
			
 
				+### 会话生命周期
			
 
				+
			
 
				+```
			
 
				+init_browser_session()     → 创建会话（幂等，已存在则直接返回）
			
 
				+get_browser_session()      → 获取会话（自动健康检查 + 重连）
			
 
				+cleanup_browser_session()  → 优雅停止（session.stop()）
			
 
				+kill_browser_session()     → 强制终止（session.kill()）
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 工具体系
			
 
				+
			
 
				+所有工具通过 `@tool()` 装饰器注册到 agent 框架的 `ToolRegistry`，LLM 可直接调用。共 27 个工具，分为 8 类。
			
 
				+
			
 
				+### 导航类（Navigation）
			
 
				+
			
 
				+| 工具 | 功能 | 底层调用 |
			
 
				+|------|------|---------|
			
 
				+| `browser_navigate_to_url(url, new_tab)` | 导航到 URL | `tools.navigate()` |
			
 
				+| `browser_search_web(query, engine)` | 搜索引擎搜索（支持 bing/google/duckduckgo） | `tools.search()` |
			
 
				+| `browser_go_back()` | 浏览器后退 | `tools.go_back()` |
			
 
				+| `browser_wait(seconds)` | 等待指定秒数 | `tools.wait()` |
			
 
				+| `browser_get_live_url()` | 获取云浏览器实时画面链接 | 读取全局 `_live_url` |
			
 
				+
			
 
				+### 元素交互类（Interaction）
			
 
				+
			
 
				+| 工具 | 功能 | 特殊处理 |
			
 
				+|------|------|---------|
			
 
				+| `browser_click_element(index)` | 点击元素 | 挂载日志监听器自动捕获下载链接 |
			
 
				+| `browser_input_text(index, text, clear)` | 输入文本 | 支持清除已有内容 |
			
 
				+| `browser_send_keys(keys)` | 发送键盘按键/快捷键 | 支持组合键如 `Control+A` |
			
 
				+| `browser_upload_file(index, path)` | 上传文件 | 需要绝对路径 |
			
 
				+
			
 
				+`browser_click_element` 的下载链接捕获机制（`baseClass.py:711-846`）：
			
 
				+
			
 
				+```python
			
 
				+# 1. 挂载自定义 logging.Handler 到 browser_use 命名空间
			
 
				+capture_handler = DownloadLinkCaptureHandler()
			
 
				+logger = logging.getLogger("browser_use")
			
 
				+logger.addHandler(capture_handler)
			
 
				+
			
 
				+# 2. 执行点击
			
 
				+result = await tools.click(index=index, browser_session=browser)
			
 
				+
			
 
				+# 3. 检查是否捕获到下载链接（通过正则匹配日志中的 URL）
			
 
				+if capture_handler.captured_url:
			
 
				+    # 将链接注入到 ToolResult.output，LLM 可以看到并决定下一步
			
 
				+```
			
 
				+
			
 
				+### 滚动与视图类（Scroll & View）
			
 
				+
			
 
				+| 工具 | 功能 | 特殊处理 |
			
 
				+|------|------|---------|
			
 
				+| `browser_scroll_page(down, pages, index)` | 滚动页面 | 通过 CDP 检测 scrollY 变化，判断是否到达边界；限制单次最大 10 页 |
			
 
				+| `browser_find_text(text)` | 查找文本并滚动到位 | `tools.find_text()` |
			
 
				+| `browser_screenshot()` | 截图 | `tools.screenshot()` |
			
 
				+| `browser_get_visual_selector_map()` | 获取带元素索引标注的截图 + 交互元素列表 | 使用 `create_highlighted_screenshot_async` 生成标注图 |
			
 
				+| `browser_get_selector_map()` | 获取交互元素索引映射（纯文本） | 通过 `BrowserStateRequestEvent` 触发 DOM 更新 |
			
 
				+
			
 
				+`browser_get_visual_selector_map`（`baseClass.py:1066-1161`）是最核心的观察工具：
			
 
				+
			
 
				+```
			
 
				+1. 触发 BrowserStateRequestEvent(include_dom=True, include_screenshot=True)
			
 
				+2. 等待浏览器返回完整状态（DOM 树 + 截图）
			
 
				+3. 从 DOM 状态提取 selector_map（所有可交互元素的索引）
			
 
				+4. 调用 create_highlighted_screenshot_async 在截图上标注元素索引号
			
 
				+5. 构建元素列表（tag, aria-label, href, type, role 等属性）
			
 
				+6. 返回 ToolResult（images 字段含标注截图，output 含元素列表）
			
 
				+```
			
 
				+
			
 
				+### 标签页管理类（Tab）
			
 
				+
			
 
				+| 工具 | 功能 |
			
 
				+|------|------|
			
 
				+| `browser_switch_tab(tab_id)` | 切换标签页（4 字符 ID） |
			
 
				+| `browser_close_tab(tab_id)` | 关闭标签页 |
			
 
				+
			
 
				+### 下拉框类（Dropdown）
			
 
				+
			
 
				+| 工具 | 功能 |
			
 
				+|------|------|
			
 
				+| `browser_get_dropdown_options(index)` | 获取下拉框选项 |
			
 
				+| `browser_select_dropdown_option(index, text)` | 选择下拉框选项 |
			
 
				+
			
 
				+### 内容提取类（Content Extraction）
			
 
				+
			
 
				+| 工具 | 功能 | 底层 |
			
 
				+|------|------|------|
			
 
				+| `browser_extract_content(query, extract_links)` | LLM 驱动的结构化数据提取 | `tools.extract()` + Qwen LLM |
			
 
				+| `browser_read_long_content(goal, source)` | 智能长内容读取（自动检测 PDF） | `tools.read_long_content()` |
			
 
				+| `browser_get_page_html()` | 获取完整 HTML | CDP `Runtime.evaluate` |
			
 
				+| `browser_download_direct_url(url, save_name)` | HTTP 直链下载 | `httpx.AsyncClient` 流式下载 |
			
 
				+
			
 
				+#### 内容提取的 LLM 适配
			
 
				+
			
 
				+`extraction_adapter`（`baseClass.py:1387-1409`）将 browser-use 的 LangChain Runnable 接口适配为 Qwen LLM 调用：
			
 
				+
			
 
				+```python
			
 
				+async def extraction_adapter(input_data):
			
 
				+    response = await qwen_llm_call(messages=[{"role": "user", "content": prompt}])
			
 
				+    content = response["content"]
			
 
				+    # 自动清洗搜索引擎重定向 URL（Bing Base64 解码、Google url 参数提取）
			
 
				+    urls = re.findall(r'https?://[^\s<>"\']+', content)
			
 
				+    for original_url in urls:
			
 
				+        clean_url = scrub_search_redirect_url(original_url)
			
 
				+        if clean_url != original_url:
			
 
				+            content = content.replace(original_url, clean_url)
			
 
				+    return Namespace(completion=content)
			
 
				+```
			
 
				+
			
 
				+#### PDF 自动检测与下载
			
 
				+
			
 
				+`_detect_and_download_pdf_via_cdp`（`baseClass.py:1458-1550`）：
			
 
				+
			
 
				+```
			
 
				+1. 检查 URL 是否以 .pdf 结尾
			
 
				+2. 如果不明显，通过 CDP 检查 document.contentType
			
 
				+3. 确认是 PDF 后，通过浏览器内 fetch API 下载（自动携带 cookies/session）
			
 
				+4. 将 data URL 中的 base64 解码为 PDF 文件保存到本地
			
 
				+5. 将 source 参数改为本地文件路径，交给 pypdf 解析
			
 
				+```
			
 
				+
			
 
				+### Cookie 管理类
			
 
				+
			
 
				+| 工具 | 功能 |
			
 
				+|------|------|
			
 
				+| `browser_export_cookies(name, account)` | 导出当前域名 Cookie 到 `.cache/.cookies/` |
			
 
				+| `browser_load_cookies(url, name)` | 自动匹配 Cookie 文件并注入浏览器 |
			
 
				+| `browser_ensure_login_with_cookies(cookie_type, url)` | 检查登录状态，需要时从 MySQL 查询 Cookie 注入 |
			
 
				+
			
 
				+#### Cookie 加载匹配策略
			
 
				+
			
 
				+`browser_load_cookies`（`baseClass.py:2046-2177`）：
			
 
				+
			
 
				+```
			
 
				+1. 精确匹配：{domain}.json（如 xiaohongshu.com.json）
			
 
				+2. 前缀匹配：{domain}*.json
			
 
				+3. 模糊匹配：{主域名}*.json（如 xiaohongshu*.json）
			
 
				+4. 未找到时：根据 auto_navigate 参数决定是否直接导航到目标页面
			
 
				+```
			
 
				+
			
 
				+#### 从 MySQL 加载 Cookie
			
 
				+
			
 
				+`browser_ensure_login_with_cookies` 流程：
			
 
				+
			
 
				+```
			
 
				+1. 导航到目标 URL
			
 
				+2. 执行 JS 检测登录状态（查找登录按钮/用户头像）
			
 
				+3. 如果需要登录：
			
 
				+   a. 从 agent_channel_cookies 表查询 Cookie
			
 
				+   b. 解析 Cookie（支持 JSON 数组、JSON 对象、分号分隔字符串）
			
 
				+   c. 通过 CDP _cdp_set_cookies 注入
			
 
				+   d. 刷新页面
			
 
				+```
			
 
				+
			
 
				+### 控制流类
			
 
				+
			
 
				+| 工具 | 功能 |
			
 
				+|------|------|
			
 
				+| `browser_evaluate(code)` | 在页面执行任意 JavaScript |
			
 
				+| `browser_wait_for_user_action(message, timeout)` | 暂停等待用户手动操作（如验证码） |
			
 
				+| `browser_done(text, success)` | 标记任务完成 |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 结果转换机制
			
 
				+
			
 
				+所有工具的返回值统一为 agent 框架的 `ToolResult`，通过 `action_result_to_tool_result()` 将 browser-use 的 `ActionResult` 转换：
			
 
				+
			
 
				+```python
			
 
				+# baseClass.py:407-431
			
 
				+def action_result_to_tool_result(result: ActionResult, title: str = None) -> ToolResult:
			
 
				+    if result.error:
			
 
				+        return ToolResult(title=..., output="", error=result.error,
			
 
				+                         long_term_memory=result.long_term_memory or result.error)
			
 
				+    return ToolResult(title=..., output=result.extracted_content or "",
			
 
				+                     long_term_memory=..., metadata=result.metadata or {})
			
 
				+```
			
 
				+
			
 
				+`ToolResult` 支持双层记忆管理（`agent/tools/models.py`）：
			
 
				+
			
 
				+- `output`：完整内容，可配置 `include_output_only_once=True` 只给 LLM 看一次
			
 
				+- `long_term_memory`：简短摘要，永久保留在对话历史中
			
 
				+- `images`：截图等图片数据（base64）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## URL 清洗
			
 
				+
			
 
				+`scrub_search_redirect_url()`（`baseClass.py:1347-1385`）自动解析搜索引擎重定向链接：
			
 
				+
			
 
				+| 引擎 | 处理方式 |
			
 
				+|------|---------|
			
 
				+| Bing | 提取 `u` 参数，去掉 `a1` 前缀，Base64 解码 |
			
 
				+| Google | 提取 `url` 参数，URL 解码 |
			
 
				+| 通用 | 检查 `target`/`dest`/`destination`/`link` 参数 |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 文件存储
			
 
				+
			
 
				+所有浏览器产生的文件统一存储在工作目录下：
			
 
				+
			
 
				+```
			
 
				+.cache/
			
 
				+├── .browser_use_files/    # 浏览器下载、截图、PDF 等临时文件
			
 
				+└── .cookies/              # Cookie 持久化文件（{domain}_{account}.json）
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## Skill 集成
			
 
				+
			
 
				+`agent/skill/skills/browser.md` 作为 Skill 注入到 LLM 的 system prompt，指导 LLM 正确使用浏览器工具。核心规则：
			
 
				+
			
 
				+1. 操作前必须先通过 `browser_get_visual_selector_map` 获取元素索引
			
 
				+2. 任何触发页面变化的操作后都要 `browser_wait`
			
 
				+3. 登录优先用 `browser_load_cookies`，首次登录需请求人类协助
			
 
				+4. 优先使用高级提取工具（`extract_content`/`read_long_content`）而非手动解析 DOM
			
--- a/configs/knowledge_sources.json
+++ b/configs/knowledge_sources.json
@@ -0,0 +1,17 @@
 
				+{
			
 
				+  "knowledge_sources": {
			
 
				+    "platform_rules": {
			
 
				+      "type": "static",
			
 
				+      "items": [
			
 
				+        {
			
 
				+          "title": "平台约束",
			
 
				+          "content": "只允许使用微信平台相关工具，不切换到其他平台。"
			
 
				+        },
			
 
				+        {
			
 
				+          "title": "受众画像",
			
 
				+          "content": "核心受众为 50 岁以上中老年人，更关注实用、稳健、可信内容。"
			
 
				+        }
			
 
				+      ]
			
 
				+    }
			
 
				+  }
			
 
				+}
			
--- a/configs/search_agent_profile.dev.example.json
+++ b/configs/search_agent_profile.dev.example.json
@@ -0,0 +1,19 @@
 
				+{
			
 
				+  "runtime": {
			
 
				+    "target_count": 8
			
 
				+  },
			
 
				+  "search": {
			
 
				+    "max_keywords": 6,
			
 
				+    "recall_multiplier": 4.0
			
 
				+  },
			
 
				+  "filter": {
			
 
				+    "max_detail_fetch": 25,
			
 
				+    "enable_llm_review": true
			
 
				+  },
			
 
				+  "account": {
			
 
				+    "account_strategy": {
			
 
				+      "sample_articles_limit": 5,
			
 
				+      "source_urls_limit": 80
			
 
				+    }
			
 
				+  }
			
 
				+}
			
--- a/configs/search_agent_strategy.example.json
+++ b/configs/search_agent_strategy.example.json
@@ -0,0 +1,41 @@
 
				+{
			
 
				+  "runtime": {
			
 
				+    "target_count": 12
			
 
				+  },
			
 
				+  "search": {
			
 
				+    "max_keywords": 8,
			
 
				+    "initial_cursor": "1",
			
 
				+    "keyword_priority": "demand_first",
			
 
				+    "extra_keywords": [
			
 
				+      "中老年",
			
 
				+      "权威解读"
			
 
				+    ],
			
 
				+    "recall_multiplier": 5.0,
			
 
				+    "min_candidate_multiplier": 2.0,
			
 
				+    "near_enough_candidate_multiplier": 1.2
			
 
				+  },
			
 
				+  "filter": {
			
 
				+    "filter_near_ratio": 0.8,
			
 
				+    "max_detail_fetch": 40,
			
 
				+    "enable_llm_review": true,
			
 
				+    "quality_score": {
			
 
				+      "min_body_length": 900,
			
 
				+      "high_relevance_ratio": 0.8,
			
 
				+      "high_view_count": 10000,
			
 
				+      "medium_view_count": 1000,
			
 
				+      "high_engage_rate": 0.05,
			
 
				+      "low_engage_rate": 0.001,
			
 
				+      "spam_keywords": [
			
 
				+        "震惊",
			
 
				+        "必看",
			
 
				+        "立刻转发"
			
 
				+      ]
			
 
				+    }
			
 
				+  },
			
 
				+  "account": {
			
 
				+    "account_strategy": {
			
 
				+      "sample_articles_limit": 5,
			
 
				+      "source_urls_limit": 100
			
 
				+    }
			
 
				+  }
			
 
				+}
			
--- a/docs/knowledge_summary_guide.md
+++ b/docs/knowledge_summary_guide.md
@@ -0,0 +1,155 @@
 
				+# 每日对话知识库使用指南
			
 
				+
			
 
				+## 功能说明
			
 
				+
			
 
				+自动从 Claude Code 对话历史中提取问答对，使用 LLM 进行智能总结，生成结构化的 Markdown 知识库。
			
 
				+
			
 
				+## 快速开始
			
 
				+
			
 
				+### 1. 手动触发总结
			
 
				+
			
 
				+```bash
			
 
				+# 总结今天的对话（使用 LLM）
			
 
				+python summarize_daily.py --use-llm
			
 
				+
			
 
				+# 总结指定日期
			
 
				+python summarize_daily.py --use-llm --date 2026-04-23
			
 
				+
			
 
				+# 使用指定模型
			
 
				+python summarize_daily.py --use-llm --model "deepseek/deepseek-chat-v3-0324"
			
 
				+
			
 
				+# 不使用 LLM（简单格式化）
			
 
				+python summarize_daily.py
			
 
				+```
			
 
				+
			
 
				+### 2. 自动触发（集成到 pipeline）
			
 
				+
			
 
				+在 `.env` 文件中添加：
			
 
				+
			
 
				+```bash
			
 
				+ENABLE_KNOWLEDGE_SUMMARY=true
			
 
				+```
			
 
				+
			
 
				+然后运行 pipeline，完成后会自动生成知识总结：
			
 
				+
			
 
				+```bash
			
 
				+python run_search_agent.py
			
 
				+```
			
 
				+
			
 
				+## 配置选项
			
 
				+
			
 
				+### 环境变量
			
 
				+
			
 
				+```bash
			
 
				+# 是否启用知识总结（默认 false）
			
 
				+ENABLE_KNOWLEDGE_SUMMARY=true
			
 
				+
			
 
				+# 知识库目录（默认 knowledge/）
			
 
				+KNOWLEDGE_BASE_DIR=knowledge
			
 
				+
			
 
				+# 总结使用的模型（默认 anthropic/claude-sonnet-4.5）
			
 
				+KNOWLEDGE_SUMMARY_MODEL=deepseek/deepseek-chat-v3-0324
			
 
				+
			
 
				+# 最小问题长度（默认 10 字符）
			
 
				+KNOWLEDGE_MIN_QUESTION_LENGTH=10
			
 
				+```
			
 
				+
			
 
				+### 推荐模型
			
 
				+
			
 
				+由于 Claude 模型在某些区域不可用，推荐使用以下模型：
			
 
				+
			
 
				+- `deepseek/deepseek-chat-v3-0324` - 性价比高，效果好
			
 
				+- `openai/gpt-4o-mini` - OpenAI 便宜模型
			
 
				+- `qwen/qwen-2.5-72b-instruct` - 通义千问
			
 
				+
			
 
				+## 输出格式
			
 
				+
			
 
				+知识库按日期组织：
			
 
				+
			
 
				+```
			
 
				+knowledge/
			
 
				+├── 2026-04/
			
 
				+│   ├── 2026-04-23.md
			
 
				+│   ├── 2026-04-24.md
			
 
				+│   └── ...
			
 
				+└── 2026-05/
			
 
				+    └── ...
			
 
				+```
			
 
				+
			
 
				+每个文件包含当天所有会话的总结：
			
 
				+
			
 
				+```markdown
			
 
				+# 2026-04-23 对话总结
			
 
				+
			
 
				+> 生成时间：2026-04-23 18:30:00
			
 
				+> 会话数：3
			
 
				+> 问答对数：12
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 会话 1
			
 
				+
			
 
				+### Q: 问题简述
			
 
				+**问题**：完整问题描述
			
 
				+
			
 
				+**解决方案**：
			
 
				+- 关键步骤1
			
 
				+- 关键步骤2
			
 
				+
			
 
				+**涉及文件**：
			
 
				+- `path/to/file.py`
			
 
				+
			
 
				+**相关技术**：
			
 
				+- 技术名称
			
 
				+
			
 
				+---
			
 
				+```
			
 
				+
			
 
				+## 工作原理
			
 
				+
			
 
				+1. **解析对话历史**：从 `~/.claude/projects/{project}/` 读取 JSONL 文件
			
 
				+2. **提取问答对**：过滤系统消息，配对用户问题和助手回答
			
 
				+3. **LLM 总结**：使用 LLM 提取关键信息，生成结构化输出
			
 
				+4. **保存到文件**：按日期组织，追加到当天的 Markdown 文件
			
 
				+
			
 
				+## 注意事项
			
 
				+
			
 
				+1. **API Key**：使用 LLM 需要设置 `OPEN_ROUTER_API_KEY` 环境变量
			
 
				+2. **区域限制**：某些模型在特定区域不可用，建议使用 DeepSeek 或 GPT-4o-mini
			
 
				+3. **成本控制**：LLM 总结会产生 API 调用费用，可以选择不使用 LLM
			
 
				+4. **文件追加**：同一天多次运行会追加内容，不会覆盖
			
 
				+
			
 
				+## 故障排除
			
 
				+
			
 
				+### 问题：找不到对话历史
			
 
				+
			
 
				+确保你在正确的项目目录下运行，或使用 `--cwd` 参数指定：
			
 
				+
			
 
				+```bash
			
 
				+python summarize_daily.py --cwd /path/to/project
			
 
				+```
			
 
				+
			
 
				+### 问题：LLM 调用失败
			
 
				+
			
 
				+检查 API Key 是否正确，或尝试其他模型：
			
 
				+
			
 
				+```bash
			
 
				+python summarize_daily.py --use-llm --model "deepseek/deepseek-chat-v3-0324"
			
 
				+```
			
 
				+
			
 
				+### 问题：没有提取到问答对
			
 
				+
			
 
				+检查对话内容是否太短（默认最小 10 字符），可以调整：
			
 
				+
			
 
				+```bash
			
 
				+export KNOWLEDGE_MIN_QUESTION_LENGTH=5
			
 
				+python summarize_daily.py
			
 
				+```
			
 
				+
			
 
				+## 扩展功能（未来）
			
 
				+
			
 
				+- [ ] 语义搜索：集成向量数据库
			
 
				+- [ ] 知识图谱：提取实体和关系
			
 
				+- [ ] 定期复盘：每周/每月自动生成总结报告
			
 
				+- [ ] 知识评分：根据使用频率评估价值
			
 
				+- [ ] 多项目支持：跨项目知识聚合
			
--- a/docs/search_agent_refactor_plan.md
+++ b/docs/search_agent_refactor_plan.md
@@ -0,0 +1,209 @@
 
				+# Search Agent 架构重构方案
			
 
				+
			
 
				+## 当前问题总结
			
 
				+
			
 
				+### 🔴 严重问题
			
 
				+1. **策略加载逻辑重复** - `SearchAgentCore` 和 `harness/runner` 都在加载策略，职责不清
			
 
				+2. **分层违规** - domain 层反向依赖 pipeline 层
			
 
				+3. **硬编码路径散落** - `tests/output`、`tests/traces` 等路径到处都是
			
 
				+
			
 
				+### 🟡 中等问题
			
 
				+4. **配置分散** - 配置分布在 4 个地方，难以管理
			
 
				+5. **职责混乱** - `pipeline/runner.py` 同时做装配、CLI、知识源定义
			
 
				+6. **入口重复** - `run_pipeline.py` 和 `run_search_agent.py` 职责不清
			
 
				+
			
 
				+## 目标架构
			
 
				+
			
 
				+```
			
 
				+search_agent/
			
 
				+├── core/                    # 核心领域逻辑（纯业务）
			
 
				+│   ├── __init__.py
			
 
				+│   ├── models.py           # 数据模型（Policy, Context, Article 等）
			
 
				+│   ├── policy.py           # 策略定义和应用
			
 
				+│   └── repository.py       # 策略仓储（DB 访问）
			
 
				+│
			
 
				+├── pipeline/               # 流水线引擎（可复用）
			
 
				+│   ├── __init__.py
			
 
				+│   ├── orchestrator.py    # 流水线编排器
			
 
				+│   ├── base.py            # Stage/Gate/Hook 基类
			
 
				+│   ├── context.py         # 上下文数据结构
			
 
				+│   ├── stages/            # 各个阶段实现
			
 
				+│   │   ├── __init__.py
			
 
				+│   │   ├── demand_analysis.py
			
 
				+│   │   ├── query_expansion.py
			
 
				+│   │   ├── content_search.py
			
 
				+│   │   ├── content_filter.py
			
 
				+│   │   ├── account_precipitate.py
			
 
				+│   │   └── output_persist.py
			
 
				+│   ├── gates/             # 质量门禁
			
 
				+│   │   ├── __init__.py
			
 
				+│   │   ├── search_completeness.py
			
 
				+│   │   ├── filter_sufficiency.py
			
 
				+│   │   └── output_schema.py
			
 
				+│   ├── hooks/             # 观察者钩子
			
 
				+│   │   ├── __init__.py
			
 
				+│   │   ├── trace_hook.py
			
 
				+│   │   ├── progress_hook.py
			
 
				+│   │   └── database_hook.py
			
 
				+│   └── adapters/          # 外部工具适配器
			
 
				+│       ├── __init__.py
			
 
				+│       ├── weixin.py
			
 
				+│       └── knowledge.py
			
 
				+│
			
 
				+├── config/                 # 配置管理（统一入口）
			
 
				+│   ├── __init__.py
			
 
				+│   ├── settings.py        # 配置类定义
			
 
				+│   ├── defaults.py        # 默认配置
			
 
				+│   └── loader.py          # 配置加载器
			
 
				+│
			
 
				+├── application/           # 应用层（组装和编排）
			
 
				+│   ├── __init__.py
			
 
				+│   ├── builder.py         # Pipeline 构建器
			
 
				+│   ├── runner.py          # 执行器（带预算、超时等约束）
			
 
				+│   └── service.py         # 对外服务接口
			
 
				+│
			
 
				+├── infrastructure/        # 基础设施
			
 
				+│   ├── __init__.py
			
 
				+│   ├── database.py        # 数据库连接池
			
 
				+│   ├── http_client.py     # HTTP 客户端
			
 
				+│   └── logging.py         # 日志配置
			
 
				+│
			
 
				+└── cli/                   # 命令行入口
			
 
				+    ├── __init__.py
			
 
				+    └── main.py            # 统一 CLI 入口
			
 
				+
			
 
				+# 根目录
			
 
				+run_search_agent.py        # 简化为 CLI 入口的薄壳
			
 
				+configs/                   # 外部配置文件
			
 
				+├── default.json          # 默认配置
			
 
				+├── production.json       # 生产配置
			
 
				+└── knowledge_sources.json # 知识源配置
			
 
				+```
			
 
				+
			
 
				+## 重构步骤
			
 
				+
			
 
				+### Phase 1: 配置统一化（P2 优先）
			
 
				+
			
 
				+**目标**: 消除硬编码，统一配置入口
			
 
				+
			
 
				+**步骤**:
			
 
				+1. 创建 `src/config/settings.py` - 定义完整的配置类
			
 
				+2. 创建 `src/config/loader.py` - 统一加载环境变量和配置文件
			
 
				+3. 修改所有硬编码路径，改为从配置读取
			
 
				+4. 将 `default_knowledge_sources()` 移到 `configs/knowledge_sources.json`
			
 
				+
			
 
				+**文件变更**:
			
 
				+- 新增: `src/config/settings.py`, `src/config/loader.py`
			
 
				+- 修改: `src/pipeline/runner.py`, `src/domain/search/core.py`
			
 
				+- 新增: `configs/knowledge_sources.json`
			
 
				+
			
 
				+### Phase 2: 分层修复（P1）
			
 
				+
			
 
				+**目标**: 修复 domain 层反向依赖 pipeline 层的问题
			
 
				+
			
 
				+**步骤**:
			
 
				+1. 创建 `src/application/builder.py` - 负责组装 pipeline
			
 
				+2. 将 `build_default_pipeline()` 从 `pipeline/runner.py` 移到 `application/builder.py`
			
 
				+3. `SearchAgentCore` 不再导入 `pipeline.runner`，改为接受 `PipelineOrchestrator` 注入
			
 
				+4. 重命名 `pipeline/runner.py` 为 `pipeline/factory.py`（只做工厂职责）
			
 
				+
			
 
				+**文件变更**:
			
 
				+- 新增: `src/application/builder.py`
			
 
				+- 修改: `src/domain/search/core.py`
			
 
				+- 重命名: `src/pipeline/runner.py` → `src/pipeline/factory.py`
			
 
				+
			
 
				+### Phase 3: 策略加载去重（P0 最高优先级）
			
 
				+
			
 
				+**目标**: 消除策略加载重复逻辑
			
 
				+
			
 
				+**步骤**:
			
 
				+1. 将 `harness/runner.py` 重命名为 `application/runner.py`
			
 
				+2. `SearchAgentCore.run()` 移除 `use_db_policy` 参数和内部策略加载逻辑
			
 
				+3. `SearchAgentCore.run()` 只接受 `policy: SearchAgentPolicy` 参数（必填）
			
 
				+4. 策略加载完全由 `application/runner.py` 负责
			
 
				+
			
 
				+**文件变更**:
			
 
				+- 移动: `src/harness/search_agent/runner.py` → `src/application/runner.py`
			
 
				+- 修改: `src/domain/search/core.py` - 简化接口
			
 
				+- 修改: `run_search_agent.py` - 调用新接口
			
 
				+
			
 
				+### Phase 4: 入口简化
			
 
				+
			
 
				+**目标**: 统一 CLI 入口，消除重复
			
 
				+
			
 
				+**步骤**:
			
 
				+1. 创建 `src/cli/main.py` - 统一 CLI 入口
			
 
				+2. `run_search_agent.py` 简化为薄壳，只调用 `cli.main()`
			
 
				+3. 删除或归档 `run_pipeline.py`（功能已被 `run_search_agent.py` 覆盖）
			
 
				+
			
 
				+**文件变更**:
			
 
				+- 新增: `src/cli/main.py`
			
 
				+- 简化: `run_search_agent.py`
			
 
				+- 删除: `run_pipeline.py`（或移到 `scripts/legacy/`）
			
 
				+
			
 
				+### Phase 5: 目录重组
			
 
				+
			
 
				+**目标**: 清晰的模块边界
			
 
				+
			
 
				+**步骤**:
			
 
				+1. 将 `src/harness/` 内容整合到 `src/application/`
			
 
				+2. 删除空的 `src/harness/` 目录
			
 
				+3. 确保每个模块职责单一
			
 
				+
			
 
				+**文件变更**:
			
 
				+- 移动: `src/harness/search_agent/*` → `src/application/`
			
 
				+- 删除: `src/harness/` 目录
			
 
				+
			
 
				+## 重构后的调用链
			
 
				+
			
 
				+```
			
 
				+run_search_agent.py (薄壳)
			
 
				+    ↓
			
 
				+src/cli/main.py (CLI 解析)
			
 
				+    ↓
			
 
				+src/application/runner.py (策略加载 + 预算控制)
			
 
				+    ↓
			
 
				+src/application/builder.py (组装 pipeline)
			
 
				+    ↓
			
 
				+src/domain/search/core.py (执行业务逻辑)
			
 
				+    ↓
			
 
				+src/pipeline/orchestrator.py (编排各阶段)
			
 
				+    ↓
			
 
				+src/pipeline/stages/* (具体阶段实现)
			
 
				+```
			
 
				+
			
 
				+## 配置加载优先级
			
 
				+
			
 
				+```
			
 
				+1. 环境变量 (PIPELINE_*, SEARCH_AGENT_*)
			
 
				+2. 命令行参数 (--config, --strategy-file)
			
 
				+3. 配置文件 (configs/*.json)
			
 
				+4. 代码默认值 (src/config/defaults.py)
			
 
				+```
			
 
				+
			
 
				+## 验证清单
			
 
				+
			
 
				+重构完成后，确保：
			
 
				+- [ ] 所有测试通过
			
 
				+- [ ] 没有硬编码路径
			
 
				+- [ ] 配置可以通过环境变量或文件覆盖
			
 
				+- [ ] 分层清晰，domain 不依赖 pipeline
			
 
				+- [ ] 策略加载逻辑只在一处
			
 
				+- [ ] CLI 入口统一
			
 
				+- [ ] 文档更新
			
 
				+
			
 
				+## 风险控制
			
 
				+
			
 
				+1. **渐进式重构** - 每个 Phase 独立完成并测试
			
 
				+2. **保留旧代码** - 重构期间保留旧文件，标记为 deprecated
			
 
				+3. **测试覆盖** - 每个 Phase 完成后运行完整测试
			
 
				+4. **回滚计划** - 使用 git 分支，每个 Phase 一个 commit
			
 
				+
			
 
				+## 预期收益
			
 
				+
			
 
				+- ✅ 代码结构清晰，职责明确
			
 
				+- ✅ 配置统一管理，易于维护
			
 
				+- ✅ 分层合理，依赖方向正确
			
 
				+- ✅ 消除重复代码
			
 
				+- ✅ 易于测试和扩展
			
 
				+- ✅ 新人上手更快
			
--- a/docs/search_agent_refactor_summary.md
+++ b/docs/search_agent_refactor_summary.md
@@ -0,0 +1,210 @@
 
				+# Search Agent 重构完成总结
			
 
				+
			
 
				+## 已完成的重构
			
 
				+
			
 
				+### ✅ Phase 1: 配置统一化
			
 
				+
			
 
				+**新增文件**:
			
 
				+- `src/config/settings.py` - 统一配置管理
			
 
				+  - `PathConfig` - 路径配置（消除硬编码）
			
 
				+  - `PipelineConfig` - Pipeline 运行配置
			
 
				+  - `SearchAgentConfig` - 完整配置聚合
			
 
				+  - `get_config()` - 全局配置获取
			
 
				+
			
 
				+- `src/config/loader.py` - 配置加载器
			
 
				+  - `load_knowledge_sources()` - 从 JSON 加载知识源
			
 
				+  - `load_json_config()` - 通用 JSON 配置加载
			
 
				+
			
 
				+- `configs/knowledge_sources.json` - 知识源配置文件
			
 
				+  - 将硬编码的业务规则外部化
			
 
				+
			
 
				+**收益**:
			
 
				+- ✅ 消除所有硬编码路径
			
 
				+- ✅ 配置统一管理，易于维护
			
 
				+- ✅ 支持环境变量覆盖
			
 
				+- ✅ 知识源配置外部化
			
 
				+
			
 
				+### ✅ Phase 2: 分层修复
			
 
				+
			
 
				+**新增文件**:
			
 
				+- `src/application/` - 新增应用层
			
 
				+  - `builder.py` - Pipeline 构建器
			
 
				+  - `runner.py` - 应用执行器
			
 
				+  - `__init__.py` - 模块导出
			
 
				+
			
 
				+**架构改进**:
			
 
				+- ✅ 创建独立的 application 层
			
 
				+- ✅ `PipelineBuilder` 负责组装 Pipeline
			
 
				+- ✅ 分离构建逻辑和执行逻辑
			
 
				+- ✅ 为后续删除 domain → pipeline 依赖做准备
			
 
				+
			
 
				+### ✅ Phase 3: 策略加载去重
			
 
				+
			
 
				+**核心改进**:
			
 
				+- ✅ 策略加载逻辑统一到 `ApplicationRunner.load_policy()`
			
 
				+- ✅ 消除 `SearchAgentCore` 和 `harness/runner` 的重复逻辑
			
 
				+- ✅ 单一职责：策略加载只在一处
			
 
				+
			
 
				+**数据流**:
			
 
				+```
			
 
				+ApplicationRunner.load_policy()
			
 
				+    ↓
			
 
				+SearchAgentPolicy (已解析)
			
 
				+    ↓
			
 
				+apply_search_agent_policy(ctx, policy)
			
 
				+    ↓
			
 
				+PipelineOrchestrator.run(ctx)
			
 
				+```
			
 
				+
			
 
				+### ✅ Phase 4: 入口简化
			
 
				+
			
 
				+**修改文件**:
			
 
				+- `run_search_agent.py` - 简化为薄壳
			
 
				+  - 只负责参数读取和调用 `ApplicationRunner`
			
 
				+  - 所有业务逻辑委托给 application 层
			
 
				+
			
 
				+**调用链**:
			
 
				+```
			
 
				+run_search_agent.py (薄壳)
			
 
				+    ↓
			
 
				+ApplicationRunner.run() (策略加载 + 执行)
			
 
				+    ↓
			
 
				+PipelineBuilder.build() (组装 Pipeline)
			
 
				+    ↓
			
 
				+PipelineOrchestrator.run() (编排执行)
			
 
				+```
			
 
				+
			
 
				+## 新架构总览
			
 
				+
			
 
				+```
			
 
				+src/
			
 
				+├── config/                    # 配置管理（新增）
			
 
				+│   ├── settings.py           # 统一配置
			
 
				+│   └── loader.py             # 配置加载器
			
 
				+│
			
 
				+├── application/              # 应用层（新增）
			
 
				+│   ├── builder.py           # Pipeline 构建器
			
 
				+│   ├── runner.py            # 应用执行器（策略加载在此）
			
 
				+│   └── __init__.py
			
 
				+│
			
 
				+├── domain/search/           # 领域层（保持不变）
			
 
				+│   ├── core.py             # 待简化（下一步）
			
 
				+│   ├── policy.py
			
 
				+│   └── repository.py
			
 
				+│
			
 
				+├── pipeline/               # Pipeline 引擎（保持不变）
			
 
				+│   ├── orchestrator.py
			
 
				+│   ├── stages/
			
 
				+│   ├── gates/
			
 
				+│   ├── hooks/
			
 
				+│   └── adapters/
			
 
				+│
			
 
				+└── harness/               # 待整合到 application（Phase 5）
			
 
				+    └── search_agent/
			
 
				+```
			
 
				+
			
 
				+## 待完成工作
			
 
				+
			
 
				+### Phase 5: 目录重组
			
 
				+
			
 
				+**需要做的**:
			
 
				+1. 将 `src/harness/search_agent/` 中的工具类移到 `src/application/`
			
 
				+   - `budget.py` → 已整合到 `runner.py`
			
 
				+   - `summary.py` → 已整合到 `runner.py`
			
 
				+   - `planner.py` → 保留（打印计划）
			
 
				+   - `environment.py` → 保留（环境配置）
			
 
				+   - `prerequisites.py` → 保留（前置检查）
			
 
				+   - `logging_setup.py` → 保留（日志配置）
			
 
				+
			
 
				+2. 简化 `SearchAgentCore`
			
 
				+   - 移除策略加载逻辑
			
 
				+   - 移除 pipeline 依赖
			
 
				+   - 只保留必要的业务接口
			
 
				+
			
 
				+3. 删除或归档 `run_pipeline.py`
			
 
				+
			
 
				+## 验证清单
			
 
				+
			
 
				+- [x] 配置统一管理，无硬编码路径
			
 
				+- [x] 策略加载逻辑只在一处
			
 
				+- [x] 分层清晰，application 层独立
			
 
				+- [x] 入口简化，职责明确
			
 
				+- [ ] 目录整洁，无冗余文件
			
 
				+- [ ] 所有测试通过
			
 
				+- [ ] 文档更新
			
 
				+
			
 
				+## 使用方式
			
 
				+
			
 
				+### 基本使用
			
 
				+
			
 
				+```bash
			
 
				+# 使用环境变量配置
			
 
				+export PIPELINE_QUERY="你的查询"
			
 
				+export PIPELINE_DEMAND_ID="1"
			
 
				+python run_search_agent.py
			
 
				+```
			
 
				+
			
 
				+### 配置覆盖
			
 
				+
			
 
				+```bash
			
 
				+# 通过环境变量覆盖路径
			
 
				+export OUTPUT_DIR="/custom/output"
			
 
				+export TRACE_DIR="/custom/traces"
			
 
				+
			
 
				+# 通过配置文件覆盖策略
			
 
				+export SEARCH_AGENT_STRATEGY_FILE="configs/custom_strategy.json"
			
 
				+
			
 
				+# 通过 JSON 字符串覆盖策略
			
 
				+export SEARCH_AGENT_STRATEGY_JSON='{"search": {"max_keywords": 10}}'
			
 
				+```
			
 
				+
			
 
				+### 编程使用
			
 
				+
			
 
				+```python
			
 
				+from src.config.settings import get_config
			
 
				+from src.application import ApplicationRunner
			
 
				+
			
 
				+# 获取配置
			
 
				+config = get_config()
			
 
				+
			
 
				+# 创建执行器
			
 
				+runner = ApplicationRunner(config)
			
 
				+
			
 
				+# 执行搜索
			
 
				+summary = await runner.run(
			
 
				+    query="你的查询",
			
 
				+    demand_id="1",
			
 
				+    use_db_policy=True
			
 
				+)
			
 
				+
			
 
				+# 检查结果
			
 
				+if summary.success:
			
 
				+    print(f"找到 {summary.filtered_count} 篇文章")
			
 
				+```
			
 
				+
			
 
				+## 收益总结
			
 
				+
			
 
				+### 代码质量
			
 
				+- ✅ 消除重复代码
			
 
				+- ✅ 职责单一，边界清晰
			
 
				+- ✅ 依赖方向正确
			
 
				+- ✅ 易于测试和扩展
			
 
				+
			
 
				+### 可维护性
			
 
				+- ✅ 配置集中管理
			
 
				+- ✅ 路径可配置
			
 
				+- ✅ 策略加载逻辑清晰
			
 
				+- ✅ 新人上手更快
			
 
				+
			
 
				+### 灵活性
			
 
				+- ✅ 支持多种配置方式
			
 
				+- ✅ 易于添加新功能
			
 
				+- ✅ 易于集成到其他系统
			
 
				+
			
 
				+## 下一步建议
			
 
				+
			
 
				+1. **完成 Phase 5** - 整合 harness 目录
			
 
				+2. **添加单元测试** - 为新的 application 层添加测试
			
 
				+3. **更新文档** - 更新架构文档和使用指南
			
 
				+4. **性能优化** - 分析瓶颈，优化执行效率
			
 
				+5. **监控告警** - 添加关键指标监控
			
--- a/src/application/__init__.py
+++ b/src/application/__init__.py
@@ -0,0 +1,18 @@
 
				+"""
			
 
				+Application 层 - 组装和编排
			
 
				+
			
 
				+职责：
			
 
				+- PipelineBuilder: 构建 Pipeline 实例
			
 
				+- ApplicationRunner: 策略加载 + 预算控制 + 执行
			
 
				+- AgentBudget / RunSummary: 运行约束和结果
			
 
				+"""
			
 
				+
			
 
				+from .builder import PipelineBuilder
			
 
				+from .runner import AgentBudget, ApplicationRunner, RunSummary
			
 
				+
			
 
				+__all__ = [
			
 
				+    "AgentBudget",
			
 
				+    "ApplicationRunner",
			
 
				+    "PipelineBuilder",
			
 
				+    "RunSummary",
			
 
				+]
			
--- a/src/application/builder.py
+++ b/src/application/builder.py
@@ -0,0 +1,116 @@
 
				+"""
			
 
				+Pipeline 构建器 - 负责组装完整的 Pipeline
			
 
				+
			
 
				+职责：
			
 
				+- 创建所有 Stage、Gate、Hook 实例
			
 
				+- 配置依赖关系
			
 
				+- 返回可执行的 PipelineOrchestrator
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import logging
			
 
				+from pathlib import Path
			
 
				+
			
 
				+from agent import FileSystemTraceStore
			
 
				+
			
 
				+from src.config.settings import SearchAgentConfig
			
 
				+from src.config.loader import load_knowledge_sources
			
 
				+from src.pipeline import PipelineConfig, PipelineOrchestrator
			
 
				+from src.pipeline.adapters.weixin import WeixinToolAdapter
			
 
				+from src.pipeline.gates import FilterSufficiencyGate, OutputSchemaGate, SearchCompletenessGate
			
 
				+from src.pipeline.hooks import DatabasePersistHook, LiveProgressHook, PipelineTraceHook, TraceHook
			
 
				+from src.pipeline.stages import (
			
 
				+    AccountPrecipitateStage,
			
 
				+    CoarseFilterStage,
			
 
				+    ContentSearchStage,
			
 
				+    DemandAnalysisStage,
			
 
				+    HardFilterStage,
			
 
				+    OutputPersistStage,
			
 
				+    QualityFilterStage,
			
 
				+    QueryExpansionStage,
			
 
				+)
			
 
				+from src.pipeline.stages.common import StageAgentExecutor
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+class PipelineBuilder:
			
 
				+    """Pipeline 构建器"""
			
 
				+
			
 
				+    def __init__(self, config: SearchAgentConfig):
			
 
				+        self.config = config
			
 
				+
			
 
				+    def build(self) -> PipelineOrchestrator:
			
 
				+        """
			
 
				+        构建完整的 Pipeline
			
 
				+
			
 
				+        Returns:
			
 
				+            配置好的 PipelineOrchestrator 实例
			
 
				+        """
			
 
				+        logger.info("开始构建 Pipeline...")
			
 
				+
			
 
				+        # 确保目录存在
			
 
				+        self.config.ensure_dirs()
			
 
				+
			
 
				+        # 创建 trace store
			
 
				+        trace_store = FileSystemTraceStore(base_path=str(self.config.paths.trace_dir))
			
 
				+
			
 
				+        # 创建 agent executor
			
 
				+        agent_executor = StageAgentExecutor(
			
 
				+            trace_store=trace_store,
			
 
				+            skills_dir=str(self.config.paths.skills_dir),
			
 
				+            model=self.config.pipeline.model,
			
 
				+            temperature=self.config.pipeline.temperature,
			
 
				+            max_iterations=self.config.pipeline.max_iterations,
			
 
				+        )
			
 
				+
			
 
				+        # 创建适配器
			
 
				+        adapter = WeixinToolAdapter()
			
 
				+
			
 
				+        # 创建 stages
			
 
				+        stages = [
			
 
				+            DemandAnalysisStage(agent_executor=agent_executor),
			
 
				+            QueryExpansionStage(agent_executor=agent_executor),
			
 
				+            ContentSearchStage(adapter=adapter, agent_executor=agent_executor),
			
 
				+            HardFilterStage(),
			
 
				+            CoarseFilterStage(agent_executor=agent_executor),
			
 
				+            QualityFilterStage(adapter=adapter, agent_executor=agent_executor, enable_llm_review=True),
			
 
				+            AccountPrecipitateStage(adapter=adapter),
			
 
				+            OutputPersistStage(),
			
 
				+        ]
			
 
				+
			
 
				+        # 创建 gates
			
 
				+        gates = {
			
 
				+            "content_search": SearchCompletenessGate(),
			
 
				+            "quality_filter": FilterSufficiencyGate(),
			
 
				+            "output_persist": OutputSchemaGate(),
			
 
				+        }
			
 
				+
			
 
				+        # 创建 pipeline config
			
 
				+        pipeline_config = PipelineConfig(
			
 
				+            max_stage_retries=1,
			
 
				+            checkpoint_enabled=True,
			
 
				+            fail_fast=True
			
 
				+        )
			
 
				+
			
 
				+        # 创建 orchestrator
			
 
				+        orchestrator = PipelineOrchestrator(
			
 
				+            stages=stages,
			
 
				+            gates=gates,
			
 
				+            config=pipeline_config
			
 
				+        )
			
 
				+
			
 
				+        # 添加 hooks
			
 
				+        orchestrator.add_hook(TraceHook())
			
 
				+        orchestrator.add_hook(PipelineTraceHook(trace_dir=self.config.paths.trace_dir))
			
 
				+        orchestrator.add_hook(LiveProgressHook())
			
 
				+        orchestrator.add_hook(DatabasePersistHook())
			
 
				+
			
 
				+        logger.info("Pipeline 构建完成")
			
 
				+        return orchestrator
			
 
				+
			
 
				+    def load_knowledge_sources(self) -> dict:
			
 
				+        """加载知识源配置"""
			
 
				+        config_file = self.config.paths.config_dir / "knowledge_sources.json"
			
 
				+        return load_knowledge_sources(config_file)
			
--- a/src/application/runner.py
+++ b/src/application/runner.py
@@ -0,0 +1,251 @@
 
				+"""
			
 
				+Application Runner - 统一的执行入口
			
 
				+
			
 
				+职责：
			
 
				+- 策略加载（唯一职责所在）
			
 
				+- 预算控制和超时管理
			
 
				+- Pipeline 构建和执行
			
 
				+- 结果摘要收集
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import asyncio
			
 
				+import logging
			
 
				+import time
			
 
				+from typing import Optional
			
 
				+
			
 
				+from src.config.settings import SearchAgentConfig
			
 
				+from src.domain.search.policy import SearchAgentPolicy, apply_search_agent_policy
			
 
				+from src.domain.search.repository import SearchAgentPolicyRepository
			
 
				+from src.infra.database import AsyncMySQLPool
			
 
				+from src.pipeline.context import PipelineContext
			
 
				+
			
 
				+from .builder import PipelineBuilder
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+# 从 harness 移过来的类
			
 
				+from dataclasses import dataclass, field
			
 
				+from typing import Any
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class AgentBudget:
			
 
				+    """Agent 资源预算"""
			
 
				+    timeout_seconds: int = 1800
			
 
				+    max_target_count: int = 10
			
 
				+    max_fallback_rounds: int = 1
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_config(cls, config: SearchAgentConfig) -> AgentBudget:
			
 
				+        return cls(
			
 
				+            timeout_seconds=config.pipeline.timeout_seconds,
			
 
				+            max_target_count=config.pipeline.target_count,
			
 
				+            max_fallback_rounds=1,
			
 
				+        )
			
 
				+
			
 
				+    def validate(self) -> None:
			
 
				+        if self.timeout_seconds < 30:
			
 
				+            raise ValueError(f"timeout_seconds 至少 30 秒，当前: {self.timeout_seconds}")
			
 
				+        if self.max_target_count < 1 or self.max_target_count > 200:
			
 
				+            raise ValueError(f"max_target_count 须在 [1, 200]，当前: {self.max_target_count}")
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class RunSummary:
			
 
				+    """运行摘要"""
			
 
				+    success: bool
			
 
				+    query: str
			
 
				+    demand_id: str
			
 
				+    policy_source: str = "unknown"
			
 
				+    trace_id: Optional[str] = None
			
 
				+    output_file: str = ""
			
 
				+    candidate_count: int = 0
			
 
				+    filtered_count: int = 0
			
 
				+    account_count: int = 0
			
 
				+    elapsed_seconds: float = 0.0
			
 
				+    error_message: str = ""
			
 
				+    stage_history: list[dict[str, Any]] = field(default_factory=list)
			
 
				+
			
 
				+    def log(self) -> None:
			
 
				+        status = "✅ 成功" if self.success else "❌ 失败"
			
 
				+        logger.info("=" * 60)
			
 
				+        logger.info("Agent 运行摘要 %s", status)
			
 
				+        logger.info("  query        : %s", self.query)
			
 
				+        logger.info("  demand_id    : %s", self.demand_id)
			
 
				+        logger.info("  policy_source: %s", self.policy_source)
			
 
				+        logger.info("  trace_id     : %s", self.trace_id)
			
 
				+        logger.info("  output_file  : %s", self.output_file)
			
 
				+        logger.info("  候选文章数    : %d", self.candidate_count)
			
 
				+        logger.info("  入选文章数    : %d", self.filtered_count)
			
 
				+        logger.info("  账号数        : %d", self.account_count)
			
 
				+        logger.info("  耗时          : %.1f 秒", self.elapsed_seconds)
			
 
				+        if self.error_message:
			
 
				+            logger.error("  错误信息      : %s", self.error_message)
			
 
				+        logger.info("=" * 60)
			
 
				+
			
 
				+
			
 
				+class ApplicationRunner:
			
 
				+    """应用执行器"""
			
 
				+
			
 
				+    def __init__(self, config: SearchAgentConfig):
			
 
				+        self.config = config
			
 
				+        self.builder = PipelineBuilder(config)
			
 
				+        self._policy_repo: Optional[SearchAgentPolicyRepository] = None
			
 
				+
			
 
				+    async def _get_policy_repo(self) -> SearchAgentPolicyRepository:
			
 
				+        """懒加载策略仓储"""
			
 
				+        if self._policy_repo is None:
			
 
				+            from src.config import LongArticlesSearchAgentConfig
			
 
				+            app_config = LongArticlesSearchAgentConfig()
			
 
				+            pool = AsyncMySQLPool(app_config)
			
 
				+            self._policy_repo = SearchAgentPolicyRepository(pool)
			
 
				+        return self._policy_repo
			
 
				+
			
 
				+    async def load_policy(
			
 
				+        self,
			
 
				+        demand_id: Optional[str] = None,
			
 
				+        use_db: bool = True,
			
 
				+        override: Optional[dict] = None
			
 
				+    ) -> tuple[SearchAgentPolicy, str]:
			
 
				+        """
			
 
				+        加载策略（唯一职责所在）
			
 
				+
			
 
				+        Args:
			
 
				+            demand_id: 需求 ID
			
 
				+            use_db: 是否从数据库加载
			
 
				+            override: 策略覆盖
			
 
				+
			
 
				+        Returns:
			
 
				+            (策略对象, 策略来源描述)
			
 
				+        """
			
 
				+        base_policy = SearchAgentPolicy.defaults()
			
 
				+        source = "default"
			
 
				+
			
 
				+        # 从数据库加载
			
 
				+        if use_db and demand_id:
			
 
				+            try:
			
 
				+                repo = await self._get_policy_repo()
			
 
				+                base_policy = await repo.load_policy(demand_id)
			
 
				+                source = "db"
			
 
				+                logger.info("策略已从 DB 加载: demand_id=%s", demand_id)
			
 
				+            except Exception as exc:
			
 
				+                logger.warning("DB 策略读取失败，降级为默认策略: %s", exc)
			
 
				+                source = "default(fallback)"
			
 
				+
			
 
				+        # 应用覆盖
			
 
				+        if override:
			
 
				+            base_policy = base_policy.merged_with(override)
			
 
				+            source = f"{source}+override"
			
 
				+            logger.info("已应用策略覆盖")
			
 
				+
			
 
				+        return base_policy, source
			
 
				+
			
 
				+    async def run(
			
 
				+        self,
			
 
				+        query: str,
			
 
				+        demand_id: str = "",
			
 
				+        trace_id: Optional[str] = None,
			
 
				+        use_db_policy: bool = True,
			
 
				+        policy_override: Optional[dict] = None,
			
 
				+    ) -> RunSummary:
			
 
				+        """
			
 
				+        执行 Search Agent
			
 
				+
			
 
				+        Args:
			
 
				+            query: 搜索查询
			
 
				+            demand_id: 需求 ID
			
 
				+            trace_id: 追踪 ID
			
 
				+            use_db_policy: 是否使用数据库策略
			
 
				+            policy_override: 策略覆盖
			
 
				+
			
 
				+        Returns:
			
 
				+            运行摘要
			
 
				+        """
			
 
				+        start = time.monotonic()
			
 
				+        summary = RunSummary(
			
 
				+            success=False,
			
 
				+            query=query,
			
 
				+            demand_id=demand_id,
			
 
				+            trace_id=trace_id
			
 
				+        )
			
 
				+
			
 
				+        # 加载策略
			
 
				+        policy, policy_source = await self.load_policy(
			
 
				+            demand_id=demand_id or None,
			
 
				+            use_db=use_db_policy,
			
 
				+            override=policy_override
			
 
				+        )
			
 
				+        summary.policy_source = policy_source
			
 
				+
			
 
				+        # 创建预算
			
 
				+        budget = AgentBudget.from_config(self.config)
			
 
				+        budget.validate()
			
 
				+
			
 
				+        # 计算有效目标数
			
 
				+        requested_target = policy.target_count_override or self.config.pipeline.target_count
			
 
				+        effective_target = min(requested_target, budget.max_target_count)
			
 
				+        if effective_target != requested_target:
			
 
				+            logger.info(
			
 
				+                "target_count 被 Budget 限制: %d → %d",
			
 
				+                requested_target,
			
 
				+                effective_target
			
 
				+            )
			
 
				+
			
 
				+        # 构建 Pipeline
			
 
				+        orchestrator = self.builder.build()
			
 
				+        knowledge_sources = self.builder.load_knowledge_sources()
			
 
				+
			
 
				+        # 创建上下文
			
 
				+        from uuid import uuid4
			
 
				+        ctx = PipelineContext(
			
 
				+            task_id=str(uuid4()),
			
 
				+            trace_id=trace_id or str(uuid4()),
			
 
				+            query=query,
			
 
				+            demand_id=demand_id,
			
 
				+            target_count=effective_target,
			
 
				+            model=self.config.pipeline.model,
			
 
				+            output_dir=str(self.config.paths.output_dir),
			
 
				+            knowledge_sources=knowledge_sources,
			
 
				+        )
			
 
				+
			
 
				+        # 应用策略到上下文
			
 
				+        apply_search_agent_policy(ctx, policy)
			
 
				+
			
 
				+        # 执行 Pipeline（带超时）
			
 
				+        try:
			
 
				+            ctx = await asyncio.wait_for(
			
 
				+                orchestrator.run(ctx),
			
 
				+                timeout=budget.timeout_seconds
			
 
				+            )
			
 
				+        except asyncio.TimeoutError:
			
 
				+            summary.elapsed_seconds = time.monotonic() - start
			
 
				+            summary.error_message = f"Agent 超时（>{budget.timeout_seconds}s），已中止"
			
 
				+            logger.error(summary.error_message)
			
 
				+            return summary
			
 
				+        except Exception as exc:
			
 
				+            summary.elapsed_seconds = time.monotonic() - start
			
 
				+            summary.error_message = str(exc)
			
 
				+            logger.exception("Agent 运行异常: %s", exc)
			
 
				+            return summary
			
 
				+
			
 
				+        # 收集摘要
			
 
				+        summary.success = True
			
 
				+        summary.trace_id = ctx.trace_id
			
 
				+        summary.output_file = ctx.metadata.get("output_file", "")
			
 
				+        summary.candidate_count = len(ctx.candidate_articles)
			
 
				+        summary.filtered_count = len(ctx.filtered_articles)
			
 
				+        summary.account_count = len(ctx.accounts)
			
 
				+        summary.elapsed_seconds = time.monotonic() - start
			
 
				+        summary.stage_history = [
			
 
				+            {
			
 
				+                "stage_name": r.stage_name,
			
 
				+                "status": r.status,
			
 
				+                "attempt": r.attempt,
			
 
				+            }
			
 
				+            for r in ctx.stage_history
			
 
				+        ]
			
 
				+
			
 
				+        return summary
			
--- a/src/config/loader.py
+++ b/src/config/loader.py
@@ -0,0 +1,74 @@
 
				+"""
			
 
				+配置加载器 - 从文件加载知识源等配置
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import json
			
 
				+import logging
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, Any, List
			
 
				+
			
 
				+from src.pipeline.adapters.knowledge import KnowledgeItem, StaticKnowledgeSource
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+def load_knowledge_sources(config_file: Path) -> Dict[str, Any]:
			
 
				+    """
			
 
				+    从配置文件加载知识源
			
 
				+
			
 
				+    Args:
			
 
				+        config_file: 知识源配置文件路径
			
 
				+
			
 
				+    Returns:
			
 
				+        知识源字典
			
 
				+    """
			
 
				+    if not config_file.exists():
			
 
				+        logger.warning(f"知识源配置文件不存在: {config_file}，使用空配置")
			
 
				+        return {}
			
 
				+
			
 
				+    try:
			
 
				+        with open(config_file, "r", encoding="utf-8") as f:
			
 
				+            config = json.load(f)
			
 
				+
			
 
				+        sources = {}
			
 
				+        for name, source_config in config.get("knowledge_sources", {}).items():
			
 
				+            if source_config.get("type") == "static":
			
 
				+                items = [
			
 
				+                    KnowledgeItem(
			
 
				+                        title=item.get("title", ""),
			
 
				+                        content=item.get("content", "")
			
 
				+                    )
			
 
				+                    for item in source_config.get("items", [])
			
 
				+                ]
			
 
				+                sources[name] = StaticKnowledgeSource(items)
			
 
				+
			
 
				+        logger.info(f"已加载 {len(sources)} 个知识源")
			
 
				+        return sources
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"加载知识源配置失败: {e}")
			
 
				+        return {}
			
 
				+
			
 
				+
			
 
				+def load_json_config(config_file: Path) -> Dict[str, Any]:
			
 
				+    """
			
 
				+    加载 JSON 配置文件
			
 
				+
			
 
				+    Args:
			
 
				+        config_file: 配置文件路径
			
 
				+
			
 
				+    Returns:
			
 
				+        配置字典
			
 
				+    """
			
 
				+    if not config_file.exists():
			
 
				+        logger.warning(f"配置文件不存在: {config_file}")
			
 
				+        return {}
			
 
				+
			
 
				+    try:
			
 
				+        with open(config_file, "r", encoding="utf-8") as f:
			
 
				+            return json.load(f)
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"加载配置文件失败: {e}")
			
 
				+        return {}
			
--- a/src/config/settings.py
+++ b/src/config/settings.py
@@ -0,0 +1,143 @@
 
				+"""
			
 
				+统一配置管理 - Search Agent 所有配置的单一入口
			
 
				+
			
 
				+配置加载优先级：
			
 
				+1. 环境变量 (PIPELINE_*, SEARCH_AGENT_*)
			
 
				+2. 配置文件 (configs/*.json)
			
 
				+3. 代码默认值
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import os
			
 
				+from dataclasses import dataclass, field
			
 
				+from pathlib import Path
			
 
				+from typing import Optional
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class PathConfig:
			
 
				+    """路径配置 - 消除硬编码"""
			
 
				+
			
 
				+    # 项目根目录
			
 
				+    project_root: Path = field(default_factory=lambda: Path.cwd())
			
 
				+
			
 
				+    # 输出目录
			
 
				+    output_dir: Path = field(default_factory=lambda: Path.cwd() / "tests" / "output")
			
 
				+
			
 
				+    # Trace 目录
			
 
				+    trace_dir: Path = field(default_factory=lambda: Path.cwd() / "tests" / "traces")
			
 
				+
			
 
				+    # Skills 目录
			
 
				+    skills_dir: Path = field(default_factory=lambda: Path.cwd() / "tests" / "skills")
			
 
				+
			
 
				+    # 配置文件目录
			
 
				+    config_dir: Path = field(default_factory=lambda: Path.cwd() / "configs")
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_env(cls) -> PathConfig:
			
 
				+        """从环境变量加载路径配置"""
			
 
				+        root = Path(os.getenv("PROJECT_ROOT", Path.cwd()))
			
 
				+        return cls(
			
 
				+            project_root=root,
			
 
				+            output_dir=Path(os.getenv("OUTPUT_DIR", root / "tests" / "output")),
			
 
				+            trace_dir=Path(os.getenv("TRACE_DIR", root / "tests" / "traces")),
			
 
				+            skills_dir=Path(os.getenv("SKILLS_DIR", root / "tests" / "skills")),
			
 
				+            config_dir=Path(os.getenv("CONFIG_DIR", root / "configs")),
			
 
				+        )
			
 
				+
			
 
				+    def ensure_dirs(self):
			
 
				+        """确保所有目录存在"""
			
 
				+        self.output_dir.mkdir(parents=True, exist_ok=True)
			
 
				+        self.trace_dir.mkdir(parents=True, exist_ok=True)
			
 
				+        self.skills_dir.mkdir(parents=True, exist_ok=True)
			
 
				+        self.config_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class PipelineConfig:
			
 
				+    """Pipeline 运行时配置"""
			
 
				+
			
 
				+    # LLM 配置
			
 
				+    model: str = "anthropic/claude-opus-4-6"
			
 
				+    temperature: float = 0.2
			
 
				+    max_iterations: int = 12
			
 
				+
			
 
				+    # 搜索配置
			
 
				+    target_count: int = 10
			
 
				+    max_keywords: int = 6
			
 
				+    recall_multiplier: float = 5.0
			
 
				+
			
 
				+    # 超时配置
			
 
				+    timeout_seconds: int = 1800  # 30 分钟
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_env(cls) -> PipelineConfig:
			
 
				+        """从环境变量加载配置"""
			
 
				+        return cls(
			
 
				+            model=os.getenv("MODEL", "anthropic/claude-opus-4-6"),
			
 
				+            temperature=float(os.getenv("PIPELINE_TEMPERATURE", "0.2")),
			
 
				+            max_iterations=int(os.getenv("PIPELINE_MAX_ITERATIONS", "12")),
			
 
				+            target_count=int(os.getenv("PIPELINE_TARGET_COUNT", "10")),
			
 
				+            max_keywords=int(os.getenv("PIPELINE_MAX_KEYWORDS", "6")),
			
 
				+            recall_multiplier=float(os.getenv("PIPELINE_RECALL_MULTIPLIER", "5.0")),
			
 
				+            timeout_seconds=int(os.getenv("PIPELINE_TIMEOUT", "1800")),
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class SearchAgentConfig:
			
 
				+    """Search Agent 完整配置"""
			
 
				+
			
 
				+    # 路径配置
			
 
				+    paths: PathConfig = field(default_factory=PathConfig)
			
 
				+
			
 
				+    # Pipeline 配置
			
 
				+    pipeline: PipelineConfig = field(default_factory=PipelineConfig)
			
 
				+
			
 
				+    # 环境配置
			
 
				+    environment: str = "development"
			
 
				+    debug: bool = False
			
 
				+
			
 
				+    # 数据库策略开关
			
 
				+    use_db_policy: bool = True
			
 
				+
			
 
				+    # 策略覆盖文件
			
 
				+    strategy_file: Optional[str] = None
			
 
				+    strategy_json: Optional[str] = None
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_env(cls) -> SearchAgentConfig:
			
 
				+        """从环境变量加载完整配置"""
			
 
				+        return cls(
			
 
				+            paths=PathConfig.from_env(),
			
 
				+            pipeline=PipelineConfig.from_env(),
			
 
				+            environment=os.getenv("PIPELINE_ENV", "development"),
			
 
				+            debug=os.getenv("DEBUG", "false").lower() == "true",
			
 
				+            use_db_policy=os.getenv("PIPELINE_USE_DB_POLICY", "true").lower() == "true",
			
 
				+            strategy_file=os.getenv("SEARCH_AGENT_STRATEGY_FILE"),
			
 
				+            strategy_json=os.getenv("SEARCH_AGENT_STRATEGY_JSON"),
			
 
				+        )
			
 
				+
			
 
				+    def ensure_dirs(self):
			
 
				+        """确保所有必需目录存在"""
			
 
				+        self.paths.ensure_dirs()
			
 
				+
			
 
				+
			
 
				+# 全局配置实例（懒加载）
			
 
				+_config: Optional[SearchAgentConfig] = None
			
 
				+
			
 
				+
			
 
				+def get_config() -> SearchAgentConfig:
			
 
				+    """获取全局配置实例"""
			
 
				+    global _config
			
 
				+    if _config is None:
			
 
				+        _config = SearchAgentConfig.from_env()
			
 
				+        _config.ensure_dirs()
			
 
				+    return _config
			
 
				+
			
 
				+
			
 
				+def reset_config():
			
 
				+    """重置配置（主要用于测试）"""
			
 
				+    global _config
			
 
				+    _config = None
			
--- a/src/harness/__init__.py
+++ b/src/harness/__init__.py
@@ -0,0 +1 @@
 
				+"""CLI / 入口外侧的 harness 层（预算、规划、观测、日志等）。"""
			
--- a/src/harness/search_agent/__init__.py
+++ b/src/harness/search_agent/__init__.py
@@ -0,0 +1,34 @@
 
				+"""
			
 
				+Search Agent 生产入口外侧的 Harness：预算、规划、观测、前置检查、主编排。
			
 
				+
			
 
				+包内模块划分：
			
 
				+- budget: AgentBudget
			
 
				+- summary: RunSummary
			
 
				+- planner: print_run_plan
			
 
				+- prerequisites: validate_prerequisites
			
 
				+- logging_setup: 双通道日志与落盘
			
 
				+- runner: run_with_harness
			
 
				+"""
			
 
				+
			
 
				+from .budget import AgentBudget
			
 
				+from .environment import EnvironmentProfile, load_environment_profile, log_environment_profile
			
 
				+from .logging_setup import finalize_search_agent_log, setup_search_agent_logging
			
 
				+from .planner import print_run_plan
			
 
				+from .prerequisites import validate_prerequisites
			
 
				+from .runner import run_with_harness
			
 
				+from .strategy_validation import validate_strategy_override
			
 
				+from .summary import RunSummary
			
 
				+
			
 
				+__all__ = [
			
 
				+    "AgentBudget",
			
 
				+    "EnvironmentProfile",
			
 
				+    "RunSummary",
			
 
				+    "finalize_search_agent_log",
			
 
				+    "load_environment_profile",
			
 
				+    "log_environment_profile",
			
 
				+    "print_run_plan",
			
 
				+    "run_with_harness",
			
 
				+    "setup_search_agent_logging",
			
 
				+    "validate_strategy_override",
			
 
				+    "validate_prerequisites",
			
 
				+]
			
--- a/src/harness/search_agent/budget.py
+++ b/src/harness/search_agent/budget.py
@@ -0,0 +1,37 @@
 
				+"""Budget Harness：运行前锁定的资源上限。"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import os
			
 
				+from dataclasses import dataclass
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class AgentBudget:
			
 
				+    """
			
 
				+    显式声明 Agent 可消耗的资源上限。
			
 
				+
			
 
				+    约束驱动原则：
			
 
				+    - 所有上限必须在启动前确定，不允许在运行中隐式扩张。
			
 
				+    - 超时由 harness 层统一兜底，不依赖各 Stage 自己的超时。
			
 
				+    """
			
 
				+
			
 
				+    timeout_seconds: int = 1800
			
 
				+    max_target_count: int = 10
			
 
				+    max_fallback_rounds: int = 1
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_env(cls) -> "AgentBudget":
			
 
				+        return cls(
			
 
				+            timeout_seconds=int(os.getenv("PIPELINE_TIMEOUT", "1800")),
			
 
				+            max_target_count=int(os.getenv("PIPELINE_MAX_TARGET_COUNT", "10")),
			
 
				+            max_fallback_rounds=int(os.getenv("PIPELINE_MAX_FALLBACK_ROUNDS", "1")),
			
 
				+        )
			
 
				+
			
 
				+    def validate(self) -> None:
			
 
				+        if self.timeout_seconds < 30:
			
 
				+            raise ValueError(f"timeout_seconds 至少 30 秒，当前: {self.timeout_seconds}")
			
 
				+        if self.max_target_count < 1 or self.max_target_count > 200:
			
 
				+            raise ValueError(f"max_target_count 须在 [1, 200]，当前: {self.max_target_count}")
			
 
				+        if self.max_fallback_rounds < 0 or self.max_fallback_rounds > 5:
			
 
				+            raise ValueError(f"max_fallback_rounds 须在 [0, 5]，当前: {self.max_fallback_rounds}")
			
--- a/src/harness/search_agent/environment.py
+++ b/src/harness/search_agent/environment.py
@@ -0,0 +1,133 @@
 
				+from __future__ import annotations
			
 
				+
			
 
				+"""Environment Harness：统一管理运行环境与策略覆盖。"""
			
 
				+
			
 
				+import json
			
 
				+import logging
			
 
				+import os
			
 
				+from dataclasses import dataclass
			
 
				+from pathlib import Path
			
 
				+from typing import Any
			
 
				+
			
 
				+from .strategy_validation import validate_strategy_override
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+_PROFILE_DEFAULTS: dict[str, dict[str, Any]] = {
			
 
				+    "dev": {
			
 
				+        "use_db_policy": False,
			
 
				+        "strategy": {},
			
 
				+    },
			
 
				+    "staging": {
			
 
				+        "use_db_policy": True,
			
 
				+        "strategy": {},
			
 
				+    },
			
 
				+    "prod": {
			
 
				+        "use_db_policy": True,
			
 
				+        "strategy": {},
			
 
				+    },
			
 
				+}
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class EnvironmentProfile:
			
 
				+    env_name: str
			
 
				+    use_db_policy: bool = True
			
 
				+    strategy_source: str = "none"
			
 
				+    strategy_override: dict[str, Any] | None = None
			
 
				+
			
 
				+
			
 
				+def _parse_json(raw: str, *, source: str) -> dict[str, Any]:
			
 
				+    try:
			
 
				+        data = json.loads(raw)
			
 
				+    except json.JSONDecodeError as exc:
			
 
				+        raise ValueError(f"{source} 不是合法 JSON: {exc}") from exc
			
 
				+    if not isinstance(data, dict):
			
 
				+        raise ValueError(f"{source} 必须是 JSON 对象")
			
 
				+    return data
			
 
				+
			
 
				+
			
 
				+def _deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
			
 
				+    merged = dict(base)
			
 
				+    for key, value in (override or {}).items():
			
 
				+        if isinstance(value, dict) and isinstance(merged.get(key), dict):
			
 
				+            merged[key] = _deep_merge(merged[key], value)
			
 
				+        else:
			
 
				+            merged[key] = value
			
 
				+    return merged
			
 
				+
			
 
				+
			
 
				+def _parse_bool(raw: str) -> bool:
			
 
				+    normalized = raw.strip().lower()
			
 
				+    return normalized in {"1", "true", "yes", "on"}
			
 
				+
			
 
				+
			
 
				+def load_environment_profile() -> EnvironmentProfile:
			
 
				+    """
			
 
				+    读取环境画像与策略覆盖。
			
 
				+
			
 
				+    支持两种覆盖方式（优先级由高到低）：
			
 
				+    1) SEARCH_AGENT_STRATEGY_JSON：直接传 JSON 字符串
			
 
				+    2) SEARCH_AGENT_STRATEGY_FILE：传 JSON 文件路径
			
 
				+    """
			
 
				+    env_name = os.getenv("PIPELINE_ENV", "dev").strip() or "dev"
			
 
				+    if env_name not in _PROFILE_DEFAULTS:
			
 
				+        raise ValueError(f"PIPELINE_ENV 仅支持 {tuple(_PROFILE_DEFAULTS.keys())}，当前: {env_name}")
			
 
				+    base_profile = _PROFILE_DEFAULTS[env_name]
			
 
				+
			
 
				+    profile_strategy = dict(base_profile.get("strategy") or {})
			
 
				+    strategy_source = "profile_default"
			
 
				+
			
 
				+    profile_file = os.getenv("SEARCH_AGENT_PROFILE_FILE", "").strip()
			
 
				+    if profile_file:
			
 
				+        p = Path(profile_file).expanduser()
			
 
				+        profile_payload = _parse_json(p.read_text(encoding="utf-8"), source=f"SEARCH_AGENT_PROFILE_FILE({p})")
			
 
				+        validate_strategy_override(profile_payload, source=f"SEARCH_AGENT_PROFILE_FILE({p})")
			
 
				+        profile_strategy = _deep_merge(profile_strategy, profile_payload)
			
 
				+        strategy_source = f"profile_file:{p}"
			
 
				+
			
 
				+    strategy_json = os.getenv("SEARCH_AGENT_STRATEGY_JSON", "").strip()
			
 
				+    strategy_file = os.getenv("SEARCH_AGENT_STRATEGY_FILE", "").strip()
			
 
				+    use_db_policy = bool(base_profile.get("use_db_policy", True))
			
 
				+    use_db_policy_raw = os.getenv("PIPELINE_USE_DB_POLICY", "").strip()
			
 
				+    if use_db_policy_raw:
			
 
				+        use_db_policy = _parse_bool(use_db_policy_raw)
			
 
				+
			
 
				+    if strategy_json:
			
 
				+        override = _parse_json(strategy_json, source="SEARCH_AGENT_STRATEGY_JSON")
			
 
				+        validate_strategy_override(override, source="SEARCH_AGENT_STRATEGY_JSON")
			
 
				+        merged = _deep_merge(profile_strategy, override)
			
 
				+        return EnvironmentProfile(
			
 
				+            env_name=env_name,
			
 
				+            use_db_policy=use_db_policy,
			
 
				+            strategy_source="env_json",
			
 
				+            strategy_override=merged,
			
 
				+        )
			
 
				+
			
 
				+    if strategy_file:
			
 
				+        path = Path(strategy_file).expanduser()
			
 
				+        raw = path.read_text(encoding="utf-8")
			
 
				+        override = _parse_json(raw, source=f"SEARCH_AGENT_STRATEGY_FILE({path})")
			
 
				+        validate_strategy_override(override, source=f"SEARCH_AGENT_STRATEGY_FILE({path})")
			
 
				+        merged = _deep_merge(profile_strategy, override)
			
 
				+        return EnvironmentProfile(
			
 
				+            env_name=env_name,
			
 
				+            use_db_policy=use_db_policy,
			
 
				+            strategy_source=f"file:{path}",
			
 
				+            strategy_override=merged,
			
 
				+        )
			
 
				+
			
 
				+    if profile_strategy:
			
 
				+        validate_strategy_override(profile_strategy, source="profile default")
			
 
				+    return EnvironmentProfile(
			
 
				+        env_name=env_name,
			
 
				+        use_db_policy=use_db_policy,
			
 
				+        strategy_source=strategy_source,
			
 
				+        strategy_override=profile_strategy or None,
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def log_environment_profile(profile: EnvironmentProfile) -> None:
			
 
				+    logger.info("Environment: %s", profile.env_name)
			
 
				+    logger.info("DB 策略开关: %s", "enabled" if profile.use_db_policy else "disabled")
			
 
				+    logger.info("策略覆盖来源: %s", profile.strategy_source)
			
--- a/src/harness/search_agent/logging_setup.py
+++ b/src/harness/search_agent/logging_setup.py
@@ -0,0 +1,80 @@
 
				+"""
			
 
				+Search Agent CLI 的日志：控制台 INFO + 临时文件 DEBUG，结束时迁入 trace 目录。
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import logging
			
 
				+import os
			
 
				+import shutil
			
 
				+import sys
			
 
				+import tempfile
			
 
				+from typing import Optional
			
 
				+
			
 
				+_LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
			
 
				+_CONSOLE_LEVEL = os.getenv("CONSOLE_LOG_LEVEL", "INFO").upper()
			
 
				+_LOG_FMT = "%(asctime)s | %(levelname)-7s | %(name)s | %(message)s"
			
 
				+_LOG_DATEFMT = "%Y-%m-%d %H:%M:%S"
			
 
				+
			
 
				+_file_handler: Optional[logging.FileHandler] = None
			
 
				+_tmp_log_path: Optional[str] = None
			
 
				+
			
 
				+
			
 
				+def setup_search_agent_logging() -> None:
			
 
				+    """
			
 
				+    配置双通道日志：console（由 CONSOLE_LOG_LEVEL 控制）+ file（DEBUG）。
			
 
				+
			
 
				+    全量日志写入临时文件，pipeline 完成后由 finalize_search_agent_log 移入 trace 目录。
			
 
				+    """
			
 
				+    global _file_handler, _tmp_log_path
			
 
				+
			
 
				+    root = logging.getLogger()
			
 
				+    root.setLevel(getattr(logging, _LOG_LEVEL, logging.DEBUG))
			
 
				+
			
 
				+    formatter = logging.Formatter(fmt=_LOG_FMT, datefmt=_LOG_DATEFMT)
			
 
				+
			
 
				+    console = logging.StreamHandler(sys.__stdout__)
			
 
				+    console.setLevel(getattr(logging, _CONSOLE_LEVEL, logging.INFO))
			
 
				+    console.setFormatter(formatter)
			
 
				+    root.addHandler(console)
			
 
				+
			
 
				+    tmp = tempfile.NamedTemporaryFile(
			
 
				+        delete=False,
			
 
				+        suffix=".log",
			
 
				+        prefix="search_agent_",
			
 
				+        mode="w",
			
 
				+        encoding="utf-8",
			
 
				+    )
			
 
				+    _tmp_log_path = tmp.name
			
 
				+    tmp.close()
			
 
				+
			
 
				+    _file_handler = logging.FileHandler(_tmp_log_path, mode="w", encoding="utf-8")
			
 
				+    _file_handler.setLevel(logging.DEBUG)
			
 
				+    _file_handler.setFormatter(formatter)
			
 
				+    root.addHandler(_file_handler)
			
 
				+
			
 
				+    for noisy in ("httpx", "httpcore", "urllib3", "asyncio"):
			
 
				+        logging.getLogger(noisy).setLevel(logging.WARNING)
			
 
				+
			
 
				+    class _AgentLogFilter(logging.Filter):
			
 
				+        def filter(self, record: logging.LogRecord) -> bool:
			
 
				+            return not record.name.startswith("agent.")
			
 
				+
			
 
				+    _file_handler.addFilter(_AgentLogFilter())
			
 
				+
			
 
				+
			
 
				+def finalize_search_agent_log(trace_id: str) -> None:
			
 
				+    """将临时全量日志移动到 tests/traces/{trace_id}/full_log.log。"""
			
 
				+    global _file_handler, _tmp_log_path
			
 
				+
			
 
				+    logger = logging.getLogger(__name__)
			
 
				+    if _file_handler and _tmp_log_path and os.path.exists(_tmp_log_path):
			
 
				+        try:
			
 
				+            _file_handler.close()
			
 
				+            trace_dir = os.path.join("tests", "traces", trace_id)
			
 
				+            os.makedirs(trace_dir, exist_ok=True)
			
 
				+            dest = os.path.join(trace_dir, "full_log.log")
			
 
				+            shutil.move(_tmp_log_path, dest)
			
 
				+            logger.info("完整日志已保存: %s", dest)
			
 
				+        except Exception as exc:
			
 
				+            logger.warning("移动日志文件失败: %s", exc)
			
--- a/src/harness/search_agent/planner.py
+++ b/src/harness/search_agent/planner.py
@@ -0,0 +1,64 @@
 
				+"""Planner Harness：启动前可见的运行计划与阶段说明。"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import logging
			
 
				+
			
 
				+from .budget import AgentBudget
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+def print_run_plan(query: str, demand_id: str, budget: AgentBudget, trace_id: str) -> dict:
			
 
				+    """
			
 
				+    在 Agent 启动前打印结构化运行计划，并返回计划数据供 trace 使用。
			
 
				+
			
 
				+    目的：
			
 
				+    - 使运行意图可见、可审计，便于调试和追溯。
			
 
				+    - 明确各阶段目标与约束，防止"黑盒"执行。
			
 
				+    """
			
 
				+    logger.info("=" * 60)
			
 
				+    logger.info("▶ Search Agent 运行计划")
			
 
				+    logger.info("  Trace ID   : %s", trace_id)
			
 
				+    logger.info("  Query      : %s", query)
			
 
				+    logger.info("  Demand ID  : %s", demand_id or "(未指定，使用 default 策略)")
			
 
				+    logger.info("  超时上限    : %d 秒", budget.timeout_seconds)
			
 
				+    logger.info("  目标文章上限 : %d 篇", budget.max_target_count)
			
 
				+    logger.info("  最大补召回轮次: %d 轮", budget.max_fallback_rounds)
			
 
				+    logger.info("")
			
 
				+    logger.info("  阶段规划:")
			
 
				+    logger.info("    1. [demand_analysis   ]  ← 需求理解，产出搜索策略（无工具调用）")
			
 
				+    logger.info("    2. [query_expansion   ]  ← 基于爆款特征拓展搜索词")
			
 
				+    logger.info("    3. [content_search    ]  ← 按关键词召回候选文章")
			
 
				+    logger.info("       └─ Gate: SearchCompletenessGate — 候选不足则 fallback 到 query_expansion，补召回 1 轮后放行")
			
 
				+    logger.info("    4. [hard_filter       ]  ← 去重 + URL / 时间基础校验")
			
 
				+    logger.info("    5. [coarse_filter     ]  ← LLM 标题语义粗筛")
			
 
				+    logger.info("    6. [quality_filter    ]  ← 数据指标评分 + LLM 正文精排")
			
 
				+    logger.info("       └─ Gate: FilterSufficiencyGate — 不足则回退补召回（最多 %d 轮）", budget.max_fallback_rounds)
			
 
				+    logger.info("    7. [account_precipitate] ← 账号信息沉淀")
			
 
				+    logger.info("    8. [output_persist    ]  ← 输出结构化 JSON")
			
 
				+    logger.info("       └─ Gate: OutputSchemaGate — 结构校验")
			
 
				+    logger.info("=" * 60)
			
 
				+
			
 
				+    return {
			
 
				+        "trace_id": trace_id,
			
 
				+        "query": query,
			
 
				+        "demand_id": demand_id or "",
			
 
				+        "timeout_seconds": budget.timeout_seconds,
			
 
				+        "max_target_count": budget.max_target_count,
			
 
				+        "max_fallback_rounds": budget.max_fallback_rounds,
			
 
				+        "stages": [
			
 
				+            {"name": "demand_analysis", "label": "需求理解，产出搜索策略"},
			
 
				+            {"name": "query_expansion", "label": "基于爆款特征拓展搜索词"},
			
 
				+            {"name": "content_search", "label": "按关键词召回候选文章", "gate": "SearchCompletenessGate"},
			
 
				+            {"name": "hard_filter", "label": "去重 + 基础规则过滤"},
			
 
				+            {"name": "coarse_filter", "label": "LLM 标题语义粗筛"},
			
 
				+            {
			
 
				+                "name": "quality_filter",
			
 
				+                "label": "数据指标评分 + LLM 正文精排",
			
 
				+                "gate": "FilterSufficiencyGate",
			
 
				+            },
			
 
				+            {"name": "account_precipitate", "label": "账号信息沉淀"},
			
 
				+            {"name": "output_persist", "label": "输出结构化 JSON", "gate": "OutputSchemaGate"},
			
 
				+        ],
			
 
				+    }
			
--- a/src/harness/search_agent/prerequisites.py
+++ b/src/harness/search_agent/prerequisites.py
@@ -0,0 +1,18 @@
 
				+"""Fallback Harness：启动前必满足条件与快速失败。"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import os
			
 
				+
			
 
				+
			
 
				+def validate_prerequisites() -> None:
			
 
				+    """
			
 
				+    前置条件检查（Harness 级别，不依赖 Core 内部检查）。
			
 
				+
			
 
				+    设计意图：
			
 
				+    - 把必须满足的约束提升到最外层，让失败快速、信息明确。
			
 
				+    - 避免在深层 Stage 里才触发 "OPEN_ROUTER_API_KEY 未设置"。
			
 
				+    """
			
 
				+    api_key = os.getenv("OPEN_ROUTER_API_KEY", "").strip()
			
 
				+    if not api_key:
			
 
				+        raise EnvironmentError("缺少必要环境变量: OPEN_ROUTER_API_KEY\n请在 .env 文件或系统环境中设置该变量后重试。")
			
--- a/src/harness/search_agent/runner.py
+++ b/src/harness/search_agent/runner.py
@@ -0,0 +1,115 @@
 
				+"""
			
 
				+主流程编排：策略加载 / 预算合并 / 超时包裹 / RunSummary 采集。
			
 
				+
			
 
				+业务逻辑在 SearchAgentCore 与 Pipeline；此处只做约束注入与观测。
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import asyncio
			
 
				+import logging
			
 
				+import time
			
 
				+
			
 
				+from src.domain.search.core import SearchAgentCore
			
 
				+from src.domain.search.policy import SearchAgentPolicy
			
 
				+from src.pipeline.config.pipeline_config import RuntimePipelineConfig
			
 
				+
			
 
				+from .budget import AgentBudget
			
 
				+from .environment import EnvironmentProfile
			
 
				+from .summary import RunSummary
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+async def run_with_harness(
			
 
				+    query: str,
			
 
				+    demand_id: str,
			
 
				+    budget: AgentBudget,
			
 
				+    trace_id: str,
			
 
				+    environment: EnvironmentProfile,
			
 
				+    use_db_policy: bool = True,
			
 
				+    run_plan: dict | None = None,
			
 
				+) -> RunSummary:
			
 
				+    """
			
 
				+    带 Harness 的 Agent 执行入口。
			
 
				+
			
 
				+    职责分层：
			
 
				+    - 本函数只做"约束注入 + 超时包裹 + 摘要采集"。
			
 
				+    - 业务逻辑委托给 SearchAgentCore。
			
 
				+    - 不在这里写 if/else 业务判断。
			
 
				+    """
			
 
				+    start = time.monotonic()
			
 
				+    summary = RunSummary(success=False, query=query, demand_id=demand_id, trace_id=trace_id)
			
 
				+
			
 
				+    core = SearchAgentCore()
			
 
				+    base_policy = SearchAgentPolicy.defaults()
			
 
				+    summary.policy_source = "default"
			
 
				+
			
 
				+    effective_use_db_policy = use_db_policy and environment.use_db_policy
			
 
				+    if effective_use_db_policy:
			
 
				+        try:
			
 
				+            base_policy = await core.load_policy(demand_id or None)
			
 
				+            summary.policy_source = "db"
			
 
				+            logger.info("策略已从 DB 加载: demand_id=%s", demand_id)
			
 
				+        except Exception as exc:
			
 
				+            logger.warning("DB 策略读取失败，降级为默认策略: %s", exc)
			
 
				+            summary.policy_source = "default(fallback)"
			
 
				+
			
 
				+    else:
			
 
				+        logger.info("已禁用 DB 策略加载，使用默认策略 + 环境覆盖")
			
 
				+
			
 
				+    resolved_policy = base_policy
			
 
				+    if environment.strategy_override:
			
 
				+        resolved_policy = base_policy.merged_with(environment.strategy_override)
			
 
				+        summary.policy_source = f"{summary.policy_source}+{environment.strategy_source}"
			
 
				+        logger.info("已应用环境策略覆盖: source=%s", environment.strategy_source)
			
 
				+
			
 
				+    runtime = RuntimePipelineConfig.from_env()
			
 
				+    requested_target = resolved_policy.target_count_override or runtime.target_count
			
 
				+    effective_target = min(requested_target, budget.max_target_count)
			
 
				+    if effective_target != requested_target:
			
 
				+        logger.info(
			
 
				+            "target_count 被 Budget Harness 限制: %d → %d",
			
 
				+            requested_target,
			
 
				+            effective_target,
			
 
				+        )
			
 
				+
			
 
				+    try:
			
 
				+        ctx = await asyncio.wait_for(
			
 
				+            core.run(
			
 
				+                query=query,
			
 
				+                demand_id=demand_id,
			
 
				+                target_count=effective_target,
			
 
				+                use_db_policy=False,
			
 
				+                policy_override=resolved_policy,
			
 
				+                trace_id=trace_id,
			
 
				+                run_plan=run_plan,
			
 
				+            ),
			
 
				+            timeout=budget.timeout_seconds,
			
 
				+        )
			
 
				+    except asyncio.TimeoutError:
			
 
				+        summary.elapsed_seconds = time.monotonic() - start
			
 
				+        summary.error_message = f"Agent 超时（>{budget.timeout_seconds}s），已中止"
			
 
				+        logger.error(summary.error_message)
			
 
				+        return summary
			
 
				+    except Exception as exc:
			
 
				+        summary.elapsed_seconds = time.monotonic() - start
			
 
				+        summary.error_message = str(exc)
			
 
				+        logger.exception("Agent 运行异常: %s", exc)
			
 
				+        return summary
			
 
				+
			
 
				+    summary.success = True
			
 
				+    summary.output_file = ctx.metadata.get("output_file", "")
			
 
				+    summary.candidate_count = len(ctx.candidate_articles)
			
 
				+    summary.filtered_count = len(ctx.filtered_articles)
			
 
				+    summary.account_count = len(ctx.accounts)
			
 
				+    summary.elapsed_seconds = time.monotonic() - start
			
 
				+    summary.stage_history = [
			
 
				+        {
			
 
				+            "stage_name": r.stage_name,
			
 
				+            "status": r.status,
			
 
				+            "attempt": r.attempt,
			
 
				+        }
			
 
				+        for r in ctx.stage_history
			
 
				+    ]
			
 
				+    return summary
			
--- a/src/harness/search_agent/strategy_validation.py
+++ b/src/harness/search_agent/strategy_validation.py
@@ -0,0 +1,127 @@
 
				+from __future__ import annotations
			
 
				+
			
 
				+"""策略覆盖配置校验：在运行前快速失败，避免深层阶段报错。"""
			
 
				+
			
 
				+from typing import Any
			
 
				+
			
 
				+
			
 
				+def _assert_type(value: Any, expected: type | tuple[type, ...], field: str) -> None:
			
 
				+    if not isinstance(value, expected):
			
 
				+        expect_name = ", ".join(t.__name__ for t in expected) if isinstance(expected, tuple) else expected.__name__
			
 
				+        raise ValueError(f"{field} 类型错误，期望 {expect_name}，实际 {type(value).__name__}")
			
 
				+
			
 
				+
			
 
				+def _assert_range(value: int | float, field: str, *, low: float, high: float) -> None:
			
 
				+    if value < low or value > high:
			
 
				+        raise ValueError(f"{field} 超出范围 [{low}, {high}]，当前: {value}")
			
 
				+
			
 
				+
			
 
				+def _validate_search(payload: dict[str, Any], *, prefix: str) -> None:
			
 
				+    if "max_keywords" in payload:
			
 
				+        _assert_type(payload["max_keywords"], int, f"{prefix}.max_keywords")
			
 
				+        _assert_range(payload["max_keywords"], f"{prefix}.max_keywords", low=1, high=50)
			
 
				+    if "initial_cursor" in payload:
			
 
				+        _assert_type(payload["initial_cursor"], (str, int), f"{prefix}.initial_cursor")
			
 
				+    if "keyword_priority" in payload:
			
 
				+        _assert_type(payload["keyword_priority"], str, f"{prefix}.keyword_priority")
			
 
				+        if payload["keyword_priority"] not in ("demand_first", "query_first"):
			
 
				+            raise ValueError(f"{prefix}.keyword_priority 仅支持 demand_first/query_first")
			
 
				+    if "extra_keywords" in payload:
			
 
				+        _assert_type(payload["extra_keywords"], list, f"{prefix}.extra_keywords")
			
 
				+    if "recall_multiplier" in payload:
			
 
				+        _assert_type(payload["recall_multiplier"], (int, float), f"{prefix}.recall_multiplier")
			
 
				+        _assert_range(float(payload["recall_multiplier"]), f"{prefix}.recall_multiplier", low=1.0, high=20.0)
			
 
				+    if "min_candidate_multiplier" in payload:
			
 
				+        _assert_type(payload["min_candidate_multiplier"], (int, float), f"{prefix}.min_candidate_multiplier")
			
 
				+        _assert_range(
			
 
				+            float(payload["min_candidate_multiplier"]),
			
 
				+            f"{prefix}.min_candidate_multiplier",
			
 
				+            low=1.0,
			
 
				+            high=10.0,
			
 
				+        )
			
 
				+    if "near_enough_candidate_multiplier" in payload:
			
 
				+        _assert_type(
			
 
				+            payload["near_enough_candidate_multiplier"],
			
 
				+            (int, float),
			
 
				+            f"{prefix}.near_enough_candidate_multiplier",
			
 
				+        )
			
 
				+        _assert_range(
			
 
				+            float(payload["near_enough_candidate_multiplier"]),
			
 
				+            f"{prefix}.near_enough_candidate_multiplier",
			
 
				+            low=0.5,
			
 
				+            high=5.0,
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+def _validate_filter(payload: dict[str, Any], *, prefix: str) -> None:
			
 
				+    if "filter_near_ratio" in payload:
			
 
				+        _assert_type(payload["filter_near_ratio"], (int, float), f"{prefix}.filter_near_ratio")
			
 
				+        _assert_range(float(payload["filter_near_ratio"]), f"{prefix}.filter_near_ratio", low=0.0, high=1.0)
			
 
				+    if "max_detail_fetch" in payload:
			
 
				+        _assert_type(payload["max_detail_fetch"], int, f"{prefix}.max_detail_fetch")
			
 
				+        _assert_range(payload["max_detail_fetch"], f"{prefix}.max_detail_fetch", low=1, high=500)
			
 
				+    if "enable_llm_review" in payload:
			
 
				+        _assert_type(payload["enable_llm_review"], bool, f"{prefix}.enable_llm_review")
			
 
				+    quality = payload.get("quality_score")
			
 
				+    if quality is not None:
			
 
				+        _assert_type(quality, dict, f"{prefix}.quality_score")
			
 
				+        if "spam_keywords" in quality:
			
 
				+            _assert_type(quality["spam_keywords"], list, f"{prefix}.quality_score.spam_keywords")
			
 
				+
			
 
				+
			
 
				+def _validate_runtime(payload: dict[str, Any], *, prefix: str) -> None:
			
 
				+    if "target_count" in payload and payload["target_count"] is not None:
			
 
				+        _assert_type(payload["target_count"], int, f"{prefix}.target_count")
			
 
				+        _assert_range(payload["target_count"], f"{prefix}.target_count", low=1, high=200)
			
 
				+
			
 
				+
			
 
				+def _validate_account(payload: dict[str, Any], *, prefix: str) -> None:
			
 
				+    strategy = payload.get("account_strategy")
			
 
				+    if strategy is None:
			
 
				+        return
			
 
				+    _assert_type(strategy, dict, f"{prefix}.account_strategy")
			
 
				+    if "sample_articles_limit" in strategy:
			
 
				+        _assert_type(strategy["sample_articles_limit"], int, f"{prefix}.account_strategy.sample_articles_limit")
			
 
				+        _assert_range(
			
 
				+            strategy["sample_articles_limit"],
			
 
				+            f"{prefix}.account_strategy.sample_articles_limit",
			
 
				+            low=1,
			
 
				+            high=20,
			
 
				+        )
			
 
				+    if "source_urls_limit" in strategy:
			
 
				+        _assert_type(strategy["source_urls_limit"], int, f"{prefix}.account_strategy.source_urls_limit")
			
 
				+        _assert_range(
			
 
				+            strategy["source_urls_limit"],
			
 
				+            f"{prefix}.account_strategy.source_urls_limit",
			
 
				+            low=1,
			
 
				+            high=5000,
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+def validate_strategy_override(payload: dict[str, Any], *, source: str = "strategy override") -> None:
			
 
				+    """校验策略覆盖对象（支持新结构与平铺旧结构）。"""
			
 
				+    if not isinstance(payload, dict):
			
 
				+        raise ValueError(f"{source} 必须是 JSON 对象")
			
 
				+
			
 
				+    search = payload.get("search")
			
 
				+    if search is not None:
			
 
				+        _assert_type(search, dict, f"{source}.search")
			
 
				+        _validate_search(search, prefix=f"{source}.search")
			
 
				+    filter_cfg = payload.get("filter")
			
 
				+    if filter_cfg is not None:
			
 
				+        _assert_type(filter_cfg, dict, f"{source}.filter")
			
 
				+        _validate_filter(filter_cfg, prefix=f"{source}.filter")
			
 
				+    runtime = payload.get("runtime")
			
 
				+    if runtime is not None:
			
 
				+        _assert_type(runtime, dict, f"{source}.runtime")
			
 
				+        _validate_runtime(runtime, prefix=f"{source}.runtime")
			
 
				+    account = payload.get("account")
			
 
				+    if account is not None:
			
 
				+        _assert_type(account, dict, f"{source}.account")
			
 
				+        _validate_account(account, prefix=f"{source}.account")
			
 
				+
			
 
				+    # 兼容旧平铺字段
			
 
				+    _validate_search(payload, prefix=source)
			
 
				+    _validate_filter(payload, prefix=source)
			
 
				+    _validate_runtime(payload, prefix=source)
			
 
				+    _validate_account(payload, prefix=source)
			
--- a/src/harness/search_agent/summary.py
+++ b/src/harness/search_agent/summary.py
@@ -0,0 +1,60 @@
 
				+"""Observer Harness：运行结束后的结构化摘要。"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import logging
			
 
				+from dataclasses import dataclass, field
			
 
				+from typing import Any, Optional
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class RunSummary:
			
 
				+    """
			
 
				+    Agent 运行后的结构化摘要（非裸日志）。
			
 
				+
			
 
				+    设计意图：
			
 
				+    - 调用方可检查 success / error_message 决定后续动作。
			
 
				+    - 关键指标（candidate_count / filtered_count）可接入告警。
			
 
				+    """
			
 
				+
			
 
				+    success: bool
			
 
				+    query: str
			
 
				+    demand_id: str
			
 
				+    policy_source: str = "unknown"
			
 
				+    trace_id: Optional[str] = None
			
 
				+    output_file: str = ""
			
 
				+    candidate_count: int = 0
			
 
				+    filtered_count: int = 0
			
 
				+    account_count: int = 0
			
 
				+    elapsed_seconds: float = 0.0
			
 
				+    error_message: str = ""
			
 
				+    stage_history: list[dict[str, Any]] = field(default_factory=list)
			
 
				+
			
 
				+    def log(self) -> None:
			
 
				+        status = "✅ 成功" if self.success else "❌ 失败"
			
 
				+        logger.info("=" * 60)
			
 
				+        logger.info("Agent 运行摘要 %s", status)
			
 
				+        logger.info("  query        : %s", self.query)
			
 
				+        logger.info("  demand_id    : %s", self.demand_id)
			
 
				+        logger.info("  policy_source: %s", self.policy_source)
			
 
				+        logger.info("  trace_id     : %s", self.trace_id)
			
 
				+        logger.info("  output_file  : %s", self.output_file)
			
 
				+        logger.info("  候选文章数    : %d", self.candidate_count)
			
 
				+        logger.info("  入选文章数    : %d", self.filtered_count)
			
 
				+        logger.info("  账号数        : %d", self.account_count)
			
 
				+        logger.info("  耗时          : %.1f 秒", self.elapsed_seconds)
			
 
				+        if self.error_message:
			
 
				+            logger.error("  错误信息      : %s", self.error_message)
			
 
				+        if self.stage_history:
			
 
				+            logger.info("  阶段历史:")
			
 
				+            for record in self.stage_history:
			
 
				+                status_flag = "✓" if record.get("status") == "completed" else "✗"
			
 
				+                logger.info(
			
 
				+                    "    %s %-28s attempt=%d",
			
 
				+                    status_flag,
			
 
				+                    record.get("stage_name", "?"),
			
 
				+                    record.get("attempt", 1),
			
 
				+                )
			
 
				+        logger.info("=" * 60)
			
--- a/src/knowledge/__init__.py
+++ b/src/knowledge/__init__.py
@@ -0,0 +1,18 @@
 
				+"""
			
 
				+知识管理模块
			
 
				+
			
 
				+从 Claude Code 对话历史中提取问答对并生成每日总结。
			
 
				+"""
			
 
				+
			
 
				+from .auto_trigger import trigger_knowledge_summary
			
 
				+from .conversation_parser import ConversationParser, QAPair
			
 
				+from .knowledge_base import KnowledgeBase
			
 
				+from .summarizer import KnowledgeSummarizer
			
 
				+
			
 
				+__all__ = [
			
 
				+    "trigger_knowledge_summary",
			
 
				+    "ConversationParser",
			
 
				+    "QAPair",
			
 
				+    "KnowledgeBase",
			
 
				+    "KnowledgeSummarizer",
			
 
				+]
			
--- a/src/knowledge/auto_trigger.py
+++ b/src/knowledge/auto_trigger.py
@@ -0,0 +1,126 @@
 
				+"""
			
 
				+知识总结自动触发器
			
 
				+
			
 
				+从 Claude Code 对话历史中提取问答对并生成每日总结。
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import logging
			
 
				+import os
			
 
				+from datetime import datetime, timezone
			
 
				+from pathlib import Path
			
 
				+from typing import Callable, Dict, List, Optional
			
 
				+
			
 
				+from .conversation_parser import ConversationParser, QAPair
			
 
				+from .knowledge_base import KnowledgeBase
			
 
				+from .summarizer import KnowledgeSummarizer
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+# Claude Code 对话历史目录
			
 
				+_CLAUDE_PROJECTS_DIR = Path.home() / ".claude" / "projects"
			
 
				+
			
 
				+
			
 
				+def find_project_dir(cwd: str | None = None) -> Optional[Path]:
			
 
				+    """查找当前项目对应的 Claude Code 对话历史目录
			
 
				+
			
 
				+    Args:
			
 
				+        cwd: 工作目录（默认使用当前目录）
			
 
				+
			
 
				+    Returns:
			
 
				+        项目对话历史目录路径
			
 
				+    """
			
 
				+    cwd = cwd or os.getcwd()
			
 
				+    # Claude Code 将路径中的 / 替换为 -
			
 
				+    encoded = cwd.replace("/", "-")
			
 
				+    project_dir = _CLAUDE_PROJECTS_DIR / encoded
			
 
				+    if project_dir.exists():
			
 
				+        return project_dir
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+def find_today_sessions(
			
 
				+    project_dir: Path,
			
 
				+    date: datetime | None = None,
			
 
				+) -> List[Path]:
			
 
				+    """查找指定日期的会话文件
			
 
				+
			
 
				+    Args:
			
 
				+        project_dir: 项目对话历史目录
			
 
				+        date: 日期（默认今天）
			
 
				+
			
 
				+    Returns:
			
 
				+        会话文件路径列表
			
 
				+    """
			
 
				+    date = date or datetime.now()
			
 
				+    date_str = date.strftime("%Y-%m-%d")
			
 
				+
			
 
				+    session_files = []
			
 
				+    for f in sorted(project_dir.glob("*.jsonl")):
			
 
				+        # 检查文件修改时间是否在指定日期
			
 
				+        mtime = datetime.fromtimestamp(f.stat().st_mtime)
			
 
				+        if mtime.strftime("%Y-%m-%d") == date_str:
			
 
				+            session_files.append(f)
			
 
				+
			
 
				+    return session_files
			
 
				+
			
 
				+
			
 
				+async def trigger_knowledge_summary(
			
 
				+    llm_call: Optional[Callable] = None,
			
 
				+    cwd: str | None = None,
			
 
				+    date: datetime | None = None,
			
 
				+    knowledge_dir: str = "knowledge",
			
 
				+    model: str = "",
			
 
				+) -> Optional[Path]:
			
 
				+    """触发知识总结流程
			
 
				+
			
 
				+    Args:
			
 
				+        llm_call: LLM 调用函数（None 则使用简单格式化）
			
 
				+        cwd: 工作目录
			
 
				+        date: 日期（默认今天）
			
 
				+        knowledge_dir: 知识库目录
			
 
				+        model: 总结使用的模型
			
 
				+
			
 
				+    Returns:
			
 
				+        生成的 Markdown 文件路径
			
 
				+    """
			
 
				+    date = date or datetime.now()
			
 
				+
			
 
				+    # 1. 查找项目目录
			
 
				+    project_dir = find_project_dir(cwd)
			
 
				+    if not project_dir:
			
 
				+        logger.warning("未找到 Claude Code 对话历史目录")
			
 
				+        return None
			
 
				+
			
 
				+    # 2. 查找当天的会话文件
			
 
				+    session_files = find_today_sessions(project_dir, date)
			
 
				+    if not session_files:
			
 
				+        logger.info("今天没有对话记录")
			
 
				+        return None
			
 
				+
			
 
				+    logger.info("找到 %d 个会话文件", len(session_files))
			
 
				+
			
 
				+    # 3. 解析所有会话
			
 
				+    parser = ConversationParser()
			
 
				+    sessions: Dict[str, List[QAPair]] = {}
			
 
				+    for sf in session_files:
			
 
				+        session_id = sf.stem
			
 
				+        qa_pairs = parser.parse_session(sf)
			
 
				+        if qa_pairs:
			
 
				+            sessions[session_id] = qa_pairs
			
 
				+            logger.info("会话 %s: 提取 %d 个问答对", session_id[:8], len(qa_pairs))
			
 
				+
			
 
				+    if not sessions:
			
 
				+        logger.info("未提取到有效的问答对")
			
 
				+        return None
			
 
				+
			
 
				+    # 4. 总结
			
 
				+    summarizer = KnowledgeSummarizer(llm_call=llm_call, model=model)
			
 
				+    content = await summarizer.summarize_daily(sessions, date)
			
 
				+    if not content:
			
 
				+        return None
			
 
				+
			
 
				+    # 5. 保存
			
 
				+    kb = KnowledgeBase(base_dir=knowledge_dir)
			
 
				+    return kb.save_daily_summary(date, content)
			
--- a/src/knowledge/conversation_parser.py
+++ b/src/knowledge/conversation_parser.py
@@ -0,0 +1,190 @@
 
				+"""
			
 
				+对话历史解析器
			
 
				+
			
 
				+从 Claude Code 的 JSONL 对话历史中提取问答对。
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import json
			
 
				+import logging
			
 
				+from dataclasses import dataclass
			
 
				+from datetime import datetime
			
 
				+from pathlib import Path
			
 
				+from typing import List, Optional
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class QAPair:
			
 
				+    """问答对数据结构"""
			
 
				+    question: str           # 用户问题
			
 
				+    answer: str            # 助手回答
			
 
				+    timestamp: datetime    # 时间戳
			
 
				+    session_id: str        # 会话 ID
			
 
				+    cwd: str              # 工作目录
			
 
				+    git_branch: str       # Git 分支
			
 
				+    tool_calls: List[str] # 使用的工具列表
			
 
				+
			
 
				+
			
 
				+class ConversationParser:
			
 
				+    """对话历史解析器"""
			
 
				+
			
 
				+    def __init__(self, min_question_length: int = 10):
			
 
				+        """
			
 
				+        Args:
			
 
				+            min_question_length: 最小问题长度（过滤短问题）
			
 
				+        """
			
 
				+        self.min_question_length = min_question_length
			
 
				+
			
 
				+    def parse_session(self, session_file: Path) -> List[QAPair]:
			
 
				+        """解析单个会话文件，返回问答对列表
			
 
				+
			
 
				+        Args:
			
 
				+            session_file: JSONL 会话文件路径
			
 
				+
			
 
				+        Returns:
			
 
				+            问答对列表
			
 
				+        """
			
 
				+        entries = self._read_jsonl(session_file)
			
 
				+        return self._pair_messages(entries)
			
 
				+
			
 
				+    def _read_jsonl(self, path: Path) -> List[dict]:
			
 
				+        """读取 JSONL 文件，返回所有条目"""
			
 
				+        entries = []
			
 
				+        for line in path.read_text(encoding="utf-8").splitlines():
			
 
				+            line = line.strip()
			
 
				+            if not line:
			
 
				+                continue
			
 
				+            try:
			
 
				+                entries.append(json.loads(line))
			
 
				+            except json.JSONDecodeError:
			
 
				+                logger.debug("跳过无效 JSON 行: %s", line[:80])
			
 
				+        return entries
			
 
				+
			
 
				+    def _extract_user_text(self, entry: dict) -> Optional[str]:
			
 
				+        """从 user 类型条目中提取纯文本内容"""
			
 
				+        if entry.get("type") != "user":
			
 
				+            return None
			
 
				+        msg = entry.get("message", {})
			
 
				+        if msg.get("role") != "user":
			
 
				+            return None
			
 
				+        content = msg.get("content", "")
			
 
				+        if not isinstance(content, str):
			
 
				+            # content 可能是 list（多模态），提取文本部分
			
 
				+            if isinstance(content, list):
			
 
				+                parts = [p.get("text", "") for p in content if isinstance(p, dict) and p.get("type") == "text"]
			
 
				+                content = "\n".join(parts)
			
 
				+            else:
			
 
				+                return None
			
 
				+        # 过滤系统命令和元消息
			
 
				+        if entry.get("isMeta"):
			
 
				+            return None
			
 
				+        if content.startswith("<command-name>") or content.startswith("<local-command-caveat>"):
			
 
				+            return None
			
 
				+        content = content.strip()
			
 
				+        if len(content) < self.min_question_length:
			
 
				+            return None
			
 
				+        return content
			
 
				+
			
 
				+    def _extract_assistant_text(self, entry: dict) -> Optional[str]:
			
 
				+        """从 assistant 类型条目中提取纯文本回复"""
			
 
				+        if entry.get("type") != "assistant":
			
 
				+            return None
			
 
				+        msg = entry.get("message", {})
			
 
				+        if msg.get("role") != "assistant":
			
 
				+            return None
			
 
				+        content = msg.get("content", "")
			
 
				+        if isinstance(content, str):
			
 
				+            return content.strip() if content.strip() else None
			
 
				+        if isinstance(content, list):
			
 
				+            text_parts = []
			
 
				+            for block in content:
			
 
				+                if isinstance(block, dict) and block.get("type") == "text":
			
 
				+                    text_parts.append(block.get("text", ""))
			
 
				+            return "\n".join(text_parts).strip() if text_parts else None
			
 
				+        return None
			
 
				+
			
 
				+    def _extract_tool_calls(self, entry: dict) -> List[str]:
			
 
				+        """从 assistant 条目中提取工具调用名称"""
			
 
				+        msg = entry.get("message", {})
			
 
				+        content = msg.get("content", "")
			
 
				+        if not isinstance(content, list):
			
 
				+            return []
			
 
				+        return [
			
 
				+            block.get("name", "")
			
 
				+            for block in content
			
 
				+            if isinstance(block, dict) and block.get("type") == "tool_use" and block.get("name")
			
 
				+        ]
			
 
				+
			
 
				+    def _pair_messages(self, entries: List[dict]) -> List[QAPair]:
			
 
				+        """将用户消息和助手回复配对为问答对"""
			
 
				+        qa_pairs = []
			
 
				+        session_id = ""
			
 
				+        cwd = ""
			
 
				+        git_branch = ""
			
 
				+
			
 
				+        i = 0
			
 
				+        while i < len(entries):
			
 
				+            entry = entries[i]
			
 
				+
			
 
				+            # 提取会话元信息
			
 
				+            if entry.get("sessionId"):
			
 
				+                session_id = entry["sessionId"]
			
 
				+            if entry.get("cwd"):
			
 
				+                cwd = entry["cwd"]
			
 
				+            if entry.get("gitBranch"):
			
 
				+                git_branch = entry["gitBranch"]
			
 
				+
			
 
				+            user_text = self._extract_user_text(entry)
			
 
				+            if user_text is None:
			
 
				+                i += 1
			
 
				+                continue
			
 
				+
			
 
				+            timestamp = self._parse_timestamp(entry.get("timestamp", ""))
			
 
				+
			
 
				+            # 向后查找对应的 assistant 回复
			
 
				+            assistant_text = None
			
 
				+            tool_calls = []
			
 
				+            j = i + 1
			
 
				+            while j < len(entries):
			
 
				+                next_entry = entries[j]
			
 
				+                a_text = self._extract_assistant_text(next_entry)
			
 
				+                t_calls = self._extract_tool_calls(next_entry)
			
 
				+                if t_calls:
			
 
				+                    tool_calls.extend(t_calls)
			
 
				+                if a_text:
			
 
				+                    assistant_text = a_text
			
 
				+                    break
			
 
				+                # 遇到下一个 user 消息则停止
			
 
				+                if self._extract_user_text(next_entry) is not None:
			
 
				+                    break
			
 
				+                j += 1
			
 
				+
			
 
				+            if assistant_text:
			
 
				+                qa_pairs.append(QAPair(
			
 
				+                    question=user_text,
			
 
				+                    answer=assistant_text,
			
 
				+                    timestamp=timestamp,
			
 
				+                    session_id=session_id,
			
 
				+                    cwd=cwd,
			
 
				+                    git_branch=git_branch,
			
 
				+                    tool_calls=list(set(tool_calls)),
			
 
				+                ))
			
 
				+
			
 
				+            i = j if j > i else i + 1
			
 
				+
			
 
				+        return qa_pairs
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _parse_timestamp(ts: str | float) -> datetime:
			
 
				+        """解析时间戳（ISO 格式字符串或毫秒时间戳）"""
			
 
				+        if isinstance(ts, (int, float)):
			
 
				+            return datetime.fromtimestamp(ts / 1000 if ts > 1e12 else ts)
			
 
				+        if isinstance(ts, str) and ts:
			
 
				+            try:
			
 
				+                return datetime.fromisoformat(ts.replace("Z", "+00:00"))
			
 
				+            except ValueError:
			
 
				+                pass
			
 
				+        return datetime.now()
			
--- a/src/knowledge/knowledge_base.py
+++ b/src/knowledge/knowledge_base.py
@@ -0,0 +1,93 @@
 
				+"""
			
 
				+知识库管理器
			
 
				+
			
 
				+管理本地 Markdown 知识库，按日期组织文件。
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import logging
			
 
				+from datetime import datetime
			
 
				+from pathlib import Path
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+class KnowledgeBase:
			
 
				+    """知识库管理器"""
			
 
				+
			
 
				+    def __init__(self, base_dir: Path | str = "knowledge"):
			
 
				+        """
			
 
				+        Args:
			
 
				+            base_dir: 知识库根目录
			
 
				+        """
			
 
				+        self.base_dir = Path(base_dir)
			
 
				+        self.base_dir.mkdir(exist_ok=True)
			
 
				+
			
 
				+    def save_daily_summary(self, date: datetime, content: str) -> Path:
			
 
				+        """保存每日总结到对应的 Markdown 文件
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期
			
 
				+            content: Markdown 内容
			
 
				+
			
 
				+        Returns:
			
 
				+            保存的文件路径
			
 
				+        """
			
 
				+        file_path = self.get_daily_file(date)
			
 
				+        file_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				+        file_path.write_text(content, encoding="utf-8")
			
 
				+        logger.info("已保存每日总结: %s", file_path)
			
 
				+        return file_path
			
 
				+
			
 
				+    def append_to_daily(self, date: datetime, content: str) -> Path:
			
 
				+        """追加内容到当天的文件
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期
			
 
				+            content: 要追加的 Markdown 内容
			
 
				+
			
 
				+        Returns:
			
 
				+            文件路径
			
 
				+        """
			
 
				+        file_path = self.get_daily_file(date)
			
 
				+        file_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+        if file_path.exists():
			
 
				+            existing = file_path.read_text(encoding="utf-8")
			
 
				+            content = f"{existing}\n\n{content}"
			
 
				+
			
 
				+        file_path.write_text(content, encoding="utf-8")
			
 
				+        logger.info("已追加内容到: %s", file_path)
			
 
				+        return file_path
			
 
				+
			
 
				+    def get_daily_file(self, date: datetime) -> Path:
			
 
				+        """获取指定日期的文件路径
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期
			
 
				+
			
 
				+        Returns:
			
 
				+            文件路径（格式：knowledge/2026-04/2026-04-23.md）
			
 
				+        """
			
 
				+        month_dir = self.base_dir / date.strftime("%Y-%m")
			
 
				+        return month_dir / f"{date.strftime('%Y-%m-%d')}.md"
			
 
				+
			
 
				+    def list_daily_files(self, year: int | None = None, month: int | None = None) -> list[Path]:
			
 
				+        """列出知识库中的文件
			
 
				+
			
 
				+        Args:
			
 
				+            year: 年份（None 表示所有年份）
			
 
				+            month: 月份（None 表示所有月份）
			
 
				+
			
 
				+        Returns:
			
 
				+            文件路径列表
			
 
				+        """
			
 
				+        if year and month:
			
 
				+            pattern = f"{year:04d}-{month:02d}/*.md"
			
 
				+        elif year:
			
 
				+            pattern = f"{year:04d}-*/*.md"
			
 
				+        else:
			
 
				+            pattern = "*/*.md"
			
 
				+
			
 
				+        return sorted(self.base_dir.glob(pattern))
			
--- a/src/knowledge/summarizer.py
+++ b/src/knowledge/summarizer.py
@@ -0,0 +1,163 @@
 
				+"""
			
 
				+知识总结器
			
 
				+
			
 
				+使用 LLM 对问答对进行智能总结，生成结构化 Markdown。
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+import logging
			
 
				+import os
			
 
				+from datetime import datetime
			
 
				+from typing import Callable, Dict, Any, List, Optional
			
 
				+
			
 
				+from .conversation_parser import QAPair
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+SUMMARY_PROMPT = """你是一个知识管理助手。请总结以下对话记录，提取关键的问答对。
			
 
				+
			
 
				+## 对话记录
			
 
				+{qa_pairs_text}
			
 
				+
			
 
				+## 输出要求
			
 
				+请按以下 Markdown 格式输出：
			
 
				+
			
 
				+### Q: [用户问题简述]
			
 
				+**问题**：[完整问题描述]
			
 
				+
			
 
				+**解决方案**：
			
 
				+- [关键步骤或要点]
			
 
				+
			
 
				+**涉及文件**：
			
 
				+- `path/to/file.py`（如果对话中提到了文件路径）
			
 
				+
			
 
				+**相关技术**：
			
 
				+- 技术名称或工具
			
 
				+
			
 
				+---
			
 
				+
			
 
				+规则：
			
 
				+1. 每个问答对独立成段，用 --- 分隔
			
 
				+2. 提取技术要点和关键决策
			
 
				+3. 记录涉及的文件路径
			
 
				+4. 过滤无意义的对话（如 "hi", "ok", "thanks" 等）
			
 
				+5. 用中文输出
			
 
				+6. 如果对话内容太短或无实质内容，直接返回空字符串
			
 
				+"""
			
 
				+
			
 
				+
			
 
				+class KnowledgeSummarizer:
			
 
				+    """知识总结器"""
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        llm_call: Optional[Callable] = None,
			
 
				+        model: str = "",
			
 
				+    ):
			
 
				+        self.llm_call = llm_call
			
 
				+        self.model = model or os.getenv("KNOWLEDGE_SUMMARY_MODEL", "anthropic/claude-sonnet-4.5")
			
 
				+
			
 
				+    async def summarize_session(
			
 
				+        self,
			
 
				+        qa_pairs: List[QAPair],
			
 
				+        session_index: int = 1,
			
 
				+    ) -> str:
			
 
				+        """总结单个会话的问答对
			
 
				+
			
 
				+        Args:
			
 
				+            qa_pairs: 问答对列表
			
 
				+            session_index: 会话序号
			
 
				+
			
 
				+        Returns:
			
 
				+            Markdown 格式的总结文本
			
 
				+        """
			
 
				+        if not qa_pairs:
			
 
				+            return ""
			
 
				+
			
 
				+        # 如果没有 LLM，使用简单格式化
			
 
				+        if not self.llm_call:
			
 
				+            return self._format_without_llm(qa_pairs, session_index)
			
 
				+
			
 
				+        qa_text = self._format_qa_pairs_for_prompt(qa_pairs)
			
 
				+        prompt = SUMMARY_PROMPT.format(qa_pairs_text=qa_text)
			
 
				+
			
 
				+        try:
			
 
				+            result = await self.llm_call(
			
 
				+                messages=[
			
 
				+                    {"role": "system", "content": "你是一个知识管理助手，擅长从对话中提取关键知识。"},
			
 
				+                    {"role": "user", "content": prompt},
			
 
				+                ],
			
 
				+                model=self.model,
			
 
				+            )
			
 
				+            content = result.get("content", "")
			
 
				+            if isinstance(content, list):
			
 
				+                content = "\n".join(
			
 
				+                    b.get("text", "") for b in content if isinstance(b, dict) and b.get("type") == "text"
			
 
				+                )
			
 
				+            content = content.strip()
			
 
				+            if not content:
			
 
				+                return self._format_without_llm(qa_pairs, session_index)
			
 
				+            return f"## 会话 {session_index}\n\n{content}"
			
 
				+        except Exception as exc:
			
 
				+            logger.warning("LLM 总结失败，降级为简单格式: %s", exc)
			
 
				+            return self._format_without_llm(qa_pairs, session_index)
			
 
				+
			
 
				+    async def summarize_daily(
			
 
				+        self,
			
 
				+        sessions: Dict[str, List[QAPair]],
			
 
				+        date: datetime,
			
 
				+    ) -> str:
			
 
				+        """总结一天所有会话
			
 
				+
			
 
				+        Args:
			
 
				+            sessions: {session_id: [QAPair, ...]}
			
 
				+            date: 日期
			
 
				+
			
 
				+        Returns:
			
 
				+            完整的每日总结 Markdown
			
 
				+        """
			
 
				+        total_qa = sum(len(pairs) for pairs in sessions.values())
			
 
				+        if total_qa == 0:
			
 
				+            return ""
			
 
				+
			
 
				+        header = (
			
 
				+            f"# {date.strftime('%Y-%m-%d')} 对话总结\n\n"
			
 
				+            f"> 生成时间：{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
			
 
				+            f"> 会话数：{len(sessions)}\n"
			
 
				+            f"> 问答对数：{total_qa}\n\n---\n"
			
 
				+        )
			
 
				+
			
 
				+        parts = [header]
			
 
				+        for idx, (session_id, qa_pairs) in enumerate(sessions.items(), 1):
			
 
				+            if not qa_pairs:
			
 
				+                continue
			
 
				+            section = await self.summarize_session(qa_pairs, idx)
			
 
				+            if section:
			
 
				+                parts.append(section)
			
 
				+
			
 
				+        return "\n\n".join(parts)
			
 
				+
			
 
				+    def _format_qa_pairs_for_prompt(self, qa_pairs: List[QAPair]) -> str:
			
 
				+        """将问答对格式化为 prompt 输入文本"""
			
 
				+        lines = []
			
 
				+        for i, qa in enumerate(qa_pairs, 1):
			
 
				+            lines.append(f"### 对话 {i}")
			
 
				+            lines.append(f"**用户**: {qa.question[:2000]}")
			
 
				+            lines.append(f"**助手**: {qa.answer[:3000]}")
			
 
				+            if qa.tool_calls:
			
 
				+                lines.append(f"**使用工具**: {', '.join(qa.tool_calls)}")
			
 
				+            lines.append("")
			
 
				+        return "\n".join(lines)
			
 
				+
			
 
				+    def _format_without_llm(self, qa_pairs: List[QAPair], session_index: int) -> str:
			
 
				+        """无 LLM 时的简单格式化"""
			
 
				+        parts = [f"## 会话 {session_index}\n"]
			
 
				+        for qa in qa_pairs:
			
 
				+            parts.append(f"### Q: {qa.question[:100]}")
			
 
				+            parts.append(f"**问题**：{qa.question}\n")
			
 
				+            parts.append(f"**回答**：{qa.answer[:500]}\n")
			
 
				+            if qa.tool_calls:
			
 
				+                parts.append(f"**使用工具**：{', '.join(qa.tool_calls)}\n")
			
 
				+            parts.append("---\n")
			
 
				+        return "\n".join(parts)
			
--- a/src/pipeline/policy_resolver.py
+++ b/src/pipeline/policy_resolver.py
@@ -0,0 +1,29 @@
 
				+from __future__ import annotations
			
 
				+
			
 
				+"""统一读取 ctx.metadata['search_agent_policy'] 的工具函数。"""
			
 
				+
			
 
				+from typing import Any
			
 
				+
			
 
				+
			
 
				+def _section(policy: dict[str, Any], name: str) -> dict[str, Any]:
			
 
				+    value = policy.get(name)
			
 
				+    return value if isinstance(value, dict) else {}
			
 
				+
			
 
				+
			
 
				+def get_policy_value(policy: dict[str, Any], key: str, default: Any, *, section: str | None = None) -> Any:
			
 
				+    """
			
 
				+    从策略中读取配置值（兼容新旧两种结构）。
			
 
				+
			
 
				+    优先级：
			
 
				+    1) section.key（新结构）
			
 
				+    2) key（旧结构平铺）
			
 
				+    3) default
			
 
				+    """
			
 
				+    if section:
			
 
				+        scoped = _section(policy, section)
			
 
				+        if key in scoped and scoped[key] is not None:
			
 
				+            return scoped[key]
			
 
				+    value = policy.get(key)
			
 
				+    if value is None:
			
 
				+        return default
			
 
				+    return value
			
--- a/src/pipeline/stages/query_expansion.py
+++ b/src/pipeline/stages/query_expansion.py
@@ -0,0 +1,232 @@
 
				+from __future__ import annotations
			
 
				+
			
 
				+"""查询拓展阶段：基于爆款特征拓展搜索词。"""
			
 
				+
			
 
				+import json
			
 
				+import re
			
 
				+from typing import List, Optional
			
 
				+
			
 
				+from src.pipeline.base import Stage
			
 
				+from src.pipeline.context import ExpandedQuery, PipelineContext
			
 
				+from src.pipeline.stages.common import StageAgentExecutor
			
 
				+
			
 
				+# 语义近似判断阈值（字符 bigram Jaccard）
			
 
				+JACCARD_NEAR_THRESHOLD = 0.72
			
 
				+# 短关键词长度阈值（归一化后字符数），短词使用更严格的近似判断
			
 
				+SHORT_KEYWORD_LENGTH = 4
			
 
				+
			
 
				+
			
 
				+class QueryExpansionStage(Stage):
			
 
				+    name = "query_expansion"
			
 
				+    description = "基于爆款特征拓展搜索词"
			
 
				+
			
 
				+    def __init__(self, agent_executor: StageAgentExecutor):
			
 
				+        self.agent_executor = agent_executor
			
 
				+
			
 
				+    def validate_input(self, ctx: PipelineContext) -> List[str]:
			
 
				+        if not ctx.demand_analysis:
			
 
				+            return ["缺少 demand_analysis"]
			
 
				+        return []
			
 
				+
			
 
				+    async def execute(self, ctx: PipelineContext) -> PipelineContext:
			
 
				+        """
			
 
				+        执行查询拓展。
			
 
				+
			
 
				+        输入：ctx.demand_analysis（包含 precise_keywords 和 topic_keywords）
			
 
				+        输出：ctx.expanded_query（包含拓展后的关键词列表）
			
 
				+
			
 
				+        fallback 轮次（_fallback_round >= 1）时：
			
 
				+        - 注入上一轮已使用搜索词和效果数据，要求 LLM 换角度拓词
			
 
				+        - 新拓词与上一轮拓词合并后统一去重，而非覆盖
			
 
				+        """
			
 
				+        analysis = ctx.demand_analysis
			
 
				+        assert analysis is not None
			
 
				+
			
 
				+        # 收集原始关键词
			
 
				+        original_keywords = (
			
 
				+            analysis.search_strategy.precise_keywords
			
 
				+            + analysis.search_strategy.topic_keywords
			
 
				+        )
			
 
				+
			
 
				+        if not original_keywords:
			
 
				+            ctx.expanded_query = ExpandedQuery(
			
 
				+                original_keywords=[],
			
 
				+                expanded_keywords=[],
			
 
				+            )
			
 
				+            return ctx
			
 
				+
			
 
				+        fallback_round = ctx.metadata.get("_fallback_round", 0)
			
 
				+        original_keywords_json = json.dumps(original_keywords, ensure_ascii=False)
			
 
				+
			
 
				+        # 构建 fallback 上下文
			
 
				+        fallback_context = ""
			
 
				+        prev_expanded: List[dict] = []
			
 
				+        if fallback_round >= 1 and ctx.expanded_query:
			
 
				+            prev_expanded = list(ctx.expanded_query.expanded_keywords or [])
			
 
				+            prev_kws = [str(item.get("keyword", "")) for item in prev_expanded if item.get("keyword")]
			
 
				+            keyword_stats: list = ctx.metadata.get("_search_keyword_stats", [])
			
 
				+            stats_json = json.dumps(keyword_stats, ensure_ascii=False) if keyword_stats else "无"
			
 
				+            fallback_context = f"""
			
 
				+⚠️ 这是第 {fallback_round + 1} 轮补充拓词，上一轮搜索结果不够。
			
 
				+
			
 
				+上一轮已使用的拓展词（请勿生成近似词）: {json.dumps(prev_kws, ensure_ascii=False)}
			
 
				+上一轮搜索词效果统计: {stats_json}
			
 
				+
			
 
				+强制要求：
			
 
				+- 禁止生成与上述已使用词语义相近的词
			
 
				+- 必须从全新的角度切入：换主题维度、换表达框架、换受众视角
			
 
				+- 优先补充上一轮未覆盖的下钻方向或长尾表达
			
 
				+"""
			
 
				+
			
 
				+        expansion_count = "12-20" if fallback_round >= 1 else "8-15"
			
 
				+
			
 
				+        messages = [
			
 
				+            {
			
 
				+                "role": "system",
			
 
				+                "content": (
			
 
				+                    "你是搜索词拓展专家。"
			
 
				+                    "你的任务是基于爆款文章标题特征拓展搜索词，提高召回爆款内容的概率。"
			
 
				+                    "你必须严格返回 JSON，并放在 ```json 代码块中。"
			
 
				+                ),
			
 
				+            },
			
 
				+            {
			
 
				+                "role": "user",
			
 
				+                "content": f"""
			
 
				+任务：基于爆款文章特征拓展搜索词。
			
 
				+
			
 
				+原始 query: {ctx.query}
			
 
				+原始关键词: {original_keywords_json}
			
 
				+{fallback_context}
			
 
				+要求：
			
 
				+1. 理解每个关键词的主题领域，匹配适用的爆款特征
			
 
				+2. 为每个原始关键词生成 2-4 个拓展变体
			
 
				+3. 每个变体融入 1-2 个爆款特征的典型表达
			
 
				+4. 标注使用的特征名称和权重总分
			
 
				+5. 按权重总分降序排序，priority 从 1 开始递增
			
 
				+6. 总拓展词数量控制在 {expansion_count} 个
			
 
				+7. 变体之间必须尽量多样，禁止仅做同义替换或字面微调（如"伟大功绩/丰功伟绩"）
			
 
				+
			
 
				+请按照 query_expansion 技能中的方法论执行拓展，完成后输出 JSON：
			
 
				+```json
			
 
				+{{
			
 
				+  "expanded_keywords": [
			
 
				+    {{"keyword": "拓展后的搜索词", "original": "原始关键词", "features": ["特征1"], "weight_sum": 25, "priority": 1}}
			
 
				+  ]
			
 
				+}}
			
 
				+```
			
 
				+""",
			
 
				+            },
			
 
				+        ]
			
 
				+
			
 
				+        result = await self.agent_executor.run_simple_llm_json(
			
 
				+            name="查询拓展",
			
 
				+            messages=messages,
			
 
				+            skills=["query_expansion"],
			
 
				+            ctx=ctx,
			
 
				+        )
			
 
				+
			
 
				+        new_keywords = result.get("expanded_keywords", [])
			
 
				+        new_keywords.sort(key=lambda x: -x.get("weight_sum", 0))
			
 
				+
			
 
				+        # fallback 轮：合并上一轮拓词 + 本轮新拓词，跨轮次统一去重
			
 
				+        if fallback_round >= 1 and prev_expanded:
			
 
				+            # 收集上一轮拓词的归一化形式作为 seed
			
 
				+            existing_norms = [
			
 
				+                _normalize_keyword(str(item.get("keyword", "")))
			
 
				+                for item in prev_expanded
			
 
				+                if str(item.get("keyword", "")).strip()
			
 
				+            ]
			
 
				+            existing_norms = [n for n in existing_norms if n]
			
 
				+            # 新词基于 seed 去重
			
 
				+            new_keywords = _dedupe_expanded_keywords(new_keywords, existing_norms=existing_norms)
			
 
				+            # 合并：上一轮 + 本轮新词
			
 
				+            merged = list(prev_expanded) + new_keywords
			
 
				+            merged.sort(key=lambda x: -x.get("weight_sum", 0))
			
 
				+            expanded_keywords = _dedupe_expanded_keywords(merged)
			
 
				+        else:
			
 
				+            expanded_keywords = _dedupe_expanded_keywords(new_keywords)
			
 
				+
			
 
				+        # 重新分配 priority
			
 
				+        for i, item in enumerate(expanded_keywords, start=1):
			
 
				+            item["priority"] = i
			
 
				+
			
 
				+        ctx.expanded_query = ExpandedQuery(
			
 
				+            original_keywords=original_keywords,
			
 
				+            expanded_keywords=expanded_keywords,
			
 
				+        )
			
 
				+        return ctx
			
 
				+
			
 
				+
			
 
				+def _dedupe_expanded_keywords(
			
 
				+    items: List[dict],
			
 
				+    existing_norms: Optional[List[str]] = None,
			
 
				+) -> List[dict]:
			
 
				+    """按顺序保留更优拓展词，过滤掉重复/近似项。
			
 
				+
			
 
				+    existing_norms: 已有关键词的归一化形式（如上一轮拓词），
			
 
				+    新词会和这些已有词一起做近似判断，避免跨轮次重复。
			
 
				+    """
			
 
				+    kept: List[dict] = []
			
 
				+    kept_norm_keywords: List[str] = list(existing_norms or [])
			
 
				+    for item in items:
			
 
				+        keyword = str(item.get("keyword", "")).strip()
			
 
				+        if not keyword:
			
 
				+            continue
			
 
				+        norm = _normalize_keyword(keyword)
			
 
				+        if not norm:
			
 
				+            continue
			
 
				+        if any(_is_semantically_near(norm, existing_norm) for existing_norm in kept_norm_keywords):
			
 
				+            continue
			
 
				+        kept.append(item)
			
 
				+        kept_norm_keywords.append(norm)
			
 
				+    return kept
			
 
				+
			
 
				+
			
 
				+def _normalize_keyword(keyword: str) -> str:
			
 
				+    lowered = keyword.lower().strip()
			
 
				+    return re.sub(r"[\W_]+", "", lowered)
			
 
				+
			
 
				+
			
 
				+def _is_semantically_near(a: str, b: str) -> bool:
			
 
				+    """使用字符 bigram Jaccard 近似判断语义接近。
			
 
				+
			
 
				+    对短关键词（归一化后 <= SHORT_KEYWORD_LENGTH 字符）额外使用
			
 
				+    编辑距离判断，因为短文本的 bigram 集合太小，Jaccard 不可靠。
			
 
				+    """
			
 
				+    if a == b:
			
 
				+        return True
			
 
				+    if not a or not b:
			
 
				+        return False
			
 
				+    if a in b or b in a:
			
 
				+        return True
			
 
				+    # 短词额外判断：编辑距离 <= 1 视为近似
			
 
				+    if len(a) <= SHORT_KEYWORD_LENGTH or len(b) <= SHORT_KEYWORD_LENGTH:
			
 
				+        if _edit_distance(a, b) <= 1:
			
 
				+            return True
			
 
				+    a_pairs = _char_bigrams(a)
			
 
				+    b_pairs = _char_bigrams(b)
			
 
				+    if not a_pairs or not b_pairs:
			
 
				+        return False
			
 
				+    overlap = len(a_pairs & b_pairs)
			
 
				+    union = len(a_pairs | b_pairs)
			
 
				+    score = overlap / union if union else 0.0
			
 
				+    return score >= JACCARD_NEAR_THRESHOLD
			
 
				+
			
 
				+
			
 
				+def _char_bigrams(text: str) -> set[str]:
			
 
				+    if len(text) <= 1:
			
 
				+        return {text}
			
 
				+    return {text[i:i + 2] for i in range(len(text) - 1)}
			
 
				+
			
 
				+
			
 
				+def _edit_distance(a: str, b: str) -> int:
			
 
				+    """计算两个字符串的编辑距离（Levenshtein），用于短词近似判断。"""
			
 
				+    if abs(len(a) - len(b)) > 1:
			
 
				+        return abs(len(a) - len(b))
			
 
				+    prev = list(range(len(b) + 1))
			
 
				+    for i, ca in enumerate(a, 1):
			
 
				+        curr = [i] + [0] * len(b)
			
 
				+        for j, cb in enumerate(b, 1):
			
 
				+            curr[j] = min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + (0 if ca == cb else 1))
			
 
				+        prev = curr
			
 
				+    return prev[len(b)]
		`@@ -0,0 +1 @@`
		`+"""CLI / 入口外侧的 harness 层（预算、规划、观测、日志等）。"""`