TanJingyu há 10 horas atrás
pai
commit
57dc162e06
46 ficheiros alterados com 2395 adições e 35 exclusões
  1. 60 0
      knowledge_v2/.cache/32377b4374ed/execution_record.json
  2. 60 0
      knowledge_v2/.cache/32377b4374ed/function_knowledge/final_result.json
  3. 1 0
      knowledge_v2/.cache/32377b4374ed/function_knowledge/generated_query.txt
  4. 1 0
      knowledge_v2/.cache/32377b4374ed/function_knowledge/selected_tool.txt
  5. 3 0
      knowledge_v2/.cache/32377b4374ed/function_knowledge/tool_params.json
  6. 2 0
      knowledge_v2/.cache/32377b4374ed/function_knowledge/tool_result.json
  7. 1 0
      knowledge_v2/.cache/32377b4374ed/question.txt
  8. 43 0
      knowledge_v2/.cache/7869ec16cc96/execution_record.json
  9. 43 0
      knowledge_v2/.cache/7869ec16cc96/function_knowledge/final_result.json
  10. 1 0
      knowledge_v2/.cache/7869ec16cc96/function_knowledge/generated_query.txt
  11. 1 0
      knowledge_v2/.cache/7869ec16cc96/function_knowledge/selected_tool.txt
  12. 3 0
      knowledge_v2/.cache/7869ec16cc96/function_knowledge/tool_params.json
  13. 2 0
      knowledge_v2/.cache/7869ec16cc96/function_knowledge/tool_result.json
  14. 1 0
      knowledge_v2/.cache/7869ec16cc96/question.txt
  15. 37 0
      knowledge_v2/.cache/8a3f1b130fec/execution_record.json
  16. 37 0
      knowledge_v2/.cache/8a3f1b130fec/function_knowledge/final_result.json
  17. 1 0
      knowledge_v2/.cache/8a3f1b130fec/function_knowledge/generated_query.txt
  18. 1 0
      knowledge_v2/.cache/8a3f1b130fec/function_knowledge/selected_tool.txt
  19. 3 0
      knowledge_v2/.cache/8a3f1b130fec/function_knowledge/tool_params.json
  20. 2 0
      knowledge_v2/.cache/8a3f1b130fec/function_knowledge/tool_result.json
  21. 1 0
      knowledge_v2/.cache/8a3f1b130fec/question.txt
  22. 61 0
      knowledge_v2/.cache/c035d2052b5f/execution_record.json
  23. 61 0
      knowledge_v2/.cache/c035d2052b5f/function_knowledge/final_result.json
  24. 1 0
      knowledge_v2/.cache/c035d2052b5f/function_knowledge/generated_query.txt
  25. 1 0
      knowledge_v2/.cache/c035d2052b5f/function_knowledge/selected_tool.txt
  26. 3 0
      knowledge_v2/.cache/c035d2052b5f/function_knowledge/tool_params.json
  27. 2 0
      knowledge_v2/.cache/c035d2052b5f/function_knowledge/tool_result.json
  28. 1 0
      knowledge_v2/.cache/c035d2052b5f/question.txt
  29. 109 0
      knowledge_v2/CLEAR_CACHE_GUIDE.md
  30. 271 0
      knowledge_v2/README.md
  31. 149 0
      knowledge_v2/cache_manager.py
  32. 115 0
      knowledge_v2/clear_cache.py
  33. 125 0
      knowledge_v2/clear_cache_simple.py
  34. 529 1
      knowledge_v2/function_knowledge.py
  35. 102 33
      knowledge_v2/llm_search_knowledge.py
  36. 199 1
      knowledge_v2/multi_search_knowledge.py
  37. 13 0
      knowledge_v2/prompt/function_generate_query_prompt.md
  38. 24 0
      knowledge_v2/prompt/function_knowledge_extract_tool_params_prompt.md
  39. 22 0
      knowledge_v2/prompt/function_knowledge_generate_new_tool_prompt.md
  40. 14 0
      knowledge_v2/prompt/function_knowledge_select_tools_prompt.md
  41. 16 0
      knowledge_v2/prompt/llm_search_generate_query_prompt.md
  42. 12 0
      knowledge_v2/prompt/llm_search_merge_knowledge_prompt.md
  43. 16 0
      knowledge_v2/prompt/multi_search_merge_knowledge_prompt.md
  44. 56 0
      knowledge_v2/test_detail_info.py
  45. 21 0
      knowledge_v2/tool_infos/wechat_search_article.txt
  46. 168 0
      knowledge_v2/tools_library.py

Diff do ficheiro suprimidas por serem muito extensas
+ 60 - 0
knowledge_v2/.cache/32377b4374ed/execution_record.json


Diff do ficheiro suprimidas por serem muito extensas
+ 60 - 0
knowledge_v2/.cache/32377b4374ed/function_knowledge/final_result.json


+ 1 - 0
knowledge_v2/.cache/32377b4374ed/function_knowledge/generated_query.txt

@@ -0,0 +1 @@
+小狗穿搭 游戏

+ 1 - 0
knowledge_v2/.cache/32377b4374ed/function_knowledge/selected_tool.txt

@@ -0,0 +1 @@
+wechat_search_article

+ 3 - 0
knowledge_v2/.cache/32377b4374ed/function_knowledge/tool_params.json

@@ -0,0 +1,3 @@
+{
+  "keyword": "小狗穿搭 游戏"
+}

Diff do ficheiro suprimidas por serem muito extensas
+ 2 - 0
knowledge_v2/.cache/32377b4374ed/function_knowledge/tool_result.json


+ 1 - 0
knowledge_v2/.cache/32377b4374ed/question.txt

@@ -0,0 +1 @@
+小狗 穿搭||无||游戏博主

Diff do ficheiro suprimidas por serem muito extensas
+ 43 - 0
knowledge_v2/.cache/7869ec16cc96/execution_record.json


Diff do ficheiro suprimidas por serem muito extensas
+ 43 - 0
knowledge_v2/.cache/7869ec16cc96/function_knowledge/final_result.json


+ 1 - 0
knowledge_v2/.cache/7869ec16cc96/function_knowledge/generated_query.txt

@@ -0,0 +1 @@
+游戏 小猫 穿搭

+ 1 - 0
knowledge_v2/.cache/7869ec16cc96/function_knowledge/selected_tool.txt

@@ -0,0 +1 @@
+wechat_search_article

+ 3 - 0
knowledge_v2/.cache/7869ec16cc96/function_knowledge/tool_params.json

@@ -0,0 +1,3 @@
+{
+  "keyword": "游戏 小猫 穿搭"
+}

Diff do ficheiro suprimidas por serem muito extensas
+ 2 - 0
knowledge_v2/.cache/7869ec16cc96/function_knowledge/tool_result.json


+ 1 - 0
knowledge_v2/.cache/7869ec16cc96/question.txt

@@ -0,0 +1 @@
+小猫 穿搭||无||游戏博主

Diff do ficheiro suprimidas por serem muito extensas
+ 37 - 0
knowledge_v2/.cache/8a3f1b130fec/execution_record.json


Diff do ficheiro suprimidas por serem muito extensas
+ 37 - 0
knowledge_v2/.cache/8a3f1b130fec/function_knowledge/final_result.json


+ 1 - 0
knowledge_v2/.cache/8a3f1b130fec/function_knowledge/generated_query.txt

@@ -0,0 +1 @@
+搜索微信文章

+ 1 - 0
knowledge_v2/.cache/8a3f1b130fec/function_knowledge/selected_tool.txt

@@ -0,0 +1 @@
+wechat_search_article

+ 3 - 0
knowledge_v2/.cache/8a3f1b130fec/function_knowledge/tool_params.json

@@ -0,0 +1,3 @@
+{
+  "keyword": "微信文章"
+}

Diff do ficheiro suprimidas por serem muito extensas
+ 2 - 0
knowledge_v2/.cache/8a3f1b130fec/function_knowledge/tool_result.json


+ 1 - 0
knowledge_v2/.cache/8a3f1b130fec/question.txt

@@ -0,0 +1 @@
+去哪儿搜索微信文章?||无||游戏博主

Diff do ficheiro suprimidas por serem muito extensas
+ 61 - 0
knowledge_v2/.cache/c035d2052b5f/execution_record.json


Diff do ficheiro suprimidas por serem muito extensas
+ 61 - 0
knowledge_v2/.cache/c035d2052b5f/function_knowledge/final_result.json


+ 1 - 0
knowledge_v2/.cache/c035d2052b5f/function_knowledge/generated_query.txt

@@ -0,0 +1 @@
+小老虎 游戏穿搭

+ 1 - 0
knowledge_v2/.cache/c035d2052b5f/function_knowledge/selected_tool.txt

@@ -0,0 +1 @@
+wechat_search_article

+ 3 - 0
knowledge_v2/.cache/c035d2052b5f/function_knowledge/tool_params.json

@@ -0,0 +1,3 @@
+{
+  "keyword": "小老虎 游戏穿搭"
+}

Diff do ficheiro suprimidas por serem muito extensas
+ 2 - 0
knowledge_v2/.cache/c035d2052b5f/function_knowledge/tool_result.json


+ 1 - 0
knowledge_v2/.cache/c035d2052b5f/question.txt

@@ -0,0 +1 @@
+小老虎 穿搭||无||游戏博主

+ 109 - 0
knowledge_v2/CLEAR_CACHE_GUIDE.md

@@ -0,0 +1,109 @@
+# 快速清除缓存指南
+
+## 问题说明
+
+如果您在代码更新前运行过程序,旧的缓存文件不包含`detail`信息(prompt和response)。
+
+更新代码后,系统会从旧缓存读取,导致最终的`execution_record.json`中某些步骤缺少`detail`字段。
+
+## 解决方案
+
+### 方案1: 使用清除工具(推荐)
+
+运行清除缓存工具:
+```bash
+python clear_cache.py
+```
+
+选项:
+1. **列出所有缓存** - 查看缓存了哪些问题
+2. **清除所有缓存** - 删除所有缓存,重新运行
+3. **清除特定问题** - 只删除某个问题的缓存
+
+### 方案2: 手动删除缓存目录
+
+直接删除缓存目录:
+```bash
+# PowerShell
+Remove-Item -Recurse -Force .cache
+```
+
+或者在文件管理器中删除 `.cache` 文件夹。
+
+### 方案3: 代码中清除
+
+```python
+from knowledge_v2.cache_manager import CacheManager
+
+cache = CacheManager()
+
+# 清除所有缓存
+cache.clear()
+
+# 或清除特定问题的缓存
+cache.clear("小老虎 穿搭")
+```
+
+## 重新运行
+
+清除缓存后,重新运行您的代码:
+```python
+from knowledge_v2.function_knowledge import get_knowledge
+
+result = get_knowledge(
+    question="小老虎 穿搭",
+    post_info="无",
+    persona_info="游戏博主"
+)
+```
+
+此时所有步骤都会重新执行,`execution_record.json`中的每个步骤都会包含完整的`detail`信息。
+
+## 验证结果
+
+检查生成的 `execution_record.json`:
+```python
+import json
+
+# 读取执行记录
+with open('.cache/{hash}/execution_record.json', 'r', encoding='utf-8') as f:
+    record = json.load(f)
+
+# 检查每个步骤是否有detail
+for step in record['execution']['steps']:
+    print(f"步骤 {step['step']}: {step['name']}")
+    if 'detail' in step:
+        detail = step['detail']
+        print(f"  - cached: {detail.get('cached', False)}")
+        print(f"  - 有prompt: {'prompt' in detail and detail['prompt'] is not None}")
+        print(f"  - 有response: {'response' in detail and detail['response'] is not None}")
+    else:
+        print("  - ⚠️ 缺少detail字段")
+```
+
+## 预期输出
+
+清除缓存后重新运行,应该看到:
+```
+步骤 1: generate_query
+  - cached: False
+  - 有prompt: True
+  - 有response: True
+步骤 2: select_tool
+  - cached: False
+  - 有prompt: True
+  - 有response: True
+步骤 3: extract_tool_params
+  - cached: False
+  - 有prompt: True
+  - 有response: True
+步骤 4: call_tool
+  (此步骤不涉及LLM,没有detail字段)
+```
+
+## 未来避免此问题
+
+以后如果修改了缓存结构,建议:
+1. 清除旧缓存
+2. 或者升级缓存版本号
+3. 让系统自动识别并忽略旧格式的缓存

+ 271 - 0
knowledge_v2/README.md

@@ -0,0 +1,271 @@
+# Knowledge V2 系统架构说明
+
+## 整体流程梳理
+
+```
+function_knowledge.py (入口)
+    ├── [步骤1] generate_query: 生成查询语句
+    ├── [步骤2] select_tool: 选择工具
+    ├──┬── [有工具]
+    │  │    ├── [步骤3] extract_tool_params: 提取工具参数(通过LLM)
+    │  │    └── [步骤4] call_tool: 调用工具
+    │  └── [无工具] 
+    │       └── [步骤4] multi_search_knowledge.py
+    │                   ├── llm_search_knowledge.py
+    │                   │    ├── generate_queries: 生成多个query
+    │                   │    ├── search_knowledge_batch: 批量搜索
+    │                   │    └── merge_knowledge: 合并多个query的结果
+    │                   └── merge_knowledge: 合并多渠道结果
+    └── [后台] generate_and_save_new_tool: 异步生成新工具
+```
+
+## 核心特性
+
+### 1. 动态工具参数提取
+- **问题**:之前工具调用参数是硬编码的 `{"keyword": query}`
+- **解决**:使用LLM根据工具信息动态提取参数
+- **优势**:通用、智能、支持多参数工具
+- **详细说明**:[IMPROVEMENT_SUMMARY.md](IMPROVEMENT_SUMMARY.md)
+
+### 2. 结构化JSON输出
+- **输出格式**:完整的执行记录JSON,包含输入、执行过程、结果和元数据
+- **双文件保存**:
+  - `execution_record.json` - 格式化的JSON,易于阅读
+  - `final_result.json` - 系统缓存
+- **优势**:可追溯、可调试、可审计
+
+### 3. Prompt和LLM交互追踪(新)⭐
+- **完整记录**:每个LLM调用都记录prompt和response
+- **透明度**:可以看到系统如何与LLM交互
+- **调试利器**:快速定位问题,优化prompt效果
+- **详细说明**:[DETAILED_TRACKING.md](DETAILED_TRACKING.md)
+- **示例**:[example_detailed_output.json](example_detailed_output.json)
+
+**示例结构**:
+```json
+{
+  "steps": [{
+    "name": "generate_query",
+    "detail": {
+      "prompt": "完整的prompt文本...",
+      "response": "LLM的响应..."
+    }
+  }]
+}
+```
+
+### 4. 完善的缓存系统
+- 分级缓存结构
+- 支持断点续传
+- 节省资源和成本
+
+### 5. 详细的日志系统
+- 结构化日志输出
+- 清晰的步骤标记
+- 便于调试和监控
+
+## 缓存策略
+
+### 缓存目录结构
+```
+.cache/
+├── {question_hash}/                 # 每个问题一个文件夹
+│   ├── question.txt                # 原始问题文本(方便查看)
+│   ├── function_knowledge/         # function_knowledge模块缓存
+│   │   ├── generated_query.txt     # 生成的query
+│   │   ├── selected_tool.txt       # 选择的工具名
+│   │   ├── tool_params.json        # 提取的工具参数(新)
+│   │   ├── tool_result.json        # 工具执行结果
+│   │   └── final_result.json       # 最终结果
+│   ├── multi_search/               # multi_search模块缓存
+│   │   ├── final_knowledge.txt     # 最终合并知识
+│   │   └── merged_knowledge.txt    # 合并后的知识
+│   └── llm_search/                 # llm_search模块缓存
+│       ├── generated_queries.json  # 生成的查询列表
+│       ├── merged_knowledge.txt    # 合并后的知识
+│       └── search_results/         # 搜索结果
+│           ├── search_result_001.txt
+│           ├── search_result_002.txt
+│           └── search_result_003.txt
+```
+
+### 缓存优点
+1. **节省资源**:避免重复调用LLM和搜索引擎
+2. **断点续传**:任何步骤失败都可以从缓存恢复
+3. **调试方便**:可以查看每个步骤的中间结果
+4. **性能提升**:缓存命中时速度大幅提升
+
+### 缓存key设计
+- `function_knowledge`: 使用 `question||post_info||persona_info` 组合
+- `multi_search`: 使用原始 `question`
+- `llm_search`: 使用原始 `question`
+
+## 日志系统
+
+### 日志格式
+```
+[模块] 操作状态 - 描述
+  ↓ 级联操作
+  ✓ 成功
+  ✗ 失败
+  ⚠ 警告
+  → 正在执行
+```
+
+### 日志层级
+
+#### 1. Function Knowledge (最外层)
+```
+================================================================================
+Function Knowledge - 开始处理
+问题: ...
+帖子信息: ...
+人设信息: ...
+================================================================================
+[步骤1] 生成Query...
+✓ 使用缓存的Query: ...
+[步骤2] 选择工具...
+  当前可用工具数: 1
+  → 调用Gemini选择工具...
+✓ 选择结果: wechat_search_article
+[步骤3] 调用工具: wechat_search_article
+  → 调用工具,参数: {'keyword': '...'}
+✓ 工具调用完成
+================================================================================
+✓ Function Knowledge 完成 (结果长度: 123)
+================================================================================
+```
+
+#### 2. Multi-Search (中层)
+```
+============================================================
+Multi-Search - 开始处理问题: ...
+============================================================
+[渠道1] 调用 LLM Search...
+  (LLM Search 内部日志)
+✓ LLM Search 完成 (长度: 456)
+[Multi-Search] 合并多渠道知识 - 1 个渠道
+  有效渠道: ['LLM Search']
+  → 调用Gemini合并多渠道知识...
+✓ 多渠道知识合并完成 (长度: 450)
+============================================================
+✓ Multi-Search 完成 (最终长度: 450)
+============================================================
+```
+
+#### 3. LLM Search (最内层)
+```
+============================================================
+LLM Search - 开始处理问题: ...
+============================================================
+[步骤1] 生成搜索Query - 问题: ...
+→ 调用Gemini生成query...
+→ 解析生成的query...
+✓ 成功生成 3 个query:
+  1. query1
+  2. query2
+  3. query3
+[步骤2] 批量搜索 - 共 3 个Query
+  [1] 搜索Query: query1
+  → 调用搜索引擎...
+  ✓ 获取知识文本 (长度: 100)
+  ...
+✓ 批量搜索完成,获得 3 个有效结果
+[步骤3] 合并知识 - 共 3 个文本
+  有效文本数量: 3/3
+  → 调用Gemini合并知识文本...
+✓ 成功合并知识文本 (长度: 250)
+============================================================
+✓ LLM Search 完成 (最终长度: 250)
+============================================================
+```
+
+## 使用示例
+
+### 基本使用(启用缓存)
+```python
+from knowledge_v2.function_knowledge import get_knowledge
+
+result = get_knowledge(
+    question="去哪儿搜索微信文章?",
+    post_info="无",
+    persona_info="游戏博主"
+)
+
+# 访问执行结果
+print(f"结果类型: {result['result']['type']}")
+print(f"执行时间: {result['metadata']['execution_time']:.2f}秒")
+
+# 如果是工具调用
+if result['result']['type'] == 'tool':
+    tool_info = result['execution']['tool_info']
+    print(f"工具: {tool_info['tool_name']}")
+    print(f"参数: {tool_info['parameters']}")
+
+# 获取文本内容
+content = result['result']['content']
+```
+
+### 禁用缓存
+```python
+from knowledge_v2.function_knowledge import FunctionKnowledge
+
+agent = FunctionKnowledge(use_cache=False)
+result = agent.get_knowledge(
+    question="...",
+    post_info="...",
+    persona_info="..."
+)
+```
+
+### 查看执行记录文件
+执行后会生成格式化的JSON文件:
+```
+.cache/{question_hash}/execution_record.json
+```
+可以直接打开查看完整的执行过程。
+
+### 清除缓存
+```python
+from knowledge_v2.cache_manager import CacheManager
+
+cache = CacheManager()
+
+# 清除特定问题的缓存
+cache.clear("去哪儿搜索微信文章?")
+
+# 清除所有缓存
+cache.clear()
+```
+
+## 性能优化
+
+### 缓存命中率统计
+可通过日志中的 `✓ 使用缓存的...` 标记来统计缓存命中情况
+
+### 建议
+1. **首次运行**:完整流程,生成所有缓存
+2. **后续运行**:大部分步骤使用缓存,只在必要时调用LLM
+3. **调试时**:可关闭缓存或清除特定步骤缓存
+4. **生产环境**:建议启用缓存,定期清理过期缓存
+
+## 故障恢复
+
+### 场景1:LLM调用失败
+- 已完成的步骤有缓存
+- 重新运行只需重试失败步骤
+
+### 场景2:搜索引擎超时
+- 部分query已有缓存
+- 只需重试失败的query
+
+### 场景3:网络中断
+- 所有中间结果都有缓存
+- 恢复后从断点继续
+
+## 维护建议
+
+1. **定期清理**:建议每周清理过期缓存(>7天)
+2. **监控大小**:关注 `.cache` 目录大小
+3. **备份重要**:可备份常用问题的缓存

+ 149 - 0
knowledge_v2/cache_manager.py

@@ -0,0 +1,149 @@
+"""
+缓存管理模块
+提供统一的缓存读写接口,支持基于问题的分级缓存
+"""
+
+import os
+import json
+import hashlib
+from typing import Any, Optional
+from loguru import logger
+
+
class CacheManager:
    """Unified cache read/write manager.

    Caches are grouped per question under ``base_cache_dir/<hash>/<cache_type>/``,
    where ``<hash>`` is the first 12 hex digits of the MD5 of the question text.
    """

    def __init__(self, base_cache_dir: str = None):
        """
        Initialize the cache manager.

        Args:
            base_cache_dir: cache root directory; defaults to ``.cache`` next to
                this module.
        """
        if base_cache_dir is None:
            current_dir = os.path.dirname(os.path.abspath(__file__))
            base_cache_dir = os.path.join(current_dir, '.cache')

        self.base_cache_dir = base_cache_dir
        os.makedirs(base_cache_dir, exist_ok=True)
        logger.info(f"缓存管理器初始化,缓存目录: {base_cache_dir}")

    def _get_question_hash(self, question: str) -> str:
        """
        Derive the cache folder name for a question.

        Args:
            question: question text

        Returns:
            str: first 12 hex digits of the question's MD5 digest
        """
        return hashlib.md5(question.encode('utf-8')).hexdigest()[:12]

    def _get_cache_path(self, question: str, cache_type: str, filename: str) -> str:
        """
        Build the full path of a cache file, creating parent directories.

        Side effect: writes ``question.txt`` with the raw question text the
        first time a question's cache folder is touched, so folders stay
        human-identifiable.

        Args:
            question: question text
            cache_type: cache category ('function_knowledge', 'llm_search',
                'multi_search', ...)
            filename: cache file name

        Returns:
            str: full path of the cache file
        """
        question_hash = self._get_question_hash(question)
        cache_dir = os.path.join(self.base_cache_dir, question_hash, cache_type)
        os.makedirs(cache_dir, exist_ok=True)

        # Keep a copy of the original question next to its cached artifacts.
        question_file = os.path.join(self.base_cache_dir, question_hash, 'question.txt')
        if not os.path.exists(question_file):
            with open(question_file, 'w', encoding='utf-8') as f:
                f.write(question)

        return os.path.join(cache_dir, filename)

    def get(self, question: str, cache_type: str, filename: str) -> Optional[Any]:
        """
        Read a cache entry.

        Args:
            question: question text
            cache_type: cache category
            filename: cache file name; ``*.json`` entries are parsed into
                Python objects on the way out

        Returns:
            Cached content, or None when the entry is missing or unreadable.
        """
        cache_path = self._get_cache_path(question, cache_type, filename)

        if not os.path.exists(cache_path):
            # BUGFIX: log the actual filename instead of the "(unknown)" placeholder.
            logger.debug(f"缓存未命中: {cache_type}/{filename}")
            return None

        try:
            with open(cache_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # JSON entries were serialized as text; decode them here.
            if filename.endswith('.json'):
                content = json.loads(content)

            logger.info(f"✓ 缓存命中: {cache_type}/{filename}")
            return content

        except Exception as e:
            # A corrupt entry is treated as a cache miss rather than a crash.
            logger.error(f"读取缓存失败 {cache_type}/{filename}: {e}")
            return None

    def set(self, question: str, cache_type: str, filename: str, content: Any) -> bool:
        """
        Write a cache entry.

        Args:
            question: question text
            cache_type: cache category
            filename: cache file name
            content: payload; dicts and lists are serialized as pretty JSON,
                anything else is stringified

        Returns:
            bool: True on success, False on any write error.
        """
        cache_path = self._get_cache_path(question, cache_type, filename)

        try:
            if isinstance(content, (dict, list)):
                content = json.dumps(content, ensure_ascii=False, indent=2)

            with open(cache_path, 'w', encoding='utf-8') as f:
                f.write(str(content))

            logger.debug(f"缓存已保存: {cache_type}/{filename}")
            return True

        except Exception as e:
            logger.error(f"写入缓存失败 {cache_type}/{filename}: {e}")
            return False

    def clear(self, question: str = None):
        """
        Remove cached data.

        Args:
            question: when given, only that question's folder is removed;
                otherwise the whole cache root is wiped and recreated empty.
        """
        import shutil

        if question:
            question_hash = self._get_question_hash(question)
            cache_dir = os.path.join(self.base_cache_dir, question_hash)
            if os.path.exists(cache_dir):
                shutil.rmtree(cache_dir)
                logger.info(f"已清除问题缓存: {question[:30]}...")
        else:
            if os.path.exists(self.base_cache_dir):
                shutil.rmtree(self.base_cache_dir)
                os.makedirs(self.base_cache_dir)
                logger.info("已清除所有缓存")

+ 115 - 0
knowledge_v2/clear_cache.py

@@ -0,0 +1,115 @@
+"""
+清除缓存工具
+"""
+
+import os
+import sys
+import shutil
+
+# 添加路径
+current_dir = os.path.dirname(os.path.abspath(__file__))
+root_dir = os.path.dirname(current_dir)
+sys.path.insert(0, root_dir)
+
+from knowledge_v2.cache_manager import CacheManager
+
def clear_all_cache():
    """Interactively wipe the entire cache directory after reporting its size."""
    cache = CacheManager()

    if not os.path.exists(cache.base_cache_dir):
        print("缓存目录不存在")
        return

    print(f"缓存目录: {cache.base_cache_dir}")

    # One walk over the tree to tell the user how much would be deleted.
    file_count = 0
    total_size = 0
    for root, dirs, files in os.walk(cache.base_cache_dir):
        for name in files:
            total_size += os.path.getsize(os.path.join(root, name))
            file_count += 1

    print(f"文件数量: {file_count}")
    print(f"总大小: {total_size / 1024:.2f} KB")

    # Require an explicit "yes" before anything is removed.
    response = input("\n确认清除所有缓存?(yes/no): ")
    if response.lower() != 'yes':
        print("取消操作")
        return

    shutil.rmtree(cache.base_cache_dir)
    os.makedirs(cache.base_cache_dir)
    print("✓ 已清除所有缓存")
+
def clear_question_cache(question: str):
    """
    Delete the cache folder of a single question (interactive confirmation).

    Args:
        question: the exact question text used when the cache was created.
    """
    cache = CacheManager()

    # Reuse the manager's hashing scheme instead of re-implementing MD5 here,
    # so the two sides can never drift apart.
    question_hash = cache._get_question_hash(question)
    question_dir = os.path.join(cache.base_cache_dir, question_hash)

    if not os.path.exists(question_dir):
        print("未找到该问题的缓存")
        return

    # Show the stored question text so the user can confirm the right target.
    question_file = os.path.join(question_dir, 'question.txt')
    if os.path.exists(question_file):
        with open(question_file, 'r', encoding='utf-8') as f:
            cached_question = f.read()
        print(f"找到缓存的问题: {cached_question}")

    response = input("\n确认清除此问题的缓存?(yes/no): ")
    if response.lower() == 'yes':
        shutil.rmtree(question_dir)
        print("✓ 已清除该问题的缓存")
    else:
        print("取消操作")
+
def list_cached_questions():
    """Print every cached question folder with its hash and a text preview."""
    cache = CacheManager()

    if not os.path.exists(cache.base_cache_dir):
        print("缓存目录不存在")
        return

    print("缓存的问题列表:")
    print("=" * 60)

    count = 0
    for dir_name in os.listdir(cache.base_cache_dir):
        question_dir = os.path.join(cache.base_cache_dir, dir_name)
        # Skip stray files and folders without a recorded question text.
        if not os.path.isdir(question_dir):
            continue
        question_file = os.path.join(question_dir, 'question.txt')
        if not os.path.exists(question_file):
            continue
        with open(question_file, 'r', encoding='utf-8') as f:
            question = f.read()
        count += 1
        print(f"{count}. [{dir_name}] {question[:50]}...")

    print("=" * 60)
    print(f"总计: {count} 个缓存问题")
+
+if __name__ == "__main__":
+    print("缓存管理工具")
+    print("=" * 60)
+    print("1. 列出所有缓存")
+    print("2. 清除所有缓存")
+    print("3. 清除特定问题的缓存")
+    print("=" * 60)
+    
+    choice = input("请选择操作 (1/2/3): ")
+    
+    if choice == "1":
+        list_cached_questions()
+    elif choice == "2":
+        clear_all_cache()
+    elif choice == "3":
+        question = input("请输入问题的完整文本(需要与原问题完全一致): ")
+        clear_question_cache(question)
+    else:
+        print("无效的选择")

+ 125 - 0
knowledge_v2/clear_cache_simple.py

@@ -0,0 +1,125 @@
+"""
+简化版缓存清除工具 - 不依赖外部库
+"""
+
+import os
+import shutil
+import hashlib
+
+def get_cache_dir():
+    """获取缓存目录"""
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    return os.path.join(current_dir, '.cache')
+
def clear_all_cache():
    """
    Interactively wipe the whole cache directory.

    Returns:
        bool: True if the cache was deleted, False otherwise (missing
        directory or user abort).
    """
    cache_dir = get_cache_dir()

    if not os.path.exists(cache_dir):
        print("缓存目录不存在")
        return False

    print(f"缓存目录: {cache_dir}")

    # Report how much data would be removed before asking for confirmation.
    total_size = 0
    file_count = 0
    for root, dirs, files in os.walk(cache_dir):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                total_size += os.path.getsize(file_path)
                file_count += 1
            except OSError:
                # BUGFIX: was a bare `except:`; only filesystem errors
                # (file vanished / unreadable) should be skipped silently.
                pass

    print(f"文件数量: {file_count}")
    print(f"总大小: {total_size / 1024:.2f} KB")

    response = input("\n确认清除所有缓存?(yes/no): ")
    if response.lower() == 'yes':
        shutil.rmtree(cache_dir)
        os.makedirs(cache_dir)
        print("✓ 已清除所有缓存")
        return True
    print("取消操作")
    return False
+
def clear_question_cache(question: str):
    """
    Delete the cached data of one question after user confirmation.

    Args:
        question: exact question text (must match the original byte-for-byte,
            since the folder name is derived from its MD5).

    Returns:
        bool: True if the folder was removed, False otherwise.
    """
    cache_root = get_cache_dir()
    digest = hashlib.md5(question.encode('utf-8')).hexdigest()[:12]
    target = os.path.join(cache_root, digest)

    if not os.path.exists(target):
        print("未找到该问题的缓存")
        return False

    # Show the stored question text so the user knows what will be removed.
    stored = os.path.join(target, 'question.txt')
    if os.path.exists(stored):
        with open(stored, 'r', encoding='utf-8') as f:
            cached_question = f.read()
        print(f"找到缓存的问题: {cached_question}")

    if input("\n确认清除此问题的缓存?(yes/no): ").lower() != 'yes':
        print("取消操作")
        return False

    shutil.rmtree(target)
    print("✓ 已清除该问题的缓存")
    return True
+
def list_cached_questions():
    """Print an indexed list of all cached questions (folder hash + preview)."""
    cache_dir = get_cache_dir()

    if not os.path.exists(cache_dir):
        print("缓存目录不存在")
        return

    print("\n缓存的问题列表:")
    print("=" * 60)

    count = 0
    for dir_name in os.listdir(cache_dir):
        question_dir = os.path.join(cache_dir, dir_name)
        if os.path.isdir(question_dir):
            question_file = os.path.join(question_dir, 'question.txt')
            if os.path.exists(question_file):
                try:
                    with open(question_file, 'r', encoding='utf-8') as f:
                        question = f.read()
                    count += 1
                    print(f"{count}. [{dir_name}] {question[:50]}...")
                except (OSError, UnicodeDecodeError):
                    # BUGFIX: was a bare `except:`; only skip entries that are
                    # unreadable or not valid UTF-8, don't mask other bugs.
                    pass

    print("=" * 60)
    print(f"总计: {count} 个缓存问题\n")
+
+if __name__ == "__main__":
+    print("=" * 60)
+    print("缓存管理工具")
+    print("=" * 60)
+    print("1. 列出所有缓存")
+    print("2. 清除所有缓存")
+    print("3. 清除特定问题的缓存")
+    print("=" * 60)
+    
+    choice = input("请选择操作 (1/2/3): ")
+    
+    if choice == "1":
+        list_cached_questions()
+    elif choice == "2":
+        clear_all_cache()
+    elif choice == "3":
+        question = input("请输入问题的完整文本(需要与原问题完全一致): ")
+        clear_question_cache(question)
+    else:
+        print("无效的选择")

+ 529 - 1
knowledge_v2/function_knowledge.py

@@ -1,3 +1,4 @@
+
 '''
 方法知识获取模块
 1. 输入:问题 + 帖子信息 + 账号人设信息
@@ -7,4 +8,531 @@
     - 返回新的方法工具知识
     - 异步从新方法知识中获取新工具(调用大模型执行,prompt在 function_knowledge_generate_new_tool_prompt.md 中),调用工具库系统,接入新的工具
 4. 调用选择的方法工具执行验证,返回工具执行结果
-'''
+'''
+
+import os
+import sys
+import json
+import threading
+from loguru import logger
+
+# 设置路径以便导入工具类
+current_dir = os.path.dirname(os.path.abspath(__file__))
+root_dir = os.path.dirname(current_dir)
+sys.path.insert(0, root_dir)
+
+from utils.gemini_client import generate_text
+from knowledge_v2.tools_library import call_tool, save_tool_info, get_all_tool_infos, get_tool_info
+from knowledge_v2.multi_search_knowledge import get_knowledge as get_multi_search_knowledge
+from knowledge_v2.cache_manager import CacheManager
+
+class FunctionKnowledge:
+    """Method-knowledge acquisition pipeline.
+
+    Generates a search query from (question, post, persona), selects a tool
+    from the tool library via an LLM, extracts call parameters, and either
+    executes the tool or falls back to multi-channel knowledge search —
+    in which case a new tool is generated asynchronously from the knowledge.
+    All intermediate artifacts are cached per combined question.
+    """
+    
+    def __init__(self, use_cache: bool = True):
+        """
+        Initialize.
+        
+        Args:
+            use_cache: whether to enable the on-disk cache (default: enabled).
+        """
+        logger.info("=" * 80)
+        logger.info("初始化 FunctionKnowledge - 方法知识获取入口")
+        self.prompt_dir = os.path.join(current_dir, "prompt")
+        self.use_cache = use_cache
+        # Cache object exists only when caching is enabled; every read/write
+        # below is guarded by self.use_cache.
+        self.cache = CacheManager() if use_cache else None
+        logger.info(f"缓存状态: {'启用' if use_cache else '禁用'}")
+        logger.info("=" * 80)
+        
+    def _load_prompt(self, filename: str) -> str:
+        """Load and return the stripped contents of a prompt file.
+
+        Raises:
+            FileNotFoundError: if the prompt file is missing from prompt_dir.
+        """
+        prompt_path = os.path.join(self.prompt_dir, filename)
+        if not os.path.exists(prompt_path):
+            raise FileNotFoundError(f"Prompt文件不存在: {prompt_path}")
+        with open(prompt_path, 'r', encoding='utf-8') as f:
+            return f.read().strip()
+
+    def generate_query(self, question: str, post_info: str, persona_info: str) -> tuple:
+        """
+        Generate the search query for this question/post/persona combination.
+        
+        Returns:
+            tuple: (query, detail_info)
+            - query: the generated query string (falls back to the raw
+              question on failure)
+            - detail_info: dict with the prompt, response and cache status
+        """
+        logger.info(f"[步骤1] 生成Query...")
+        
+        # Combined key uniquely identifying this request for caching.
+        combined_question = f"{question}||{post_info}||{persona_info}"
+        
+        detail_info = {"cached": False, "prompt": None, "response": None}
+        
+        # Try the cache first.
+        if self.use_cache:
+            cached_query = self.cache.get(combined_question, 'function_knowledge', 'generated_query.txt')
+            if cached_query:
+                logger.info(f"✓ 使用缓存的Query: {cached_query}")
+                detail_info["cached"] = True
+                return cached_query, detail_info
+        
+        try:
+            prompt_template = self._load_prompt("function_generate_query_prompt.md")
+            prompt = prompt_template.format(
+                question=question,
+                post_info=post_info,
+                persona_info=persona_info
+            )
+            
+            detail_info["prompt"] = prompt
+            
+            logger.info("→ 调用Gemini生成Query...")
+            query = generate_text(prompt=prompt)
+            query = query.strip()
+            
+            detail_info["response"] = query
+            
+            logger.info(f"✓ 生成Query: {query}")
+            
+            # Write through to the cache.
+            if self.use_cache:
+                self.cache.set(combined_question, 'function_knowledge', 'generated_query.txt', query)
+            
+            return query, detail_info
+        except Exception as e:
+            logger.error(f"✗ 生成Query失败: {e}")
+            detail_info["error"] = str(e)
+            return question, detail_info  # degrade: fall back to the original question
+
+    def select_tool(self, combined_question: str, query: str) -> tuple:
+        """
+        Ask the LLM to pick a tool from the tool library for the query.
+        
+        Returns:
+            tuple: (tool_name, detail_info) — tool_name is the literal string
+            "None" when no tool is available or selection fails.
+        """
+        logger.info(f"[步骤2] 选择工具...")
+        
+        detail_info = {"cached": False, "prompt": None, "response": None, "available_tools_count": 0}
+        
+        # Try the cache first.
+        if self.use_cache:
+            cached_tool = self.cache.get(combined_question, 'function_knowledge', 'selected_tool.txt')
+            if cached_tool:
+                logger.info(f"✓ 使用缓存的工具: {cached_tool}")
+                detail_info["cached"] = True
+                return cached_tool, detail_info
+        
+        try:
+            all_tool_infos = get_all_tool_infos()
+            if not all_tool_infos:
+                logger.info("  工具库为空,无可用工具")
+                return "None", detail_info
+            
+            # Tool entries in the catalog text are separated by '--- Tool:' markers.
+            tool_count = len(all_tool_infos.split('--- Tool:')) - 1
+            detail_info["available_tools_count"] = tool_count
+            logger.info(f"  当前可用工具数: {tool_count}")
+                
+            prompt_template = self._load_prompt("function_knowledge_select_tools_prompt.md")
+            prompt = prompt_template.format(
+                query=query,
+                tool_infos=all_tool_infos
+            )
+            
+            detail_info["prompt"] = prompt
+            detail_info["tool_infos"] = all_tool_infos
+            
+            logger.info("→ 调用Gemini选择工具...")
+            tool_name = generate_text(prompt=prompt)
+            tool_name = tool_name.strip()
+            
+            detail_info["response"] = tool_name
+            
+            logger.info(f"✓ 选择结果: {tool_name}")
+            
+            # Write through to the cache (including a "None" answer).
+            if self.use_cache:
+                self.cache.set(combined_question, 'function_knowledge', 'selected_tool.txt', tool_name)
+            
+            return tool_name, detail_info
+        except Exception as e:
+            logger.error(f"✗ 选择工具失败: {e}")
+            detail_info["error"] = str(e)
+            return "None", detail_info
+
+    def extract_tool_params(self, combined_question: str, tool_name: str, query: str) -> tuple:
+        """
+        Extract call parameters for the selected tool from the query.
+        
+        Args:
+            combined_question: combined question key (used for caching)
+            tool_name: name of the selected tool
+            query: the query content
+            
+        Returns:
+            tuple: (params, detail_info) — params degrades to
+            ``{"keyword": query}`` whenever tool info is missing or the
+            LLM response cannot be parsed as JSON.
+        """
+        logger.info(f"[步骤3] 提取工具参数...")
+        
+        # Initialize detail_info.
+        detail_info = {"cached": False, "prompt": None, "response": None, "tool_info": None}
+        
+        # Try the cache first.
+        if self.use_cache:
+            cached_params = self.cache.get(combined_question, 'function_knowledge', 'tool_params.json')
+            if cached_params:
+                logger.info(f"✓ 使用缓存的参数: {cached_params}")
+                detail_info["cached"] = True
+                return cached_params, detail_info
+        
+        try:
+            # Fetch the tool's metadata.
+            tool_info = get_tool_info(tool_name)
+            if not tool_info:
+                logger.warning(f"  ⚠ 未找到工具 {tool_name} 的信息,使用默认参数")
+                # Degrade: use the query as the keyword.
+                default_params = {"keyword": query}
+                detail_info["fallback"] = "tool_info_not_found"
+                return default_params, detail_info
+            
+            detail_info["tool_info"] = tool_info
+            logger.info(f"  工具 {tool_name} 信息长度: {len(tool_info)}")
+            
+            # Load the prompt template.
+            prompt_template = self._load_prompt("function_knowledge_extract_tool_params_prompt.md")
+            prompt = prompt_template.format(
+                query=query,
+                tool_info=tool_info
+            )
+            
+            detail_info["prompt"] = prompt
+            
+            # Call the LLM to extract parameters.
+            logger.info("  → 调用Gemini提取参数...")
+            response_text = generate_text(prompt=prompt)
+            detail_info["response"] = response_text
+            
+            # Parse the JSON response.
+            logger.info("  → 解析参数JSON...")
+            try:
+                # Strip possible markdown code-fence markers around the JSON.
+                response_text = response_text.strip()
+                if response_text.startswith("```json"):
+                    response_text = response_text[7:]
+                if response_text.startswith("```"):
+                    response_text = response_text[3:]
+                if response_text.endswith("```"):
+                    response_text = response_text[:-3]
+                response_text = response_text.strip()
+                
+                params = json.loads(response_text)
+                
+                logger.info(f"✓ 提取参数成功: {params}")
+                
+                # Write through to the cache.
+                if self.use_cache:
+                    self.cache.set(combined_question, 'function_knowledge', 'tool_params.json', params)
+                
+                return params, detail_info
+                
+            except json.JSONDecodeError as e:
+                logger.error(f"  ✗ 解析JSON失败: {e}")
+                logger.error(f"  响应内容: {response_text}")
+                # Degrade: use the query as the keyword.
+                default_params = {"keyword": query}
+                logger.warning(f"  使用默认参数: {default_params}")
+                detail_info["fallback"] = "json_decode_error"
+                return default_params, detail_info
+                
+        except Exception as e:
+            logger.error(f"✗ 提取工具参数失败: {e}")
+            # Degrade: use the query as the keyword.
+            default_params = {"keyword": query}
+            detail_info["error"] = str(e)
+            detail_info["fallback"] = "exception"
+            return default_params, detail_info
+
+
+    def generate_and_save_new_tool(self, knowledge: str):
+        """Generate a new tool from knowledge text and save it.
+
+        Runs in a background thread spawned by get_knowledge; failures are
+        logged and swallowed so they never affect the main flow.
+        """
+        try:
+            logger.info("开始生成新工具...")
+            prompt_template = self._load_prompt("function_knowledge_generate_new_tool_prompt.md")
+            prompt = prompt_template.format(knowledge=knowledge)
+            
+            tool_code = generate_text(prompt=prompt)
+            
+            # Simple tool-name parsing (assumes the name appears in `def xxx(`).
+            # This is a naive extraction and may need more robust parsing.
+            # NOTE(review): this grabs the FIRST `def` in the generated code —
+            # if the LLM emits helper functions first, the wrong name is saved;
+            # confirm the prompt guarantees the entry point comes first.
+            import re
+            match = re.search(r"def\s+([a-zA-Z_][a-zA-Z0-9_]*)", tool_code)
+            if match:
+                tool_name = match.group(1)
+                save_path = save_tool_info(tool_name, tool_code)
+                logger.info(f"新工具已保存: {save_path}")
+            else:
+                logger.warning("无法从生成的代码中提取工具名")
+                
+        except Exception as e:
+            logger.error(f"生成新工具失败: {e}")
+
+    def get_knowledge(self, question: str, post_info: str, persona_info: str) -> dict:
+        """
+        Main flow for acquiring method knowledge.
+        
+        Returns:
+            dict: full execution record of the form
+            {
+                "input": {...},          # original inputs
+                "execution": {...},      # step-by-step execution trace
+                "result": {...},         # final result
+                "metadata": {...}        # timings, cache hits, errors
+            }
+        """
+        import time
+        start_time = time.time()
+        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
+        
+        logger.info("=" * 80)
+        logger.info(f"Function Knowledge - 开始处理")
+        logger.info(f"问题: {question}")
+        logger.info(f"帖子信息: {post_info}")
+        logger.info(f"人设信息: {persona_info}")
+        logger.info("=" * 80)
+        
+        # Combined key uniquely identifying this request for caching.
+        combined_question = f"{question}||{post_info}||{persona_info}"
+        
+        # Initialize the execution record.
+        execution_record = {
+            "input": {
+                "question": question,
+                "post_info": post_info,
+                "persona_info": persona_info,
+                "timestamp": timestamp
+            },
+            "execution": {
+                "steps": [],
+                "tool_info": None,
+                "knowledge_search_info": None
+            },
+            "result": {
+                "type": None,  # "tool" or "knowledge"
+                "content": None,
+                "raw_data": None
+            },
+            "metadata": {
+                "execution_time": None,
+                "cache_hits": [],
+                "errors": []
+            }
+        }
+        
+        # Check the final-result cache before doing any work.
+        if self.use_cache:
+            cached_final = self.cache.get(combined_question, 'function_knowledge', 'final_result.json')
+            if cached_final:
+                logger.info(f"✓ 使用缓存的最终结果")
+                logger.info("=" * 80 + "\n")
+                # If the cached value is a full execution record, return it as-is.
+                if isinstance(cached_final, dict) and "execution" in cached_final:
+                    return cached_final
+                # Otherwise wrap the legacy cached value in a minimal record.
+                return {
+                    "input": execution_record["input"],
+                    "execution": {"cached": True},
+                    "result": {"type": "cached", "content": cached_final},
+                    "metadata": {"cache_hit": True}
+                }
+        
+        try:
+            # Step 1: generate the query.
+            step1_start = time.time()
+            query, query_detail = self.generate_query(question, post_info, persona_info)
+            execution_record["execution"]["steps"].append({
+                "step": 1,
+                "name": "generate_query",
+                "duration": time.time() - step1_start,
+                "output": query,
+                "detail": query_detail  # includes prompt and response
+            })
+            
+            # Step 2: select a tool.
+            step2_start = time.time()
+            tool_name, tool_select_detail = self.select_tool(combined_question, query)
+            execution_record["execution"]["steps"].append({
+                "step": 2,
+                "name": "select_tool",
+                "duration": time.time() - step2_start,
+                "output": tool_name,
+                "detail": tool_select_detail  # includes prompt, response and tool catalog
+            })
+            
+            result_content = None
+            if tool_name and tool_name != "None":
+                # Path A: execute the selected tool.
+                execution_record["result"]["type"] = "tool"
+                
+                # Step 3: extract call parameters.
+                step3_start = time.time()
+                arguments, params_detail = self.extract_tool_params(combined_question, tool_name, query)
+                execution_record["execution"]["steps"].append({
+                    "step": 3,
+                    "name": "extract_tool_params",
+                    "duration": time.time() - step3_start,
+                    "output": arguments,
+                    "detail": params_detail  # includes prompt, response and tool info
+                })
+                
+                # Step 4: call the tool.
+                logger.info(f"[步骤4] 调用工具: {tool_name}")
+                
+                # Check the tool-call result cache.
+                # NOTE(review): on a cache hit no step-4 entry is appended to
+                # execution steps (only metadata.cache_hits) — confirm intended.
+                if self.use_cache:
+                    cached_tool_result = self.cache.get(combined_question, 'function_knowledge', 'tool_result.json')
+                    if cached_tool_result:
+                        logger.info(f"✓ 使用缓存的工具调用结果")
+                        execution_record["metadata"]["cache_hits"].append("tool_result")
+                        tool_result = cached_tool_result
+                    else:
+                        step4_start = time.time()
+                        logger.info(f"  → 调用工具,参数: {arguments}")
+                        tool_result = call_tool(tool_name, arguments)
+                        
+                        # Cache the tool-call result.
+                        self.cache.set(combined_question, 'function_knowledge', 'tool_result.json', tool_result)
+                        
+                        execution_record["execution"]["steps"].append({
+                            "step": 4,
+                            "name": "call_tool",
+                            "duration": time.time() - step4_start,
+                            "output": "success"
+                        })
+                else:
+                    step4_start = time.time()
+                    logger.info(f"  → 调用工具,参数: {arguments}")
+                    tool_result = call_tool(tool_name, arguments)
+                    execution_record["execution"]["steps"].append({
+                        "step": 4,
+                        "name": "call_tool",
+                        "duration": time.time() - step4_start,
+                        "output": "success"
+                    })
+                
+                # Record the tool invocation details.
+                execution_record["execution"]["tool_info"] = {
+                    "tool_name": tool_name,
+                    "parameters": arguments,
+                    "result": tool_result
+                }
+                
+                result_content = f"工具 {tool_name} 执行结果: {json.dumps(tool_result, ensure_ascii=False)}"
+                execution_record["result"]["content"] = result_content
+                execution_record["result"]["raw_data"] = tool_result
+                
+                logger.info(f"✓ 工具调用完成")
+                
+            else:
+                # Path B: fall back to multi-channel knowledge search.
+                execution_record["result"]["type"] = "knowledge_search"
+                
+                logger.info("[步骤4] 未找到合适工具,调用 MultiSearch...")
+                
+                step4_start = time.time()
+                knowledge = get_multi_search_knowledge(query)
+                
+                execution_record["execution"]["steps"].append({
+                    "step": 4,
+                    "name": "multi_search_knowledge",
+                    "duration": time.time() - step4_start,
+                    "output": f"knowledge_length: {len(knowledge)}"
+                })
+                
+                # Record the knowledge-search details.
+                execution_record["execution"]["knowledge_search_info"] = {
+                    "query": query,
+                    "knowledge_length": len(knowledge),
+                    "source": "multi_search"
+                }
+                
+                result_content = knowledge
+                execution_record["result"]["content"] = knowledge
+                execution_record["result"]["raw_data"] = {"knowledge": knowledge, "query": query}
+                
+                # Asynchronously generate a new tool from the knowledge.
+                # NOTE(review): the thread is non-daemon and unjoined — it can
+                # keep the process alive after the main flow returns; confirm.
+                logger.info("[后台任务] 启动新工具生成线程...")
+                threading.Thread(target=self.generate_and_save_new_tool, args=(knowledge,)).start()
+            
+            # Compute the total execution time.
+            execution_record["metadata"]["execution_time"] = time.time() - start_time
+            
+            # Persist the complete execution record to the cache.
+            if self.use_cache:
+                self.cache.set(combined_question, 'function_knowledge', 'final_result.json', execution_record)
+                
+                # Also save a formatted JSON file for human inspection.
+                # NOTE(review): builds a second CacheManager and reimplements the
+                # md5[:12] path layout by hand — presumably this must match
+                # CacheManager's internal scheme; verify (and self.cache could
+                # be reused instead).
+                from knowledge_v2.cache_manager import CacheManager
+                cache = CacheManager()
+                import hashlib
+                question_hash = hashlib.md5(combined_question.encode('utf-8')).hexdigest()[:12]
+                output_file = os.path.join(cache.base_cache_dir, question_hash, 'execution_record.json')
+                
+                try:
+                    with open(output_file, 'w', encoding='utf-8') as f:
+                        json.dump(execution_record, f, ensure_ascii=False, indent=2)
+                    logger.info(f"✓ 完整执行记录已保存: {output_file}")
+                except Exception as e:
+                    logger.error(f"保存执行记录失败: {e}")
+            
+            logger.info("=" * 80)
+            logger.info(f"✓ Function Knowledge 完成")
+            logger.info(f"  类型: {execution_record['result']['type']}")
+            logger.info(f"  结果长度: {len(result_content) if result_content else 0}")
+            logger.info(f"  执行时间: {execution_record['metadata']['execution_time']:.2f}秒")
+            logger.info("=" * 80 + "\n")
+            
+            return execution_record
+            
+        except Exception as e:
+            # Any failure is folded into the record instead of propagating.
+            logger.error(f"✗ 执行失败: {e}")
+            import traceback
+            error_trace = traceback.format_exc()
+            
+            execution_record["metadata"]["errors"].append({
+                "error": str(e),
+                "traceback": error_trace
+            })
+            execution_record["result"]["type"] = "error"
+            execution_record["result"]["content"] = f"执行失败: {str(e)}"
+            execution_record["metadata"]["execution_time"] = time.time() - start_time
+            
+            return execution_record
+
+def get_knowledge(question: str, post_info: str, persona_info: str) -> dict:
+    """
+    Convenience wrapper: build a FunctionKnowledge agent (cache enabled)
+    and run the full pipeline once.
+    
+    Returns:
+        dict: full execution record with input, execution trace, result
+        and metadata.
+    """
+    agent = FunctionKnowledge()
+    return agent.get_knowledge(question, post_info, persona_info)
+
+if __name__ == "__main__":
+    # Manual smoke test: run the pipeline once with fixed sample inputs
+    # and print a summary of the execution record.
+    question = "小老虎 穿搭"
+    post_info = "无"
+    persona_info = "游戏博主"
+    
+    try:
+        agent = FunctionKnowledge()
+        execution_result = agent.get_knowledge(question, post_info, persona_info)
+        print("=" * 50)
+        print("执行结果:")
+        print("=" * 50)
+        print(f"类型: {execution_result['result']['type']}")
+        print(f"内容预览: {execution_result['result']['content'][:200]}...")
+        print(f"执行时间: {execution_result['metadata']['execution_time']:.2f}秒")
+        print(f"\n完整JSON已保存到缓存目录")
+    except Exception as e:
+        logger.error(f"测试失败: {e}")

+ 102 - 33
knowledge_v2/llm_search_knowledge.py

@@ -24,15 +24,27 @@ sys.path.insert(0, root_dir)
 
 from utils.gemini_client import generate_text
 from utils.qwen_client import QwenClient
+from knowledge_v2.cache_manager import CacheManager
 
 
 class LLMSearchKnowledge:
     """基于LLM+search的知识获取类"""
     
-    def __init__(self):
-        """初始化"""
+    def __init__(self, use_cache: bool = True):
+        """
+        初始化
+        
+        Args:
+            use_cache: 是否启用缓存,默认启用
+        """
+        logger.info("=" * 60)
+        logger.info("初始化 LLMSearchKnowledge")
         self.qwen_client = QwenClient()
         self.prompt_dir = os.path.join(current_dir, "prompt")
+        self.use_cache = use_cache
+        self.cache = CacheManager() if use_cache else None
+        logger.info(f"缓存状态: {'启用' if use_cache else '禁用'}")
+        logger.info("=" * 60)
         
     def _load_prompt(self, filename: str) -> str:
         """
@@ -81,9 +93,16 @@ class LLMSearchKnowledge:
         Raises:
             Exception: 生成query失败时抛出异常
         """
+        logger.info(f"[步骤1] 生成搜索Query - 问题: {question[:50]}...")
+        
+        # 尝试从缓存读取
+        if self.use_cache:
+            cached_queries = self.cache.get(question, 'llm_search', 'generated_queries.json')
+            if cached_queries:
+                logger.info(f"✓ 使用缓存的queries: {cached_queries}")
+                return cached_queries
+        
         try:
-            logger.info(f"开始生成query,问题: {question[:50]}...")
-            
             # 加载prompt
             prompt_template = self._load_prompt("llm_search_generate_query_prompt.md")
             
@@ -91,11 +110,11 @@ class LLMSearchKnowledge:
             prompt = prompt_template.format(question=question)
             
             # 调用gemini生成query
-            logger.info("调用Gemini生成query")
+            logger.info("调用Gemini生成query...")
             response_text = generate_text(prompt=prompt)
             
             # 解析JSON响应
-            logger.info("解析生成的query")
+            logger.info("解析生成的query...")
             try:
                 # 尝试提取JSON部分(去除可能的markdown代码块标记)
                 response_text = response_text.strip()
@@ -113,23 +132,33 @@ class LLMSearchKnowledge:
                 if not queries:
                     raise ValueError("生成的query列表为空")
                 
-                logger.info(f"成功生成 {len(queries)} 个query: {queries}")
+                logger.info(f"✓ 成功生成 {len(queries)} 个query:")
+                for i, q in enumerate(queries, 1):
+                    logger.info(f"  {i}. {q}")
+                
+                # 写入缓存
+                if self.use_cache:
+                    self.cache.set(question, 'llm_search', 'generated_queries.json', queries)
+                
                 return queries
                 
             except json.JSONDecodeError as e:
-                logger.error(f"解析JSON失败: {e}, 响应内容: {response_text}")
+                logger.error(f"✗ 解析JSON失败: {e}")
+                logger.error(f"响应内容: {response_text}")
                 raise ValueError(f"无法解析模型返回的JSON: {e}")
                 
         except Exception as e:
-            logger.error(f"生成query失败: {e}")
+            logger.error(f"生成query失败: {e}")
             raise
     
-    def search_knowledge(self, query: str) -> str:
+    def search_knowledge(self, question: str, query: str, query_index: int = 0) -> str:
         """
         根据单个query搜索知识
         
         Args:
+            question: 原始问题(用于缓存)
             query: 搜索query
+            query_index: query索引(用于缓存文件名)
             
         Returns:
             str: 搜索到的知识文本(content字段)
@@ -137,10 +166,19 @@ class LLMSearchKnowledge:
         Raises:
             Exception: 搜索失败时抛出异常
         """
+        logger.info(f"  [{query_index}] 搜索Query: {query}")
+        
+        # 尝试从缓存读取
+        if self.use_cache:
+            cache_filename = f"search_result_{query_index:03d}.txt"
+            cached_result = self.cache.get(question, 'llm_search/search_results', cache_filename)
+            if cached_result:
+                logger.info(f"  ✓ 使用缓存结果 (长度: {len(cached_result)})")
+                return cached_result
+        
         try:
-            logger.info(f"搜索知识,query: {query}")
-            
             # 调用qwen_client的search_and_chat方法
+            logger.info(f"  → 调用搜索引擎...")
             result = self.qwen_client.search_and_chat(
                 user_prompt=query,
                 search_strategy="agent"
@@ -150,44 +188,54 @@ class LLMSearchKnowledge:
             knowledge_text = result.get("content", "")
             
             if not knowledge_text:
-                logger.warning(f"query '{query}' 的搜索结果为空")
+                logger.warning(f"query '{query}' 的搜索结果为空")
                 return ""
             
-            logger.info(f"成功获取知识文本,长度: {len(knowledge_text)}")
+            logger.info(f"  ✓ 获取知识文本 (长度: {len(knowledge_text)})")
+            
+            # 写入缓存
+            if self.use_cache:
+                cache_filename = f"search_result_{query_index:03d}.txt"
+                self.cache.set(question, 'llm_search/search_results', cache_filename, knowledge_text)
+            
             return knowledge_text
             
         except Exception as e:
-            logger.error(f"搜索知识失败,query: {query}, 错误: {e}")
+            logger.error(f"搜索知识失败,query: {query}, 错误: {e}")
             raise
     
-    def search_knowledge_batch(self, queries: List[str]) -> List[str]:
+    def search_knowledge_batch(self, question: str, queries: List[str]) -> List[str]:
         """
         批量搜索知识
         
         Args:
+            question: 原始问题(用于缓存)
             queries: query列表
             
         Returns:
             List[str]: 知识文本列表
         """
+        logger.info(f"[步骤2] 批量搜索 - 共 {len(queries)} 个Query")
+        
         knowledge_texts = []
         for i, query in enumerate(queries, 1):
             try:
-                logger.info(f"搜索第 {i}/{len(queries)} 个query")
-                knowledge_text = self.search_knowledge(query)
+                knowledge_text = self.search_knowledge(question, query, i)
                 knowledge_texts.append(knowledge_text)
             except Exception as e:
-                logger.error(f"搜索第 {i} 个query失败,跳过: {e}")
+                logger.error(f"搜索第 {i} 个query失败,跳过: {e}")
                 # 失败时添加空字符串,保持索引对应
                 knowledge_texts.append("")
         
+        logger.info(f"✓ 批量搜索完成,获得 {len([k for k in knowledge_texts if k])} 个有效结果")
         return knowledge_texts
     
-    def merge_knowledge(self, knowledge_texts: List[str]) -> str:
+    def merge_knowledge(self, question: str, knowledge_texts: List[str]) -> str:
         """
         合并多个知识文本
         
         Args:
+            question: 原始问题(用于缓存)
             knowledge_texts: 知识文本列表
             
         Returns:
@@ -196,18 +244,30 @@ class LLMSearchKnowledge:
         Raises:
             Exception: 合并失败时抛出异常
         """
+        logger.info(f"[步骤3] 合并知识 - 共 {len(knowledge_texts)} 个文本")
+        
+        # 尝试从缓存读取
+        if self.use_cache:
+            cached_merged = self.cache.get(question, 'llm_search', 'merged_knowledge.txt')
+            if cached_merged:
+                logger.info(f"✓ 使用缓存的合并知识 (长度: {len(cached_merged)})")
+                return cached_merged
+        
         try:
-            logger.info(f"开始合并 {len(knowledge_texts)} 个知识文本")
-            
             # 过滤空文本
             valid_texts = [text for text in knowledge_texts if text.strip()]
+            logger.info(f"  有效文本数量: {len(valid_texts)}/{len(knowledge_texts)}")
+            
             if not valid_texts:
-                logger.warning("所有知识文本都为空,返回空字符串")
+                logger.warning("所有知识文本都为空,返回空字符串")
                 return ""
             
             if len(valid_texts) == 1:
-                logger.info("只有一个有效知识文本,直接返回")
-                return valid_texts[0]
+                logger.info("  只有一个有效知识文本,直接返回")
+                result = valid_texts[0]
+                if self.use_cache:
+                    self.cache.set(question, 'llm_search', 'merged_knowledge.txt', result)
+                return result
             
             # 加载prompt
             prompt_template = self._load_prompt("llm_search_merge_knowledge_prompt.md")
@@ -221,14 +281,19 @@ class LLMSearchKnowledge:
             prompt = prompt_template.format(knowledge_texts=knowledge_texts_str)
             
             # 调用gemini合并知识
-            logger.info("调用Gemini合并知识文本")
+            logger.info("调用Gemini合并知识文本...")
             merged_text = generate_text(prompt=prompt)
             
-            logger.info(f"成功合并知识文本,长度: {len(merged_text)}")
+            logger.info(f"✓ 成功合并知识文本 (长度: {len(merged_text)})")
+            
+            # 写入缓存
+            if self.use_cache:
+                self.cache.set(question, 'llm_search', 'merged_knowledge.txt', merged_text.strip())
+            
             return merged_text.strip()
             
         except Exception as e:
-            logger.error(f"合并知识文本失败: {e}")
+            logger.error(f"合并知识文本失败: {e}")
             raise
     
     def get_knowledge(self, question: str) -> str:
@@ -245,22 +310,26 @@ class LLMSearchKnowledge:
             Exception: 处理过程中出现错误时抛出异常
         """
         try:
-            logger.info(f"开始处理问题: {question[:50]}...")
+            logger.info(f"{'='*60}")
+            logger.info(f"LLM Search - 开始处理问题: {question[:50]}...")
+            logger.info(f"{'='*60}")
             
             # 步骤1: 生成多个query
             queries = self.generate_queries(question)
             
             # 步骤2: 对每个query搜索知识
-            knowledge_texts = self.search_knowledge_batch(queries)
+            knowledge_texts = self.search_knowledge_batch(question, queries)
             
             # 步骤3: 合并多个知识文本
-            merged_knowledge = self.merge_knowledge(knowledge_texts)
+            merged_knowledge = self.merge_knowledge(question, knowledge_texts)
             
-            logger.info(f"成功获取知识文本,长度: {len(merged_knowledge)}")
+            logger.info(f"{'='*60}")
+            logger.info(f"✓ LLM Search 完成 (最终长度: {len(merged_knowledge)})")
+            logger.info(f"{'='*60}\n")
             return merged_knowledge
             
         except Exception as e:
-            logger.error(f"获取知识文本失败,问题: {question[:50]}..., 错误: {e}")
+            logger.error(f"获取知识文本失败,问题: {question[:50]}..., 错误: {e}")
             raise
 
 

+ 199 - 1
knowledge_v2/multi_search_knowledge.py

@@ -1,7 +1,205 @@
+
 '''
 多渠道获取知识,当前有两个渠道 llm_search_knowledge.py 和 xhs_search_knowledge.py
 1. 输入:问题
 2. 判断选择哪些渠道获取知识,目录默认返回 llm_search 和 xhs_search 两个渠道
 3. 根据选择的结果调用对应的渠道获取知识
 4. 合并多个渠道返回知识文本,返回知识文本,使用大模型合并,prompt在 prompt/multi_search_merge_knowledge_prompt.md 中
-'''
+补充:暂时将xhs_search_knowledge渠道的调用注释掉,后续完成xhs_search_knowledge的实现
+'''
+
+import os
+import sys
+import json
+from typing import List, Dict
+from loguru import logger
+
+# 设置路径以便导入工具类
+current_dir = os.path.dirname(os.path.abspath(__file__))
+root_dir = os.path.dirname(current_dir)
+sys.path.insert(0, root_dir)
+
+from utils.gemini_client import generate_text
+from knowledge_v2.llm_search_knowledge import get_knowledge as get_llm_knowledge
+from knowledge_v2.cache_manager import CacheManager
+# from knowledge_v2.xhs_search_knowledge import get_knowledge as get_xhs_knowledge
+
+class MultiSearchKnowledge:
+    """多渠道知识获取类"""
+    
+    def __init__(self, use_cache: bool = True):
+        """
+        初始化
+        
+        Args:
+            use_cache: 是否启用缓存,默认启用
+        """
+        logger.info("=" * 60)
+        logger.info("初始化 MultiSearchKnowledge")
+        self.prompt_dir = os.path.join(current_dir, "prompt")
+        self.use_cache = use_cache
+        self.cache = CacheManager() if use_cache else None
+        logger.info(f"缓存状态: {'启用' if use_cache else '禁用'}")
+        logger.info("=" * 60)
+        
+    def _load_prompt(self, filename: str) -> str:
+        """
+        加载prompt文件内容
+        
+        Args:
+            filename: prompt文件名
+            
+        Returns:
+            str: prompt内容
+        """
+        prompt_path = os.path.join(self.prompt_dir, filename)
+        
+        if not os.path.exists(prompt_path):
+            error_msg = f"Prompt文件不存在: {prompt_path}"
+            logger.error(error_msg)
+            raise FileNotFoundError(error_msg)
+        
+        try:
+            with open(prompt_path, 'r', encoding='utf-8') as f:
+                content = f.read().strip()
+                if not content:
+                    error_msg = f"Prompt文件内容为空: {prompt_path}"
+                    logger.error(error_msg)
+                    raise ValueError(error_msg)
+                return content
+        except Exception as e:
+            logger.error(f"读取prompt文件 {filename} 失败: {e}")
+            raise
+
+    def merge_knowledge(self, question: str, knowledge_map: Dict[str, str]) -> str:
+        """
+        合并多个渠道的知识文本
+        
+        Args:
+            question: 用户问题  
+            knowledge_map: 渠道名到知识文本的映射
+            
+        Returns:
+            str: 合并后的知识文本
+        """
+        logger.info(f"[Multi-Search] 合并多渠道知识 - {len(knowledge_map)} 个渠道")
+        
+        # 尝试从缓存读取
+        if self.use_cache:
+            cached_merged = self.cache.get(question, 'multi_search', 'merged_knowledge.txt')
+            if cached_merged:
+                logger.info(f"✓ 使用缓存的合并知识 (长度: {len(cached_merged)})")
+                return cached_merged
+        
+        try:
+            # 过滤空文本
+            valid_knowledge = {k: v for k, v in knowledge_map.items() if v and v.strip()}
+            logger.info(f"  有效渠道: {list(valid_knowledge.keys())}")
+            
+            if not valid_knowledge:
+                logger.warning("  ⚠ 所有渠道的知识文本都为空")
+                return ""
+            
+            # 如果只有一个渠道有内容,也经过LLM整理以保证输出风格一致
+            
+            # 加载prompt
+            prompt_template = self._load_prompt("multi_search_merge_knowledge_prompt.md")
+            
+            # 构建知识文本部分
+            knowledge_texts_str = ""
+            for source, text in valid_knowledge.items():
+                knowledge_texts_str += f"【来源:{source}】\n{text}\n\n"
+            
+            # 填充prompt
+            prompt = prompt_template.format(question=question, knowledge_texts=knowledge_texts_str)
+            
+            # 调用大模型
+            logger.info("  → 调用Gemini合并多渠道知识...")
+            merged_text = generate_text(prompt=prompt)
+            
+            logger.info(f"✓ 多渠道知识合并完成 (长度: {len(merged_text)})")
+            
+            # 写入缓存
+            if self.use_cache:
+                self.cache.set(question, 'multi_search', 'merged_knowledge.txt', merged_text.strip())
+            
+            return merged_text.strip()
+            
+        except Exception as e:
+            logger.error(f"✗ 合并知识失败: {e}")
+            raise
+
+    def get_knowledge(self, question: str) -> str:
+        """
+        获取知识的主方法
+        
+        Args:
+            question: 问题字符串
+            
+        Returns:
+            str: 最终的知识文本
+        """
+        logger.info(f"{'='*60}")
+        logger.info(f"Multi-Search - 开始处理问题: {question[:50]}...")
+        logger.info(f"{'='*60}")
+        
+        # 检查整体缓存
+        if self.use_cache:
+            cached_final = self.cache.get(question, 'multi_search', 'final_knowledge.txt')
+            if cached_final:
+                logger.info(f"✓ 使用缓存的最终知识 (长度: {len(cached_final)})")
+                logger.info(f"{'='*60}\n")
+                return cached_final
+        
+        knowledge_map = {}
+        
+        # 1. 获取 LLM Search 知识
+        try:
+            logger.info("[渠道1] 调用 LLM Search...")
+            llm_knowledge = get_llm_knowledge(question)
+            knowledge_map["LLM Search"] = llm_knowledge
+            logger.info(f"✓ LLM Search 完成 (长度: {len(llm_knowledge)})")
+        except Exception as e:
+            logger.error(f"✗ LLM Search 失败: {e}")
+            knowledge_map["LLM Search"] = ""
+            
+        # 2. 获取 XHS Search 知识 (暂时注释)
+        # try:
+        #     logger.info("[渠道2] 调用 XHS Search...")
+        #     xhs_knowledge = get_xhs_knowledge(question)
+        #     knowledge_map["XHS Search"] = xhs_knowledge
+        # except Exception as e:
+        #     logger.error(f"✗ XHS Search 失败: {e}")
+        #     knowledge_map["XHS Search"] = ""
+        
+        # 3. 合并知识
+        final_knowledge = self.merge_knowledge(question, knowledge_map)
+        
+        # 保存最终缓存
+        if self.use_cache and final_knowledge:
+            self.cache.set(question, 'multi_search', 'final_knowledge.txt', final_knowledge)
+        
+        logger.info(f"{'='*60}")
+        logger.info(f"✓ Multi-Search 完成 (最终长度: {len(final_knowledge)})")
+        logger.info(f"{'='*60}\n")
+        
+        return final_knowledge
+
+def get_knowledge(question: str) -> str:
+    """
+    便捷调用函数
+    """
+    agent = MultiSearchKnowledge()
+    return agent.get_knowledge(question)
+
+if __name__ == "__main__":
+    # 测试代码
+    test_question = "如何评价最近的国产3A游戏黑神话悟空?"
+    try:
+        result = get_knowledge(test_question)
+        print("=" * 50)
+        print("最终整合知识:")
+        print("=" * 50)
+        print(result)
+    except Exception as e:
+        logger.error(f"测试失败: {e}")

+ 13 - 0
knowledge_v2/prompt/function_generate_query_prompt.md

@@ -0,0 +1,13 @@
+你是一个智能助手。你的任务是根据用户的问题、帖子信息和账号人设信息,生成一个用于搜索或调用工具的查询语句(Query)。
+
+用户问题:
+{question}
+
+帖子信息:
+{post_info}
+
+账号人设信息:
+{persona_info}
+
+请分析上述信息,提炼出核心需求,生成一个简洁明了的查询语句。
+只输出查询语句,不要包含任何解释。

+ 24 - 0
knowledge_v2/prompt/function_knowledge_extract_tool_params_prompt.md

@@ -0,0 +1,24 @@
+你是一个API调用专家。你的任务是根据工具的信息和用户的查询,生成正确的调用参数。
+
+查询内容:
+{query}
+
+工具信息:
+{tool_info}
+
+请分析工具的参数要求,根据查询内容提取或推断出合适的参数值。
+
+输出格式:请以 JSON 格式输出参数字典,例如:
+{{
+    "param1": "value1",
+    "param2": "value2"
+}}
+
+注意事项:
+1. 只输出参数的JSON字典,不要包含任何解释
+2. 参数名必须与工具定义中的参数名完全一致
+3. 参数值要从查询中提取或合理推断
+4. 不要添加工具定义中没有的参数
+5. 如果某个参数无法从查询中获取,使用合理的默认值或省略该参数
+
+只输出JSON,不要包含markdown标记。

+ 22 - 0
knowledge_v2/prompt/function_knowledge_generate_new_tool_prompt.md

@@ -0,0 +1,22 @@
+你是一个Python工具生成专家。你的任务是根据提供的知识文本,抽象并定义一个新的Python工具函数。
+
+知识文本:
+{knowledge}
+
+请遵循以下规则生成工具:
+1. **函数名**:使用英文,简洁且具有描述性,例如 `search_recipe`。
+2. **参数**:根据知识文本中的操作步骤,提取必要的输入参数,并添加类型注解。
+3. **文档字符串**:详细描述工具的功能、参数和返回值。
+4. **实现**:由于你无法真正实现外部调用,请生成一个模拟实现的函数体,或者如果知识文本包含具体的API调用逻辑,请尽量还原。如果无法实现,请在函数体中返回一个描述性的字符串或模拟数据。
+5. **格式**:直接输出Python代码,不要包含markdown代码块标记。
+
+示例输出:
+def example_tool(param1: str) -> str:
+    """
+    这是一个示例工具
+    :param param1: 参数1
+    :return: 结果
+    """
+    return f"Processed {param1}"
+
+请生成代码:

+ 14 - 0
knowledge_v2/prompt/function_knowledge_select_tools_prompt.md

@@ -0,0 +1,14 @@
+你是一个工具选择专家。你的任务是根据用户的查询语句,从给定的工具列表中选择最合适的一个工具。
+
+用户查询:
+{query}
+
+可用工具列表:
+{tool_infos}
+
+
+请判断是否有工具可以解决用户的查询。
+- 如果有,请返回该工具的名称。
+- 如果没有合适的工具,请返回 "None"。
+
+只输出工具名称或 "None",不要包含任何解释。

+ 16 - 0
knowledge_v2/prompt/llm_search_generate_query_prompt.md

@@ -0,0 +1,16 @@
+你是一个搜索专家。你的任务是根据用户的问题,生成多个用于搜索引擎的查询语句(Query),以便从不同角度获取全面的信息。
+
+用户问题:
+{question}
+
+请生成 3-5 个查询语句,涵盖问题的核心概念、相关背景和细节。
+请以 JSON 格式输出,格式如下:
+{{
+    "queries": [
+        "查询语句1",
+        "查询语句2",
+        "查询语句3"
+    ]
+}}
+
+只输出 JSON,不要包含任何解释或 markdown 标记。

+ 12 - 0
knowledge_v2/prompt/llm_search_merge_knowledge_prompt.md

@@ -0,0 +1,12 @@
+你是一个知识整合专家。你的任务是将以下来自不同搜索结果的知识文本进行整合,生成一份全面、准确、逻辑清晰的回答。
+
+以下是搜索到的知识文本:
+{knowledge_texts}
+
+请遵循以下原则进行整合:
+1. **全面性**:涵盖所有文本提供的关键信息,不要遗漏重要细节。
+2. **准确性**:去除重复信息,解决可能的冲突(如果存在冲突,请保留主要观点或说明差异)。
+3. **逻辑性**:将信息组织成结构清晰的文章,使用恰当的标题、列表和段落。
+4. **可读性**:语言通顺流畅,易于理解。
+
+请直接输出整合后的知识文本,不要包含任何解释性的话语。

+ 16 - 0
knowledge_v2/prompt/multi_search_merge_knowledge_prompt.md

@@ -0,0 +1,16 @@
+你是一个知识整合专家。你的任务是根据用户的问题,将来自不同渠道(如LLM搜索、小红书搜索等)的知识文本进行整合,生成一份全面、准确、逻辑清晰的回答。
+
+用户问题:
+{question}
+
+以下是来自不同渠道的知识文本:
+{knowledge_texts}
+
+请遵循以下原则进行整合:
+1. **全面性**:涵盖所有渠道提供的关键信息,不要遗漏重要细节。
+2. **准确性**:如果不同渠道的信息存在冲突,请尝试分析原因并给出最合理的解释,或者同时呈现不同的观点并说明来源。
+3. **逻辑性**:将信息组织成结构清晰的文章,使用恰当的标题、列表和段落。
+4. **可读性**:语言通顺流畅,易于理解。
+5. **去重**:去除重复的信息。
+
+请直接输出整合后的知识文本,不要包含任何解释性的话语。

+ 56 - 0
knowledge_v2/test_detail_info.py

@@ -0,0 +1,56 @@
+"""
+快速测试 - 验证detail_info初始化
+"""
+
+import os
+import sys
+
+# 添加路径
+current_dir = os.path.dirname(os.path.abspath(__file__))
+root_dir = os.path.dirname(current_dir)
+sys.path.insert(0, root_dir)
+
+print("测试: detail_info初始化验证")
+print("=" * 60)
+
+try:
+    from knowledge_v2.function_knowledge import FunctionKnowledge
+    
+    # 创建实例(禁用缓存以测试所有路径)
+    agent = FunctionKnowledge(use_cache=False)
+    
+    # 测试1: generate_query
+    print("\n[测试1] generate_query")
+    query, detail = agent.generate_query("测试问题", "无", "测试")
+    print(f"✓ 返回类型: {type(query)}, {type(detail)}")
+    print(f"✓ detail keys: {detail.keys()}")
+    assert 'prompt' in detail, "detail应该包含prompt"
+    assert 'response' in detail, "detail应该包含response"
+    print("✓ generate_query 通过")
+    
+    # 测试2: select_tool  
+    print("\n[测试2] select_tool")
+    combined = "测试||无||测试"
+    tool_name, detail = agent.select_tool(combined, query)
+    print(f"✓ 返回类型: {type(tool_name)}, {type(detail)}")
+    print(f"✓ detail keys: {detail.keys()}")
+    assert 'prompt' in detail, "detail应该包含prompt"
+    assert 'response' in detail, "detail应该包含response"
+    print("✓ select_tool 通过")
+    
+    # 测试3: extract_tool_params
+    print("\n[测试3] extract_tool_params")
+    params, detail = agent.extract_tool_params(combined, "test_tool", query)
+    print(f"✓ 返回类型: {type(params)}, {type(detail)}")
+    print(f"✓ detail keys: {detail.keys()}")
+    assert 'prompt' in detail or 'fallback' in detail, "detail应该包含prompt或fallback"
+    print("✓ extract_tool_params 通过")
+    
+    print("\n" + "=" * 60)
+    print("✓ 所有测试通过!detail_info初始化正确")
+    print("=" * 60)
+    
+except Exception as e:
+    print(f"\n✗ 测试失败: {e}")
+    import traceback
+    traceback.print_exc()

+ 21 - 0
knowledge_v2/tool_infos/wechat_search_article.txt

@@ -0,0 +1,21 @@
+工具名称: wechat_search_article
+
+功能描述: 搜索微信公众号文章
+
+参数说明:
+- keyword (必需): 搜索关键词,字符串类型
+  示例: "英雄联盟"、"黑神话悟空"
+
+返回值:
+- 返回包含文章列表的JSON数据,每篇文章包含标题、链接、摘要等信息
+
+使用示例:
+curl --location 'http://47.84.182.56:8001/tools/call/wechat_search_article' \
+--header 'Content-Type: application/json' \
+--data '{
+    "keyword": "英雄联盟"
+}'
+
+注意事项:
+- keyword参数不能为空
+- 如果搜索无结果,返回空列表

+ 168 - 0
knowledge_v2/tools_library.py

@@ -0,0 +1,168 @@
+'''
+工具库模块,提供工具库中的工具调用、保存新的待接入工具的信息
+分为两个函数:
+
+1. 调用工具库中的工具
+curl --location 'http://47.84.182.56:8001/tools/call/wechat_search_article' \
+--header 'Content-Type: application/json' \
+--data '{
+    "keyword": "英雄联盟"
+}'
+其中的data和wechat_search_article是函数的入参,把工具名替换到wechat_search_article,把参数字典替换到data
+
+
+2. 保存新的待接入工具的信息
+入参为工具信息,是一个工具的文档字符串
+目前默认将这个文档字符串保存到一个文件中,文件名默认是工具的名称
+'''
+
+import requests
+import os
+import json
+
+TOOL_SERVER_URL = "http://47.84.182.56:8001/tools/call"
+
+def call_tool(tool_name: str, arguments: dict):
+    """
+    调用工具库中的工具
+    :param tool_name: 工具名称
+    :param arguments: 工具参数字典
+    :return: 工具调用结果
+    """
+    url = f"{TOOL_SERVER_URL}/{tool_name}"
+    headers = {
+        'Content-Type': 'application/json'
+    }
+    try:
+        response = requests.post(url, headers=headers, json=arguments)
+        response.raise_for_status()
+        return response.json()
+    except requests.RequestException as e:
+        # 在实际生产中可能需要更复杂的错误处理或日志记录
+        return {"error": f"Failed to call tool {tool_name}: {str(e)}"}
+
+def save_tool_info(tool_name: str, tool_doc: str):
+    """
+    保存新的待接入工具的信息
+    :param tool_name: 工具名称
+    :param tool_doc: 工具文档字符串
+    :return: 保存的文件路径
+    """
+    # 获取当前文件所在目录
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    # 创建 tool_infos 目录(如果不存在)
+    save_dir = os.path.join(current_dir, 'tool_infos')
+    if not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+    
+    file_path = os.path.join(save_dir, f"{tool_name}.txt")
+    
+    try:
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(tool_doc)
+        return file_path
+    except IOError as e:
+        return f"Error saving tool info: {str(e)}"
+
+def get_all_tool_infos() -> str:
+    """
+    获取所有已保存工具的信息
+    :return: 所有工具信息的拼接字符串
+    """
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    save_dir = os.path.join(current_dir, 'tool_infos')
+    
+    if not os.path.exists(save_dir):
+        return ""
+    
+    tool_infos = []
+    try:
+        for filename in os.listdir(save_dir):
+            if filename.endswith(".txt"):
+                file_path = os.path.join(save_dir, filename)
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    content = f.read().strip()
+                    tool_infos.append(f"--- Tool: {filename[:-4]} ---\n{content}\n")
+    except Exception as e:
+        return f"Error reading tool infos: {str(e)}"
+        
+    return "\n".join(tool_infos)
+
+def get_tool_info(tool_name: str) -> str:
+    """
+    获取指定工具的信息
+    :param tool_name: 工具名称
+    :return: 工具信息文本,如果不存在返回空字符串
+    """
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    save_dir = os.path.join(current_dir, 'tool_infos')
+    
+    if not os.path.exists(save_dir):
+        return ""
+    
+    file_path = os.path.join(save_dir, f"{tool_name}.txt")
+    if not os.path.exists(file_path):
+        return ""
+    
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return f.read().strip()
+    except Exception as e:
+        return f"Error reading tool info: {str(e)}"

Alguns ficheiros não foram mostrados porque muitos ficheiros mudaram neste diff