Prechádzať zdrojové kódy

Merge branch 'lzh_knowledge_1202' of https://git.yishihui.com/yangxiaohui/kg_agent into lzh_knowledge_1202

liuzhiheng 15 hodín pred
rodič
commit
fd2ab93a40

Rozdielové dáta súboru neboli zobrazené, pretože súbor je príliš veľký
+ 0 - 16
knowledge_v2/.cache/9f510b2a8348/execution_record.json


Rozdielové dáta súboru neboli zobrazené, pretože súbor je príliš veľký
+ 0 - 7
knowledge_v2/.cache/9f510b2a8348/function_knowledge/execution_detail.json


+ 0 - 1
knowledge_v2/.cache/9f510b2a8348/function_knowledge/generated_query.txt

@@ -1 +0,0 @@
-用什么工具获取教资查分这个信息

+ 0 - 1
knowledge_v2/.cache/9f510b2a8348/function_knowledge/selected_tool.txt

@@ -1 +0,0 @@
-{"工具名": "新红热搜词搜索", "工具调用ID": "new_red_hot_search_words_search", "使用方法": "输入关键词\"教资查分\",获取该词在小红书的热度值、近90天热度趋势曲线、声量概览及相关搜索结果,全面了解该信息的关注度和趋势。"}

+ 0 - 3
knowledge_v2/.cache/9f510b2a8348/function_knowledge/tool_params.json

@@ -1,3 +0,0 @@
-{
-  "prompt": "教资查分"
-}

+ 0 - 6
knowledge_v2/.cache/9f510b2a8348/function_knowledge/tool_result.json

@@ -1,6 +0,0 @@
-{
-  "success": true,
-  "result": "{\"result\": {\"image_url\": [], \"content\": \"## \\\"教资查分\\\"关键词数据表现和趋势分析\\n\\n### 热度值\\n- 近30天热度值超过96.14%全站其他词\\n- 近30天热度值超过95.86%教育领域其他词\\n\\n### 声量概览\\n- **笔记数**: 16篇(环比增长700.00%)\\n- **总热度**: 4082(环比增长408100.00%)\\n- **热门天数**: 4/30天\\n- **近30天热门内容领域**:\\n  - 教育: 56.25%\\n  - 职场: 31.25%\\n  - 运动健身: 6.25%\\n- **高频搭配词**: 暂无相关数据\\n\\n### 热度值趋势\\n- **整体趋势**: 下降趋势\\n- **效果分析**:\\n  - TOP1下降趋势: 2025-11-07 至 2025-11-08\\n  - TOP2下降趋势: 2025-11-04 至 2025-11-05\\n\\n### 相关搜索结果\\n- 共1条搜索结果\\n- 内容领域分布: 教育、职场等领域为主\\n\\n从数据可以看出,\\\"教资查分\\\"这个关键词在近期有显著的热度增长,但目前呈现下降趋势。该关键词主要与教育和职场领域相关,这符合教师资格证查询分数的实际应用场景。虽然笔记数量不多(16篇),但环比增长非常高,表明这是一个短期内突然受到关注的话题。\", \"video_url\": []}, \"success\": true, \"error\": \"\"}",
-  "tool_name": "new_red_hot_search_words_search",
-  "call_type": "browser_auto_operate"
-}

+ 0 - 1
knowledge_v2/.cache/9f510b2a8348/question.txt

@@ -1 +0,0 @@
-教资查分这个信息怎么来的||发帖时间:2025.11.07||

+ 109 - 105
knowledge_v2/execution_collector.py

@@ -52,45 +52,73 @@ class ExecutionCollector:
         # 初始化执行记录
         execution_record = {
             "input": input_info,
-            "execution": {
-                "modules": {}
-            },
-            "result": {
-                "type": None,
-                "content": None,
-                "raw_data": None
+            "execution": {},
+            "output": {
+                "result": None,
             },
             "metadata": {
-                "execution_time": 0,
+                "execution_time": input_info.get("execution_time", 0),
                 "cache_hits": [],
                 "errors": []
             }
         }
         
-        # 收集各模块的执行详情
         try:
-            # 1. 收集 function_knowledge 的详情
-            function_detail = self._collect_function_knowledge_detail(cache_dir)
-            if function_detail:
-                execution_record["execution"]["modules"]["function_knowledge"] = function_detail
-            
-            # 2. 收集 multi_search 的详情
-            multi_detail = self._collect_multi_search_detail(cache_dir)
-            if multi_detail:
-                execution_record["execution"]["modules"]["multi_search"] = multi_detail
+            # 1. Generate Query
+            query_detail = self._read_json(cache_dir, 'function_knowledge', 'generated_query.json')
+            if query_detail:
+                execution_record["execution"]["generate_query"] = query_detail
+                if query_detail.get("cached"):
+                    execution_record["metadata"]["cache_hits"].append("generate_query")
+
+            # 2. Select Tool
+            tool_detail = self._read_json(cache_dir, 'function_knowledge', 'selected_tool.json')
+            if tool_detail:
+                execution_record["execution"]["select_tool"] = tool_detail
+                if tool_detail.get("cached"):
+                    execution_record["metadata"]["cache_hits"].append("select_tool")
+
+            # 3. Check for Search or Tool Call
+            tool_call_detail = self._read_json(cache_dir, 'function_knowledge', 'tool_call.json')
             
-            # 3. 收集 llm_search 的详情
-            llm_detail = self._collect_llm_search_detail(cache_dir)
-            if llm_detail:
-                execution_record["execution"]["modules"]["llm_search"] = llm_detail
+            if tool_call_detail:
+                # Flow A: Tool Call
+                
+                # Extract Params
+                params_detail = self._read_json(cache_dir, 'function_knowledge', 'extracted_params.json')
+                if params_detail:
+                    execution_record["execution"]["extract_params"] = params_detail
+                    if params_detail.get("cached"):
+                        execution_record["metadata"]["cache_hits"].append("extract_params")
+                
+                # Tool Call
+                execution_record["execution"]["tool_call"] = tool_call_detail
+                if tool_call_detail.get("cached"):
+                    execution_record["metadata"]["cache_hits"].append("tool_call")
+                
+                # Result
+                execution_record["output"]["result"] = tool_call_detail.get("result", "")
+                execution_record["output"]["tool"] = tool_call_detail.get("tool_name", "")
 
-            # 4.设置结果信息
-            result_detail = self._collect_result_detail(cache_dir)
-            if result_detail:
-                execution_record["result"] = result_detail
+            else:
+                # Flow B: Search (Multi/LLM)
+                search_detail = self._collect_search_detail(cache_dir)
+                if search_detail:
+                    execution_record["execution"]["knowledge_search"] = search_detail
+                    
+                    # Result
+                    # merged_knowledge_detail from multi_search usually contains final response
+                    merged_detail = search_detail.get("multi_search_merge")
+                    if merged_detail:
+                        execution_record["output"]["type"] = "knowledge_search"
+                        response = merged_detail.get("response", "")
+                        execution_record["output"]["result"] = response
+                        execution_record["output"]["success"] = True
+                        if merged_detail.get("cached"):
+                            execution_record["metadata"]["cache_hits"].append("multi_search_merge")
             
-            # 5. 计算总结信息
-            self._calculate_summary(execution_record)
+            # Clean up metadata
+            # execution_time is retrieved from input_info if provided
             
             logger.info("✓ 执行记录收集完成")
             logger.info("=" * 60)
@@ -100,91 +128,63 @@ class ExecutionCollector:
             execution_record["metadata"]["errors"].append(str(e))
         
         return execution_record
-    
-    def _collect_function_knowledge_detail(self, cache_dir: str) -> Dict[str, Any]:
-        """收集function_knowledge模块的详情"""
-        detail_file = os.path.join(cache_dir, 'function_knowledge', 'execution_detail.json')
-        
-        if os.path.exists(detail_file):
-            try:
-                with open(detail_file, 'r', encoding='utf-8') as f:
-                    detail = json.load(f)
-                logger.info("  ✓ 收集 function_knowledge 详情")
-                return detail
-            except Exception as e:
-                logger.error(f"  ✗ 读取 function_knowledge 详情失败: {e}")
-        
-        return None
-    
-    def _collect_multi_search_detail(self, cache_dir: str) -> Dict[str, Any]:
-        """收集multi_search模块的详情"""
-        detail_file = os.path.join(cache_dir, 'multi_search', 'execution_detail.json')
-        
-        if os.path.exists(detail_file):
-            try:
-                with open(detail_file, 'r', encoding='utf-8') as f:
-                    detail = json.load(f)
-                logger.info("  ✓ 收集 multi_search 详情")
-                return detail
-            except Exception as e:
-                logger.error(f"  ✗ 读取 multi_search 详情失败: {e}")
-        
-        return None
-    
-    def _collect_llm_search_detail(self, cache_dir: str) -> Dict[str, Any]:
-        """收集llm_search模块的详情"""
-        detail_file = os.path.join(cache_dir, 'llm_search', 'execution_detail.json')
-        
-        if os.path.exists(detail_file):
-            try:
-                with open(detail_file, 'r', encoding='utf-8') as f:
-                    detail = json.load(f)
-                logger.info("  ✓ 收集 llm_search 详情")
-                return detail
-            except Exception as e:
-                logger.error(f"  ✗ 读取 llm_search 详情失败: {e}")
-        
-        return None
-
-    def _collect_result_detail(self, cache_dir: str) -> Dict[str, Any]:
-        """收集result模块的详情"""
-        detail_file = os.path.join(cache_dir, 'function_knowledge', 'tool_result.json')
 
-        if os.path.exists(detail_file):
+    def _read_json(self, base_dir: str, *paths) -> Dict[str, Any]:
+        """读取JSON文件"""
+        file_path = os.path.join(base_dir, *paths)
+        if os.path.exists(file_path):
             try:
-                with open(detail_file, 'r', encoding='utf-8') as f:
-                    detail = json.load(f)
-                logger.info("  ✓ 收集 result 详情")
-                return detail
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    return json.load(f)
             except Exception as e:
-                logger.error(f"  ✗ 读取 result 详情失败: {e}")
-
+                logger.warning(f"读取JSON文件失败 {file_path}: {e}")
         return None
-    
-    def _calculate_summary(self, execution_record: Dict[str, Any]):
-        """计算总结信息"""
-        total_time = 0
-        cache_hits = []
+
+    def _collect_search_detail(self, cache_dir: str) -> Dict[str, Any]:
+        """收集搜索流程详情"""
+        search_detail = {}
         
-        # 遍历所有模块
-        for module_name, module_detail in execution_record["execution"]["modules"].items():
-            if "execution_time" in module_detail:
-                total_time += module_detail["execution_time"]
-            
-            if "cache_hits" in module_detail:
-                cache_hits.extend([f"{module_name}/{hit}" for hit in module_detail["cache_hits"]])
+        # 1. LLM Search
+        llm_detail = {}
         
-        execution_record["metadata"]["execution_time"] = total_time
-        execution_record["metadata"]["cache_hits"] = cache_hits
-    
+        # Generated Queries
+        queries_detail = self._read_json(cache_dir, 'llm_search', 'generated_queries.json')
+        if queries_detail:
+            llm_detail["generated_queries"] = queries_detail
+            
+        # Search Results
+        # Search for search_result_XXX.json
+        search_results = []
+        llm_search_dir = os.path.join(cache_dir, 'llm_search', 'search_results')
+        if os.path.exists(llm_search_dir):
+            for filename in sorted(os.listdir(llm_search_dir)):
+                if filename.endswith('.json') and filename.startswith('search_result_'):
+                    res = self._read_json(llm_search_dir, filename)
+                    if res:
+                        search_results.append(res)
+        if search_results:
+            llm_detail["search_results"] = search_results
+            
+        # Merge
+        merge_detail_llm = self._read_json(cache_dir, 'llm_search', 'merged_knowledge_detail.json')
+        if merge_detail_llm:
+            llm_detail["merge"] = merge_detail_llm
+            
+        if llm_detail:
+            search_detail["llm_search"] = llm_detail
+            
+        # 2. Multi Search Merge
+        merge_detail_multi = self._read_json(cache_dir, 'multi_search', 'merged_knowledge_detail.json')
+        if merge_detail_multi:
+            search_detail["multi_search_merge"] = merge_detail_multi
+            
+        return search_detail if search_detail else None
+
     def _create_empty_record(self, input_info: Dict[str, Any]) -> Dict[str, Any]:
         """创建空的执行记录"""
         return {
             "input": input_info,
-            "execution": {
-                "steps": [],
-                "modules": {}
-            },
+            "execution": {},
             "result": {
                 "type": "error",
                 "content": "缓存目录不存在",
@@ -255,5 +255,9 @@ if __name__ == "__main__":
         "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
     }
     
-    record = collect_and_save_execution_record(cache_key, input_info)
-    print(json.dumps(record, ensure_ascii=False, indent=2))
+    # 注意:测试时需要确保缓存目录有数据,否则会返回空记录
+    try:
+        record = collect_and_save_execution_record(cache_key, input_info)
+        print(json.dumps(record, ensure_ascii=False, indent=2))
+    except Exception as e:
+        print(f"Test failed: {e}")

+ 72 - 98
knowledge_v2/function_knowledge.py

@@ -43,41 +43,12 @@ class FunctionKnowledge:
         self.use_cache = use_cache
         self.cache = CacheManager() if use_cache else None
         
-        # 执行详情收集
-        self.execution_detail = {
-            "generate_query": {},
-            "select_tool": {},
-            "extract_params": {},
-            "execution_time": 0,
-            "cache_hits": []
-        }
+
         
         logger.info(f"缓存状态: {'启用' if use_cache else '禁用'}")
         logger.info("=" * 80)
         
-    def _save_execution_detail(self, cache_key: str):
-        """保存执行详情到缓存"""
-        if not self.use_cache or not self.cache:
-            return
-        
-        try:
-            import hashlib
-            question_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:12]
-            detail_dir = os.path.join(
-                self.cache.base_cache_dir,
-                question_hash,
-                'function_knowledge'
-            )
-            os.makedirs(detail_dir, exist_ok=True)
-            
-            detail_file = os.path.join(detail_dir, 'execution_detail.json')
-            with open(detail_file, 'w', encoding='utf-8') as f:
-                json.dump(self.execution_detail, f, ensure_ascii=False, indent=2)
-            
-            logger.info(f"✓ 执行详情已保存: {detail_file}")
-            
-        except Exception as e:
-            logger.error(f"✗ 保存执行详情失败: {e}")
+
 
     def _load_prompt(self, filename: str) -> str:
         """加载prompt文件内容"""
@@ -109,12 +80,11 @@ class FunctionKnowledge:
 
             # 尝试从缓存读取
             if self.use_cache:
-                cached_query = self.cache.get(combined_question, 'function_knowledge', 'generated_query.txt')
-                if cached_query:
-                    logger.info(f"✓ 使用缓存的Query: {cached_query}")
-                    # 记录缓存命中
-                    self.execution_detail["generate_query"].update({"cached": True, "query": cached_query, "prompt": prompt})
-                    return cached_query
+                cached_data = self.cache.get(combined_question, 'function_knowledge', 'generated_query.json')
+                if cached_data:
+                    query = cached_data.get('query', cached_data.get('response', ''))
+                    logger.info(f"✓ 使用缓存的Query: {query}")
+                    return query
             
             logger.info("→ 调用Gemini生成Query...")
             query = generate_text(prompt=prompt)
@@ -122,17 +92,14 @@ class FunctionKnowledge:
             
             logger.info(f"✓ 生成Query: {query}")
             
-            # 写入缓存
+            # 保存到缓存(包含完整的prompt和response)
             if self.use_cache:
-                self.cache.set(combined_question, 'function_knowledge', 'generated_query.txt', query)
-            
-            # 记录详情
-            self.execution_detail["generate_query"] = {
-                "cached": False,
-                "prompt": prompt,
-                "response": query,
-                "query": query
-            }
+                query_data = {
+                    "prompt": prompt,
+                    "response": query,
+                    "query": query
+                }
+                self.cache.set(combined_question, 'function_knowledge', 'generated_query.json', query_data)
             
             return query
         except Exception as e:
@@ -159,16 +126,11 @@ class FunctionKnowledge:
 
             # 尝试从缓存读取
             if self.use_cache:
-                cached_tool = self.cache.get(combined_question, 'function_knowledge', 'selected_tool.txt')
-                if cached_tool:
-                    logger.info(f"✓ 使用缓存的工具: {cached_tool}")
-                    # 记录缓存命中
-                    self.execution_detail["select_tool"].update({
-                        "cached": True,
-                        "response": json.loads(cached_tool),
-                        "prompt": prompt,
-                    })
-                    return json.loads(cached_tool)
+                cached_data = self.cache.get(combined_question, 'function_knowledge', 'selected_tool.json')
+                if cached_data:
+                    result_json = cached_data.get('response', {})
+                    logger.info(f"✓ 使用缓存的工具: {result_json}")
+                    return result_json
             
             logger.info("→ 调用Gemini选择工具...")
             result = generate_text(prompt=prompt)
@@ -181,16 +143,13 @@ class FunctionKnowledge:
 
             logger.info(f"✓ 选择结果: {result_json.get('工具名', 'None')}")
             
-            # 写入缓存
+            # 保存到缓存(包含完整的prompt和response)
             if self.use_cache:
-                self.cache.set(combined_question, 'function_knowledge', 'selected_tool.txt', result)
-            
-            # 记录详情
-            self.execution_detail["select_tool"] = {
-                "cached": False,
-                "prompt": prompt,
-                "response": result_json,
-            }
+                tool_data = {
+                    "prompt": prompt,
+                    "response": result_json
+                }
+                self.cache.set(combined_question, 'function_knowledge', 'selected_tool.json', tool_data)
             
             return result_json
         except Exception as e:
@@ -250,22 +209,18 @@ class FunctionKnowledge:
             # 加载prompt
             prompt_template = self._load_prompt("function_knowledge_extract_tool_params_prompt.md")
             prompt = prompt_template.format(
-                query=query,
+                tool_mcp_name=tool_id,
+                tool_instructions=tool_instructions,
                 all_tool_params=tool_params
             )
 
             # 尝试从缓存读取
             if self.use_cache:
-                cached_params = self.cache.get(combined_question, 'function_knowledge', 'tool_params.json')
-                if cached_params:
-                    logger.info(f"✓ 使用缓存的参数: {cached_params}")
-                    # 记录缓存命中
-                    self.execution_detail["extract_params"].update({
-                        "cached": True,
-                        "params": cached_params,
-                        "prompt": prompt,
-                    })
-                    return cached_params
+                cached_data = self.cache.get(combined_question, 'function_knowledge', 'extracted_params.json')
+                if cached_data:
+                    params = cached_data.get('params', {})
+                    logger.info(f"✓ 使用缓存的参数: {params}")
+                    return params
             
             # 调用LLM提取参数
             logger.info("  → 调用Gemini提取参数...")
@@ -288,17 +243,14 @@ class FunctionKnowledge:
                 
                 logger.info(f"✓ 提取参数成功: {params}")
                 
-                # 写入缓存
+                # 保存到缓存(包含完整的prompt和response)
                 if self.use_cache:
-                    self.cache.set(combined_question, 'function_knowledge', 'tool_params.json', params)
-                
-                # 记录详情
-                self.execution_detail["extract_params"].update({
-                    "cached": False,
-                    "prompt": prompt,
-                    "response": response_text,
-                    "params": params
-                })
+                    params_data = {
+                        "prompt": prompt,
+                        "response": response_text,
+                        "params": params
+                    }
+                    self.cache.set(combined_question, 'function_knowledge', 'extracted_params.json', params_data)
                 
                 return params
                 
@@ -343,6 +295,24 @@ class FunctionKnowledge:
         except Exception as e:
             logger.error(f"✗ 保存知识失败: {e}")
 
+    def organize_tool_result(self, tool_result: dict) -> str:
+        """
+        组织工具调用结果,通过LLM整理为可读文本
+
+        Args:
+            tool_result: 原始工具调用结果
+
+        Returns:
+            str: 组织后的工具调用结果文本
+        """
+        prompt_template = self._load_prompt("tool_result_prettify_prompt.md")
+        prompt = prompt_template.format(
+            input=tool_result,
+        )
+        organized_result = generate_text(prompt=prompt)
+        organized_result = organized_result.strip()
+        return organized_result
+
     def get_knowledge(self, question: str, post_info: str, persona_info: str) -> dict:
         """
         获取方法知识的主流程(重构后)
@@ -383,15 +353,21 @@ class FunctionKnowledge:
                 
                 # 检查工具调用缓存
                 if self.use_cache:
-                    cached_tool_result = self.cache.get(combined_question, 'function_knowledge', 'tool_result.json')
-                    if cached_tool_result:
+                    cached_tool_call = self.cache.get(combined_question, 'function_knowledge', 'tool_call.json')
+                    if cached_tool_call:
                         logger.info(f"✓ 使用缓存的工具调用结果")
-                        tool_result = cached_tool_result
+                        tool_result = cached_tool_call.get('result', {})
                     else:
                         logger.info(f"  → 调用工具,参数: {arguments}")
                         tool_result = call_tool(tool_id, arguments)
-                        # 缓存工具调用结果
-                        self.cache.set(combined_question, 'function_knowledge', 'tool_result.json', tool_result)
+                        tool_result = self.organize_tool_result(tool_result)
+                        # 保存工具调用信息(包含工具名、入参、结果)
+                        tool_call_data = {
+                            "tool_name": tool_id,
+                            "arguments": arguments,
+                            "result": tool_result
+                        }
+                        self.cache.set(combined_question, 'function_knowledge', 'tool_call.json', tool_call_data)
                 else:
                     logger.info(f"  → 调用工具,参数: {arguments}")
                     tool_result = call_tool(tool_id, arguments)
@@ -408,9 +384,8 @@ class FunctionKnowledge:
                 logger.info("[后台任务] 保存知识到文件...")
                 threading.Thread(target=self.save_knowledge_to_file, args=(knowledge, combined_question)).start()
             
-            # 计算执行时间并保存详情
-            self.execution_detail["execution_time"] = time.time() - start_time
-            self._save_execution_detail(combined_question)
+            # 计算执行时间
+            execution_time = time.time() - start_time
             
             # 收集所有执行记录
             logger.info("=" * 80)
@@ -441,10 +416,9 @@ class FunctionKnowledge:
             import traceback
             logger.error(traceback.format_exc())
             
-            # 即使失败也尝试保存详情和收集记录
+            # 即使失败也尝试收集记录
             try:
-                self.execution_detail["execution_time"] = time.time() - start_time
-                self._save_execution_detail(combined_question)
+                execution_time = time.time() - start_time
                 
                 from knowledge_v2.execution_collector import collect_and_save_execution_record
                 execution_record = collect_and_save_execution_record(

+ 47 - 90
knowledge_v2/llm_search_knowledge.py

@@ -44,14 +44,7 @@ class LLMSearchKnowledge:
         self.use_cache = use_cache
         self.cache = CacheManager() if use_cache else None
         
-        # 执行详情收集
-        self.execution_detail = {
-            "generate_queries": None,
-            "search_results": [],
-            "merge_detail": None,
-            "execution_time": 0,
-            "cache_hits": []
-        }
+
         
         logger.info(f"缓存状态: {'启用' if use_cache else '禁用'}")
         logger.info("=" * 60)
@@ -107,16 +100,17 @@ class LLMSearchKnowledge:
         
         # 尝试从缓存读取
         if self.use_cache:
-            cached_queries = self.cache.get(question, 'llm_search', 'generated_queries.json')
-            if cached_queries:
-                logger.info(f"✓ 使用缓存的queries: {cached_queries}")
-                # 记录缓存命中
-                self.execution_detail["generate_queries"] = {
-                    "cached": True,
-                    "queries_count": len(cached_queries)
-                }
-                self.execution_detail["cache_hits"].append("generated_queries")
-                return cached_queries
+            cached_data = self.cache.get(question, 'llm_search', 'generated_queries.json')
+            if cached_data:
+                # check if it's the new format or old format (list)
+                if isinstance(cached_data, list):
+                    queries = cached_data
+                else:
+                    queries = cached_data.get('queries', [])
+                
+                if queries:
+                    logger.info(f"✓ 使用缓存的queries: {queries}")
+                    return queries
         
         try:
             # 加载prompt
@@ -152,18 +146,14 @@ class LLMSearchKnowledge:
                 for i, q in enumerate(queries, 1):
                     logger.info(f"  {i}. {q}")
                 
-                # 记录执行详情
-                self.execution_detail["generate_queries"] = {
-                    "cached": False,
-                    "prompt": prompt,
-                    "response": response_text,
-                    "queries_count": len(queries),
-                    "queries": queries
-                }
-                
-                # 写入缓存
+                # 保存到缓存(包含完整的prompt和response)
                 if self.use_cache:
-                    self.cache.set(question, 'llm_search', 'generated_queries.json', queries)
+                    queries_data = {
+                        "prompt": prompt,
+                        "response": response_text,
+                        "queries": queries
+                    }
+                    self.cache.set(question, 'llm_search', 'generated_queries.json', queries_data)
                 
                 return queries
                 
@@ -195,19 +185,12 @@ class LLMSearchKnowledge:
         
         # 尝试从缓存读取
         if self.use_cache:
-            cache_filename = f"search_result_{query_index:03d}.txt"
-            cached_result = self.cache.get(question, 'llm_search/search_results', cache_filename)
-            if cached_result:
-                logger.info(f"  ✓ 使用缓存结果 (长度: {len(cached_result)})")
-                # 记录缓存命中
-                self.execution_detail["search_results"].append({
-                    "query": query,
-                    "query_index": query_index,
-                    "cached": True,
-                    "result_length": len(cached_result)
-                })
-                self.execution_detail["cache_hits"].append(f"search_result_{query_index:03d}")
-                return cached_result
+            cache_filename = f"search_result_{query_index:03d}.json"
+            cached_data = self.cache.get(question, 'llm_search/search_results', cache_filename)
+            if cached_data:
+                content = cached_data.get('content', '')
+                logger.info(f"  ✓ 使用缓存结果 (长度: {len(content)})")
+                return content
         
         try:
             # 调用qwen_client的search_and_chat方法
@@ -226,18 +209,14 @@ class LLMSearchKnowledge:
             
             logger.info(f"  ✓ 获取知识文本 (长度: {len(knowledge_text)})")
             
-            # 记录搜索结果详情
-            self.execution_detail["search_results"].append({
-                "query": query,
-                "query_index": query_index,
-                "cached": False,
-                "result_length": len(knowledge_text)
-            })
-            
-            # 写入缓存
+            # 记录搜索结果详情并保存
             if self.use_cache:
-                cache_filename = f"search_result_{query_index:03d}.txt"
-                self.cache.set(question, 'llm_search/search_results', cache_filename, knowledge_text)
+                result_data = {
+                    "query": query,
+                    "content": knowledge_text
+                }
+                cache_filename = f"search_result_{query_index:03d}.json"
+                self.cache.set(question, 'llm_search/search_results', cache_filename, result_data)
             
             return knowledge_text
             
@@ -289,10 +268,11 @@ class LLMSearchKnowledge:
         
         # 尝试从缓存读取
         if self.use_cache:
-            cached_merged = self.cache.get(question, 'llm_search', 'merged_knowledge.txt')
-            if cached_merged:
-                logger.info(f"✓ 使用缓存的合并知识 (长度: {len(cached_merged)})")
-                return cached_merged
+            cached_data = self.cache.get(question, 'llm_search', 'merged_knowledge_detail.json')
+            if cached_data:
+                merged_text = cached_data.get('response', '') or cached_data.get('merged_text', '')
+                logger.info(f"✓ 使用缓存的合并知识 (长度: {len(merged_text)})")
+                return merged_text
         
         try:
             # 过滤空文本
@@ -329,7 +309,12 @@ class LLMSearchKnowledge:
             
             # 写入缓存
             if self.use_cache:
-                self.cache.set(question, 'llm_search', 'merged_knowledge.txt', merged_text.strip())
+                merge_data = {
+                    "prompt": prompt,
+                    "response": merged_text.strip(),
+                    "sources_count": len(valid_texts)
+                }
+                self.cache.set(question, 'llm_search', 'merged_knowledge_detail.json', merge_data)
             
             return merged_text.strip()
             
@@ -337,34 +322,7 @@ class LLMSearchKnowledge:
             logger.error(f"✗ 合并知识文本失败: {e}")
             raise
     
-    def _save_execution_detail(self, cache_key: str):
-        """
-        保存执行详情到缓存
-        
-        Args:
-            cache_key: 缓存键
-        """
-        if not self.use_cache or not self.cache:
-            return
-        
-        try:
-            import hashlib
-            question_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:12]
-            detail_dir = os.path.join(
-                self.cache.base_cache_dir,
-                question_hash,
-                'llm_search'
-            )
-            os.makedirs(detail_dir, exist_ok=True)
-            
-            detail_file = os.path.join(detail_dir, 'execution_detail.json')
-            with open(detail_file, 'w', encoding='utf-8') as f:
-                json.dump(self.execution_detail, f, ensure_ascii=False, indent=2)
-            
-            logger.info(f"✓ 执行详情已保存: {detail_file}")
-            
-        except Exception as e:
-            logger.error(f"✗ 保存执行详情失败: {e}")
+
     
     def get_knowledge(self, question: str, cache_key: str = None) -> str:
         """
@@ -405,16 +363,15 @@ class LLMSearchKnowledge:
             logger.info(f"{'='*60}\n")
             
             # 计算执行时间并保存详情
-            self.execution_detail["execution_time"] = time.time() - start_time
-            self._save_execution_detail(actual_cache_key)
+            execution_time = time.time() - start_time
             
             return merged_knowledge
             
         except Exception as e:
             logger.error(f"✗ 获取知识文本失败,问题: {question[:50]}..., 错误: {e}")
             # 即使失败也保存执行详情
-            self.execution_detail["execution_time"] = time.time() - start_time
-            self._save_execution_detail(actual_cache_key)
+            # 即使失败也保存执行详情
+            execution_time = time.time() - start_time
             raise
 
 

+ 21 - 61
knowledge_v2/multi_search_knowledge.py

@@ -40,13 +40,7 @@ class MultiSearchKnowledge:
         self.use_cache = use_cache
         self.cache = CacheManager() if use_cache else None
         
-        # 执行详情收集
-        self.execution_detail = {
-            "sources": {},
-            "merge_detail": None,
-            "execution_time": 0,
-            "cache_hits": []
-        }
+
         
         logger.info(f"缓存状态: {'启用' if use_cache else '禁用'}")
         logger.info("=" * 60)
@@ -95,16 +89,17 @@ class MultiSearchKnowledge:
         
         # 尝试从缓存读取
         if self.use_cache:
+            cached_data = self.cache.get(question, 'multi_search', 'merged_knowledge_detail.json')
+            if cached_data:
+                # Support reading from detail json
+                merged_text = cached_data.get('response', '') or cached_data.get('merged_text', '')
+                logger.info(f"✓ 使用缓存的合并知识 (长度: {len(merged_text)})")
+                return merged_text
+            
+            # Legacy txt file fallback
             cached_merged = self.cache.get(question, 'multi_search', 'merged_knowledge.txt')
             if cached_merged:
                 logger.info(f"✓ 使用缓存的合并知识 (长度: {len(cached_merged)})")
-                # 记录缓存命中
-                self.execution_detail["merge_detail"].update({
-                    "cached": True,
-                    "sources_count": len(knowledge_map),
-                    "result_length": len(cached_merged)
-                })
-                self.execution_detail["cache_hits"].append("merged_knowledge")
                 return cached_merged
         
         try:
@@ -135,19 +130,16 @@ class MultiSearchKnowledge:
             
             logger.info(f"✓ 多渠道知识合并完成 (长度: {len(merged_text)})")
             
-            # 记录合并详情
-            self.execution_detail["merge_detail"].update({
-                "cached": False,
-                "prompt": prompt,
-                "response": merged_text,
-                "sources_count": len(knowledge_map),
-                "valid_sources_count": len(valid_knowledge),
-                "result_length": len(merged_text)
-            })
-            
             # 写入缓存
             if self.use_cache:
                 self.cache.set(question, 'multi_search', 'merged_knowledge.txt', merged_text.strip())
+                merge_data = {
+                    "prompt": prompt,
+                    "response": merged_text,
+                    "sources_count": len(knowledge_map),
+                    "valid_sources_count": len(valid_knowledge)
+                }
+                self.cache.set(question, 'multi_search', 'merged_knowledge_detail.json', merge_data)
             
             return merged_text.strip()
             
@@ -155,29 +147,7 @@ class MultiSearchKnowledge:
             logger.error(f"✗ 合并知识失败: {e}")
             raise
     
-    def _save_execution_detail(self, cache_key: str):
-        """保存执行详情到缓存"""
-        if not self.use_cache or not self.cache:
-            return
-        
-        try:
-            import hashlib
-            question_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:12]
-            detail_dir = os.path.join(
-                self.cache.base_cache_dir,
-                question_hash,
-                'multi_search'
-            )
-            os.makedirs(detail_dir, exist_ok=True)
-            
-            detail_file = os.path.join(detail_dir, 'execution_detail.json')
-            with open(detail_file, 'w', encoding='utf-8') as f:
-                json.dump(self.execution_detail, f, ensure_ascii=False, indent=2)
-            
-            logger.info(f"✓ 执行详情已保存: {detail_file}")
-            
-        except Exception as e:
-            logger.error(f"✗ 保存执行详情失败: {e}")
+
 
     def get_knowledge(self, question: str, cache_key: str = None) -> str:
         """
@@ -207,9 +177,8 @@ class MultiSearchKnowledge:
                 logger.info(f"✓ 使用缓存的最终知识 (长度: {len(cached_final)})")
                 logger.info(f"{'='*60}\n")
                 # 记录缓存命中
-                self.execution_detail["cache_hits"].append("final_knowledge")
-                self.execution_detail["execution_time"] = time.time() - start_time
-                self._save_execution_detail(actual_cache_key)
+                # 记录缓存命中
+                execution_time = time.time() - start_time
                 return cached_final
         
         knowledge_map = {}
@@ -220,18 +189,10 @@ class MultiSearchKnowledge:
             llm_knowledge = get_llm_knowledge(question, cache_key=actual_cache_key)
             knowledge_map["LLM Search"] = llm_knowledge
             logger.info(f"✓ LLM Search 完成 (长度: {len(llm_knowledge)})")
-            # 记录来源详情
-            self.execution_detail["sources"]["llm_search"] = {
-                "success": True,
-                "knowledge_length": len(llm_knowledge)
-            }
+            logger.info(f"✓ LLM Search 完成 (长度: {len(llm_knowledge)})")
         except Exception as e:
             logger.error(f"✗ LLM Search 失败: {e}")
             knowledge_map["LLM Search"] = ""
-            self.execution_detail["sources"]["llm_search"] = {
-                "success": False,
-                "error": str(e)
-            }
             
         # 2. 获取 XHS Search 知识 (暂时注释)
         # try:
@@ -254,8 +215,7 @@ class MultiSearchKnowledge:
         logger.info(f"{'='*60}\n")
         
         # 计算执行时间并保存详情
-        self.execution_detail["execution_time"] = time.time() - start_time
-        self._save_execution_detail(actual_cache_key)
+        execution_time = time.time() - start_time
         
         return final_knowledge
 

Niektoré súbory nie sú zobrazené, pretože je v týchto rozdielových dátach zmenené mnoho súborov