@@ -285,7 +285,10 @@ class LLMSearchKnowledge:
             Exception: raised when merging fails
         """
         logger.info(f"[Step 3] Merging knowledge - {len(knowledge_texts)} texts in total")
-
+
+        if len(knowledge_texts) == 1:
+            return knowledge_texts[0]
+
         # Try to read from cache
         if self.use_cache:
             cached_merged = self.cache.get(question, 'llm_search', 'merged_knowledge.txt')
@@ -350,17 +353,17 @@ class LLMSearchKnowledge:
         except Exception as e:
             logger.error(f"✗ Failed to merge knowledge texts: {e}")
             raise
-
+
     def _save_execution_detail(self, cache_key: str):
         """
         Save execution detail to cache (supports merging with old records)
-
+
         Args:
            cache_key: the cache key
         """
         if not self.use_cache or not self.cache:
             return
-
+
         try:
             import hashlib
             question_hash = hashlib.md5(cache_key.encode('utf-8')).hexdigest()[:12]
@@ -370,46 +373,46 @@ class LLMSearchKnowledge:
                 'llm_search'
             )
             os.makedirs(detail_dir, exist_ok=True)
-
+
             detail_file = os.path.join(detail_dir, 'execution_detail.json')
-
+
             # Prepare the final data to be saved
             final_detail = self.execution_detail.copy()
-
+
             # Try to read the old file and merge with it
             if os.path.exists(detail_file):
                 try:
                     with open(detail_file, 'r', encoding='utf-8') as f:
                         old_detail = json.load(f)
-
+
                     # 1. Merge generate_queries
                     new_gen = self.execution_detail.get("generate_queries")
                     old_gen = old_detail.get("generate_queries")
-                    if (new_gen and isinstance(new_gen, dict) and
-                        new_gen.get("cached") is True and
-                        old_gen and isinstance(old_gen, dict) and
-                        "prompt" in old_gen):
+                    if (new_gen and isinstance(new_gen, dict) and
+                        new_gen.get("cached") is True and
+                        old_gen and isinstance(old_gen, dict) and
+                        "prompt" in old_gen):
                         final_detail["generate_queries"] = old_gen
-
+
                     # 2. Merge merge_detail
                     new_merge = self.execution_detail.get("merge_detail")
                     old_merge = old_detail.get("merge_detail")
-                    if (new_merge and isinstance(new_merge, dict) and
-                        new_merge.get("cached") is True and
-                        old_merge and isinstance(old_merge, dict) and
-                        "prompt" in old_merge):
+                    if (new_merge and isinstance(new_merge, dict) and
+                        new_merge.get("cached") is True and
+                        old_merge and isinstance(old_merge, dict) and
+                        "prompt" in old_merge):
                         final_detail["merge_detail"] = old_merge
-
+
                     # 3. Merge search_results (a list)
                     new_results = self.execution_detail.get("search_results", [])
                     old_results = old_detail.get("search_results", [])
-
+
                     if new_results and old_results:
                         merged_results = []
                         # Index the old results: (query, index) -> item
-                        old_map = {(item.get("query"), item.get("query_index")): item
-                                   for item in old_results if isinstance(item, dict)}
-
+                        old_map = {(item.get("query"), item.get("query_index")): item
+                                   for item in old_results if isinstance(item, dict)}
+
                         for item in new_results:
                             if item.get("cached") is True:
                                 key = (item.get("query"), item.get("query_index"))
@@ -421,19 +424,19 @@ class LLMSearchKnowledge:
                                 continue
                         merged_results.append(item)
                         final_detail["search_results"] = merged_results
-
+
                 except Exception as e:
                     logger.warning(f" ⚠ Failed to read the old detail file: {e}")

             with open(detail_file, 'w', encoding='utf-8') as f:
                 json.dump(final_detail, f, ensure_ascii=False, indent=2)
-
+
             logger.info(f"✓ Execution detail saved: {detail_file}")
-
+
         except Exception as e:
             logger.error(f"✗ Failed to save execution detail: {e}")

-    def get_knowledge(self, question: str, cache_key: str = None) -> str:
+    def get_knowledge(self, question: str, cache_key: str = None, need_generate_query: bool = True) -> str:
         """
         Main method: retrieve knowledge text for a question

@@ -459,7 +462,10 @@ class LLMSearchKnowledge:
         logger.info(f"{'='*60}")

         # Step 1: generate multiple queries
-        queries = self.generate_queries(actual_cache_key)
+        if need_generate_query:
+            queries = self.generate_queries(actual_cache_key)
+        else:
+            queries = [question]

         # Step 2: search knowledge for each query
         knowledge_texts = self.search_knowledge_batch(actual_cache_key, queries)
@@ -485,7 +491,7 @@ class LLMSearchKnowledge:
             raise


-def get_knowledge(question: str, cache_key: str = None) -> str:
+def get_knowledge(question: str, cache_key: str = None, need_generate_query: bool = True) -> str:
     """
     Convenience function: retrieve knowledge text for a question

@@ -497,7 +503,7 @@ def get_knowledge(question: str, cache_key: str = None) -> str:
         str: the final knowledge text
     """
     agent = LLMSearchKnowledge()
-    return agent.get_knowledge(question, cache_key=cache_key)
+    return agent.get_knowledge(question, cache_key=cache_key, need_generate_query=need_generate_query)


 if __name__ == "__main__":
@@ -505,7 +511,7 @@ if __name__ == "__main__":
|
|
|
test_question = "关于猫咪和墨镜的服装造型元素"
|
|
|
|
|
|
try:
|
|
|
- result = get_knowledge(test_question)
|
|
|
+ result = get_knowledge(question=test_question, need_generate_query=False)
|
|
|
print("=" * 50)
|
|
|
print("最终知识文本:")
|
|
|
print("=" * 50)
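
For context, a minimal usage sketch of the updated entry point follows. The module name llm_search_knowledge is a placeholder (the diff does not show the file name); the sketch only exercises the new need_generate_query flag added above: with the default True the agent generates multiple queries before searching, while False skips query generation and searches with the original question directly.

# Illustrative sketch only; "llm_search_knowledge" is a hypothetical module name.
from llm_search_knowledge import get_knowledge

question = "Clothing styling elements involving cats and sunglasses"

# Default path: generate multiple queries with the LLM, then search and merge.
merged = get_knowledge(question)

# New option from this change: skip query generation and search with the raw question.
merged_direct = get_knowledge(question, need_generate_query=False)

print(merged_direct)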