@@ -7,6 +7,7 @@
 from agents import Agent, Runner, ModelSettings
 from lib.client import get_model
 from lib.utils import parse_json_from_text
+from lib.config import get_cache_dir
 from typing import Dict, Any, Optional
 import hashlib
 import json
@@ -26,8 +27,10 @@ DEFAULT_PROMPT_TEMPLATE = """
 ```
 """.strip()
 
-# Default cache directory
-DEFAULT_CACHE_DIR = "cache/semantic_similarity"
+
+def _get_default_cache_dir() -> str:
+    """Get the default cache directory (read from config)."""
+    return get_cache_dir("semantic_similarity")
 
 
 def _generate_cache_key(
@@ -91,7 +94,7 @@ def _get_cache_filepath(
     phrase_b: str,
     model_name: str,
     temperature: float,
-    cache_dir: str = DEFAULT_CACHE_DIR
+    cache_dir: Optional[str] = None
 ) -> Path:
     """
     Get the cache file path (a human-readable filename)
@@ -110,6 +113,9 @@ def _get_cache_filepath(
     Filename format: {phrase_a}_vs_{phrase_b}_{model}_t{temp}_{hash[:8]}.json
     Example: 宿命感_vs_余华的小说_gpt-4.1-mini_t0.0_a7f3e2d9.json
     """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
     # Sanitize the phrases and the model name
     clean_a = _sanitize_for_filename(phrase_a, max_length=20)
     clean_b = _sanitize_for_filename(phrase_b, max_length=20)
@@ -136,7 +142,7 @@ def _load_from_cache(
     phrase_b: str,
     model_name: str,
     temperature: float,
-    cache_dir: str = DEFAULT_CACHE_DIR
+    cache_dir: Optional[str] = None
 ) -> Optional[str]:
     """
     Load data from the cache
@@ -152,6 +158,9 @@ def _load_from_cache(
     Returns:
         The cached result string, or None if no cache entry exists
     """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
     cache_file = _get_cache_filepath(cache_key, phrase_a, phrase_b, model_name, temperature, cache_dir)
 
     # If the file does not exist, try to find it by matching the hash
@@ -187,7 +196,7 @@ def _save_to_cache(
     instructions: str,
     tools: str,
     result: str,
-    cache_dir: str = DEFAULT_CACHE_DIR
+    cache_dir: Optional[str] = None
 ) -> None:
     """
     Save data to the cache
@@ -205,6 +214,9 @@ def _save_to_cache(
         result: the result data (raw string)
         cache_dir: cache directory
     """
+    if cache_dir is None:
+        cache_dir = _get_default_cache_dir()
+
     cache_file = _get_cache_filepath(cache_key, phrase_a, phrase_b, model_name, temperature, cache_dir)
 
     # Make sure the cache directory exists
@@ -254,7 +266,7 @@ async def _difference_between_phrases(
     tools: list = None,
     name: str = "Semantic Similarity Analyzer",
     use_cache: bool = True,
-    cache_dir: str = DEFAULT_CACHE_DIR
+    cache_dir: Optional[str] = None
 ) -> str:
     """
     Judge the semantic similarity of two phrases
@@ -277,7 +289,7 @@ async def _difference_between_phrases(
         tools: list of tools available to the Agent, defaults to []
         name: name of the Agent, defaults to "Semantic Similarity Analyzer" (not part of the cache key)
         use_cache: whether to use the cache, defaults to True
-        cache_dir: cache directory, defaults to 'cache/semantic_similarity'
+        cache_dir: cache directory, read from config by default (set via lib.config)
 
     Returns:
         The similarity analysis result as a JSON-formatted string
@@ -345,13 +357,8 @@ async def _difference_between_phrases(
     result = await Runner.run(agent, input=prompt)
     final_output = result.final_output
 
-    # Save to the cache
-    if use_cache:
-        _save_to_cache(
-            cache_key, phrase_a, phrase_b, model_name,
-            temperature, max_tokens, prompt_template,
-            instructions, tools_str, final_output, cache_dir
-        )
+    # Note: do not cache here; cache only after parsing succeeds,
+    # which avoids caching responses that fail to parse
 
     return final_output
 
@@ -367,7 +374,7 @@ async def _difference_between_phrases_parsed(
     tools: list = None,
     name: str = "Semantic Similarity Analyzer",
     use_cache: bool = True,
-    cache_dir: str = DEFAULT_CACHE_DIR
+    cache_dir: Optional[str] = None
 ) -> Dict[str, Any]:
     """
     Judge the semantic similarity of two phrases and parse the result into a dict
@@ -383,13 +390,16 @@ async def _difference_between_phrases_parsed(
         tools: list of tools available to the Agent, defaults to []
         name: name of the Agent, defaults to "Semantic Similarity Analyzer"
         use_cache: whether to use the cache, defaults to True
-        cache_dir: cache directory, defaults to 'cache/semantic_similarity'
+        cache_dir: cache directory, read from config by default (set via lib.config)
 
     Returns:
         The parsed dict, containing:
         - 说明: the reasoning behind the similarity judgment
         - 相似度: a float between 0 and 1
 
+    Raises:
+        ValueError: raised when the AI response cannot be parsed as valid JSON
+
     Examples:
         >>> result = await difference_between_phrases_parsed("宿命感", "余华的小说")
         >>> print(result['相似度'])
@@ -397,21 +407,68 @@ async def _difference_between_phrases_parsed(
         >>> print(result['说明'])
         "两个概念有一定关联..."
     """
+    # Use the default template or a custom one
+    if prompt_template is None:
+        prompt_template = DEFAULT_PROMPT_TEMPLATE
+
+    # Default tools to an empty list
+    if tools is None:
+        tools = []
+
+    # Generate the cache key
+    tools_str = json.dumps(tools, sort_keys=True) if tools else "[]"
+    cache_key = _generate_cache_key(
+        phrase_a, phrase_b, model_name, temperature, max_tokens, prompt_template, instructions, tools_str
+    )
+
+    # Try to load from the cache
+    if use_cache:
+        cached_result = _load_from_cache(cache_key, phrase_a, phrase_b, model_name, temperature, cache_dir)
+        if cached_result is not None:
+            # Cache hit: parse and return directly
+            parsed_result = parse_json_from_text(cached_result)
+            if parsed_result:
+                return parsed_result
+            # If the cached content cannot be parsed either, fall through to the API call (a bad response may have been cached earlier)
+
+    # Call the AI for the raw response (use_cache=False, since caching is handled manually here)
     raw_result = await _difference_between_phrases(
         phrase_a, phrase_b, model_name, temperature, max_tokens,
-        prompt_template, instructions, tools, name, use_cache, cache_dir
+        prompt_template, instructions, tools, name, use_cache=False, cache_dir=cache_dir
     )
 
     # Parse the result with utils.parse_json_from_text
     parsed_result = parse_json_from_text(raw_result)
 
-    # If parsing fails (an empty dict is returned), return a result carrying the error info
+    # If parsing fails (an empty dict is returned), raise an exception with the details
     if not parsed_result:
-        return {
-            "说明": "Parse failure: could not extract valid JSON from the response",
-            "相似度": 0.0,
-            "raw_response": raw_result
-        }
+        # Format the prompt for the error message
+        formatted_prompt = prompt_template.format(phrase_a=phrase_a, phrase_b=phrase_b)
+
+        error_msg = f"""
+JSON parsing failed!
+================================================================================
+Phrase A: {phrase_a}
+Phrase B: {phrase_b}
+Model: {model_name}
+Temperature: {temperature}
+================================================================================
+Prompt:
+{formatted_prompt}
+================================================================================
+AI response (length: {len(raw_result)}):
+{raw_result}
+================================================================================
+"""
+        raise ValueError(error_msg)
+
+    # Cache only after parsing succeeds
+    if use_cache:
+        _save_to_cache(
+            cache_key, phrase_a, phrase_b, model_name,
+            temperature, max_tokens, prompt_template,
+            instructions, tools_str, raw_result, cache_dir
+        )
 
     return parsed_result
 
@@ -430,7 +487,7 @@ async def compare_phrases(
     tools: list = None,
     name: str = "Semantic Similarity Analyzer",
     use_cache: bool = True,
-    cache_dir: str = DEFAULT_CACHE_DIR
+    cache_dir: Optional[str] = None
 ) -> Dict[str, Any]:
     """
     Compare the semantic similarity of two phrases (the sole public interface)
@@ -446,7 +503,7 @@ async def compare_phrases(
         tools: list of tools available to the Agent, defaults to []
         name: name of the Agent, defaults to "Semantic Similarity Analyzer"
         use_cache: whether to use the cache, defaults to True
-        cache_dir: cache directory, defaults to 'cache/semantic_similarity'
+        cache_dir: cache directory, read from config by default (set via lib.config)
 
     Returns:
         The parsed dict
@@ -540,7 +597,7 @@ async def compare_phrases_v2(
     tools: list = None,
     name: str = "Advanced Semantic Analyzer",
     use_cache: bool = True,
-    cache_dir: str = DEFAULT_CACHE_DIR
+    cache_dir: Optional[str] = None
 ) -> Dict[str, Any]:
     """
     Compare the semantic similarity of two phrases - V2 (detailed analysis)
@@ -561,7 +618,7 @@ async def compare_phrases_v2(
         tools: list of tools available to the Agent, defaults to []
         name: name of the Agent, defaults to "Advanced Semantic Analyzer"
         use_cache: whether to use the cache, defaults to True
-        cache_dir: cache directory, defaults to 'cache/semantic_similarity'
+        cache_dir: cache directory, read from config by default (set via lib.config)
 
     Returns:
         The parsed dict, containing:
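Caller-side usage sketch (not part of the patch): with this change, cache_dir is optional and falls back to lib.config's get_cache_dir("semantic_similarity"), and an unparseable model response raises ValueError instead of returning a placeholder dict, so such responses are never cached. The snippet assumes compare_phrases can be called with just the two phrases (as in the docstring example) and that it propagates the ValueError from the parsing helper; the import path and the asyncio wrapper are illustrative, not taken from this diff.

import asyncio

from semantic_similarity import compare_phrases  # assumed module path, adjust to the real package layout


async def main() -> None:
    try:
        # cache_dir is omitted, so the default comes from lib.config's get_cache_dir("semantic_similarity")
        result = await compare_phrases("宿命感", "余华的小说")
        print(result["相似度"], result["说明"])
    except ValueError as exc:
        # Raised when the model response cannot be parsed as valid JSON; nothing is cached in that case
        print(f"similarity analysis failed: {exc}")


asyncio.run(main())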