|
|
@@ -0,0 +1,287 @@
|
|
|
+"""
|
|
|
+短语关系分析模块
|
|
|
+
|
|
|
+分析两个短语之间的语义关系
|
|
|
+
|
|
|
+提供接口:
|
|
|
+analyze_relation(phrase_a, phrase_b, model_name, context_a="", context_b="") - 分析两个短语的关系
|
|
|
+
|
|
|
+支持可选的 Context 参数:
|
|
|
+- context_a: phrase_a 的补充上下文(帮助理解 phrase_a)
|
|
|
+- context_b: phrase_b 的补充上下文(帮助理解 phrase_b)
|
|
|
+- Context 默认为空,不提供时不会出现在 prompt 中
|
|
|
+
|
|
|
+返回格式:
|
|
|
+{
|
|
|
+ "relation": "same", # 7种关系之一
|
|
|
+ "score": 0.95, # 0-1,语义接近程度
|
|
|
+ "explanation": "说明" # 关系判断的依据
|
|
|
+}
|
|
|
+"""
|
|
|
+import json
|
|
|
+from agents import Agent, Runner, ModelSettings
|
|
|
+from agents.tracing.create import custom_span
|
|
|
+from lib.client import get_model
|
|
|
+
|
|
|
+
|
|
|
# ========== System Prompt ==========
# System prompt passed as `instructions=` to the agent built by
# create_relation_agent(). The prompt text is in Chinese and is sent to the
# model verbatim, so it is runtime data rather than documentation: do not
# translate or reformat it.
#
# What the prompt specifies:
#   * Input tags: <A> and <B> (required) plus optional <A_Context> /
#     <B_Context>; the relation is judged between A and B only, the contexts
#     are auxiliary.
#   * Seven relation labels: same, coordinate, contains, contained_by,
#     overlap, related, unrelated.
#   * A score in the range 0-1 expressing semantic closeness, with suggested
#     ranges per relation label.
#   * A strict JSON reply with the keys "relation", "score" and "explanation".
#
# The trailing .strip() removes the newline that follows the opening quotes
# and the one that precedes the closing quotes.
RELATION_SYSTEM_PROMPT = """
# 任务
分析两个短语 <A> 和 <B> 之间的语义关系。

## 输入说明

- **<A></A>**: 第一个短语(必选)
- **<B></B>**: 第二个短语(必选)
- **<A_Context></A_Context>**: A 的补充上下文(可选,帮助理解 A)
- **<B_Context></B_Context>**: B 的补充上下文(可选,帮助理解 B)

**重要**:关系分析发生在 <A> 和 <B> 之间,Context 仅作为补充理解的辅助信息。

---

## 关系类型(7种)

### 1. same(同义)
- **定义**:意思完全相同或非常接近,可以互相替换
- **例子**:
 - "医生" 和 "大夫" → same
 - "计算机" 和 "电脑" → same
 - "快乐" 和 "高兴" → same

### 2. coordinate(同级)
- **定义**:有共同的上位概念,属于并列关系,通常无交集
- **例子**:
 - "轿车" 和 "SUV" → coordinate(都是汽车)
 - "苹果" 和 "香蕉" → coordinate(都是水果)
 - "数学" 和 "物理" → coordinate(都是学科)

### 3. contains(包含)
- **定义**:A 的概念范围包含 B,B 是 A 的子类或特例
- **例子**:
 - "水果" contains "苹果"
 - "汽车" contains "轿车"
 - "动物" contains "狗"

### 4. contained_by(被包含)
- **定义**:A 被 B 包含,A 是 B 的子类或特例
- **例子**:
 - "苹果" contained_by "水果"
 - "轿车" contained_by "汽车"
 - "狗" contained_by "动物"

### 5. overlap(部分重叠)
- **定义**:两个概念有交集,但互不包含
- **例子**:
 - "红苹果" 和 "大苹果" → overlap(有又红又大的苹果)
 - "亚洲国家" 和 "发展中国家" → overlap(如中国、印度等)
 - "学生" 和 "运动员" → overlap(有学生运动员)

### 6. related(相关)
- **定义**:有语义联系,但不属于上述任何层级关系
- **例子**:
 - "医生" 和 "医院" → related(工作场所关系)
 - "阅读" 和 "书籍" → related(动作-对象关系)
 - "钥匙" 和 "锁" → related(工具-用途关系)
 - "老师" 和 "学生" → related(角色关系)

### 7. unrelated(无关)
- **定义**:无明显语义关系
- **例子**:
 - "医生" 和 "石头" → unrelated
 - "苹果" 和 "数学" → unrelated

---

## 评分标准(score: 0-1)

**score 表示两个短语的语义接近程度:**

- **0.9-1.0**:几乎完全相同(完全同义)
- **0.8-0.9**:非常接近(高度同义、直接包含关系)
- **0.7-0.8**:比较接近(近义、明确的同级或包含)
- **0.6-0.7**:有一定接近度(同级但层级稍远、间接包含)
- **0.5-0.6**:中等程度的关系(中等交集、中度相关)
- **0.4-0.5**:关系较弱(小交集、弱相关)
- **0.3-0.4**:关系很弱(勉强算同级、很弱的相关)
- **0.0-0.3**:几乎无关或完全无关

**不同关系类型的 score 范围参考:**
- same: 通常 0.7-1.0(完全同义接近1.0,近义0.7-0.8)
- contains/contained_by: 通常 0.5-0.9(直接包含0.8+,跨层级0.5-0.7)
- coordinate: 通常 0.3-0.8(同级且上位概念近0.7+,同级但距离远0.3-0.5)
- overlap: 通常 0.2-0.8(交集大0.6+,交集小0.2-0.4)
- related: 通常 0.1-0.7(强相关0.5+,弱相关0.1-0.3)
- unrelated: 通常 0.0-0.2

---

## 判断逻辑(按优先级)

1. **A 和 B 意思相同或非常接近?** → same
2. **A 包含 B 或 B 包含 A?** → contains 或 contained_by
3. **A 和 B 有共同上位概念且无交集?** → coordinate
4. **A 和 B 有交集但互不包含?** → overlap
5. **A 和 B 有语义联系但不属于上述?** → related
6. **A 和 B 完全无关?** → unrelated

---

## 输出格式(严格JSON)

```json
{
 "relation": "same",
 "score": 0.95,
 "explanation": "简要说明为什么是这个关系,以及 score 的依据"
}
```

**输出要求**:
1. 必须严格按照上述JSON格式输出
2. 所有字段都必须填写
3. **relation字段**:必须是以下7个值之一:same, coordinate, contains, contained_by, overlap, related, unrelated
4. **score字段**:必须是0-1之间的浮点数,保留2位小数
5. **explanation字段**:必须简洁说明关系类型和评分依据(1-2句话)
""".strip()
|
|
|
+
|
|
|
+
|
|
|
def create_relation_agent(model_name: str) -> Agent:
    """Build the agent that classifies the relation between two phrases.

    The agent is instructed with ``RELATION_SYSTEM_PROMPT``, is given no
    tools, and runs with temperature 0.0 and a 65536-token output cap.

    Args:
        model_name: Name of the model; resolved to a model object through
            ``get_model``.

    Returns:
        The configured ``Agent`` instance.
    """
    # Resolve the model first, then the sampling settings, so the calls
    # happen in the same order as the keyword arguments are listed below.
    resolved_model = get_model(model_name)
    sampling = ModelSettings(temperature=0.0, max_tokens=65536)

    return Agent(
        name="Phrase Relation Expert",
        instructions=RELATION_SYSTEM_PROMPT,
        model=resolved_model,
        model_settings=sampling,
        tools=[],
    )
|
|
|
+
|
|
|
+
|
|
|
def _decode_first_json_object(text: str) -> dict:
    """Return the first JSON object that can be decoded from *text*.

    Every ``{`` is tried in order as the start of a JSON value, so anything
    around the object (prose, a fence language tag, trailing remarks) is
    ignored.

    Raises:
        ValueError: If no position in *text* starts a decodable JSON object.
    """
    decoder = json.JSONDecoder()
    brace = text.find("{")
    while brace != -1:
        try:
            # A value that starts at "{" can only decode to a dict.
            obj, _ = decoder.raw_decode(text, brace)
        except json.JSONDecodeError:
            brace = text.find("{", brace + 1)
        else:
            return obj
    raise ValueError("no JSON object found in response")


def parse_relation_response(response_content: str) -> dict:
    """Parse the agent's reply into a relation-result dictionary.

    The JSON object is looked for inside the first Markdown code fence
    (with or without a language tag, terminated or not) and, failing that,
    anywhere in the reply. Only a JSON *object* is accepted, so the return
    value is always a dict.

    Args:
        response_content: Raw text returned by the agent.

    Returns:
        The decoded JSON object. When nothing can be decoded (or the input
        is not a string) the failure is printed and a fallback dict is
        returned: ``relation`` "unrelated", ``score`` 0.0 and an
        ``explanation`` carrying the error text. This function never raises.
    """
    try:
        if not isinstance(response_content, str):
            raise TypeError(
                f"expected str response, got {type(response_content).__name__}"
            )

        fence_open = response_content.find("```")
        if fence_open != -1:
            body_start = fence_open + 3
            fence_close = response_content.find("```", body_start)
            # A missing closing fence (e.g. a truncated reply) must not
            # discard the payload: take everything after the opening fence.
            if fence_close == -1:
                fenced = response_content[body_start:]
            else:
                fenced = response_content[body_start:fence_close]
            try:
                return _decode_first_json_object(fenced)
            except ValueError:
                # The fenced block held no object; fall through and search
                # the whole reply instead.
                pass

        return _decode_first_json_object(response_content)
    except Exception as e:
        # Deliberately broad: callers rely on always getting a dict back.
        print(f"解析响应失败: {e}")
        return {
            "relation": "unrelated",
            "score": 0.0,
            "explanation": f"解析失败: {str(e)}"
        }
|
|
|
+
|
|
|
+
|
|
|
def _build_relation_task(
    phrase_a: str,
    phrase_b: str,
    context_a: str,
    context_b: str,
) -> str:
    """Render the user-turn text: the <A>/<B> sections plus optional contexts."""
    a_section = f"<A>\n{phrase_a}\n</A>"
    if context_a:
        a_section += f"\n\n<A_Context>\n{context_a}\n</A_Context>"

    b_section = f"<B>\n{phrase_b}\n</B>"
    if context_b:
        b_section += f"\n\n<B_Context>\n{context_b}\n</B_Context>"

    return (
        "## 本次分析任务\n\n"
        f"{a_section}\n\n"
        f"{b_section}\n\n"
        "请严格按照系统提示中的要求分析 <A> 和 <B> 之间的语义关系,并输出 JSON 格式的结果。"
    )


def _shorten_for_span(text: str, limit: int = 30) -> str:
    """Cut *text* to *limit* characters plus "..." so span names stay short."""
    return (text[:limit] + "...") if len(text) > limit else text


async def analyze_relation(
    phrase_a: str,
    phrase_b: str,
    model_name: str = None,
    context_a: str = "",
    context_b: str = ""
) -> dict:
    """Analyze the semantic relation between two phrases.

    Builds the relation agent, sends one user message containing both
    phrases (and any non-empty context) and parses the agent's final output
    with ``parse_relation_response``. The agent run is wrapped in a
    ``custom_span`` that records the phrases and contexts.

    Args:
        phrase_a: First phrase.
        phrase_b: Second phrase.
        model_name: Model to use. ``None`` (the default) selects
            ``MODEL_NAME`` from ``lib.client``, imported on demand.
        context_a: Optional extra context for ``phrase_a``; omitted from the
            prompt when empty.
        context_b: Optional extra context for ``phrase_b``; omitted from the
            prompt when empty.

    Returns:
        The parsed result, e.g.
        ``{"relation": "same", "score": 0.95, "explanation": "..."}``.
        Any exception raised along the way is printed and turned into
        ``relation`` "unrelated", ``score`` 0.0 and an ``explanation``
        carrying the error text; this coroutine does not raise.
    """
    try:
        # Fall back to the project-wide default model when none is given.
        if model_name is None:
            from lib.client import MODEL_NAME
            model_name = MODEL_NAME

        agent = create_relation_agent(model_name)

        messages = [{
            "role": "user",
            "content": [
                {
                    "type": "input_text",
                    "text": _build_relation_task(
                        phrase_a, phrase_b, context_a, context_b
                    ),
                }
            ],
        }]

        span_name = (
            f"关系分析: {_shorten_for_span(phrase_a)}"
            f" <-> {_shorten_for_span(phrase_b)}"
        )
        with custom_span(
            name=span_name,
            data={
                "phrase_a": phrase_a,
                "phrase_b": phrase_b,
                # Empty contexts are recorded as None rather than "".
                "context_a": context_a if context_a else None,
                "context_b": context_b if context_b else None,
            },
        ):
            result = await Runner.run(agent, input=messages)

        return parse_relation_response(result.final_output)

    except Exception as e:
        # Deliberately broad: callers always get a result dict. The error is
        # printed (as the response parser does) so a failed run is not
        # silently mistaken for a genuine "unrelated" verdict.
        print(f"分析过程出错: {e}")
        return {
            "relation": "unrelated",
            "score": 0.0,
            "explanation": f"分析过程出错: {str(e)}"
        }
|