| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- 分层匹配分析模块
- 实现特征组合的分层匹配逻辑:
- 1. 优先匹配灵感点标签(特征名称)
- 2. 无标签匹配时,匹配第一层分类
- 3. 仍无结果时,匹配第二层上位分类
- 4. 对每个候选进行推理难度打分
- """
- from typing import List, Dict, Optional
- from agents import Agent, Runner, ModelSettings
- from agents.tracing.create import custom_span
- from lib.client import get_model
- from lib.utils import parse_json_from_text
# ========== System Prompts ==========
# System prompt for the tag-matching agent. It instructs the model to compare
# the current features with the persona feature tags, to treat a similarity of
# 80 or more as a successful match, and to answer with a strict JSON object.
# match_current_features_to_persona_tags reads the "匹配成功" and "最佳匹配"
# keys of that object, so the key names below must stay in sync with it.
TAG_MATCH_SYSTEM_PROMPT = """
# 任务
判断"当前特征列表"中的特征,是否有与"人设特征标签"在语义上相同或高度接近的。
## 评分标准
- **相似度 ≥ 80**: 语义相同或高度接近,判定为匹配成功
- **相似度 < 80**: 不够接近,判定为匹配失败
## 输出格式(严格JSON)
```json
{
"匹配成功": true/false,
"匹配对": [
{"当前特征": "...", "人设标签": "...", "相似度": 95}
],
"最佳匹配": {"当前特征": "...", "人设标签": "...", "相似度": 95} or null,
"说明": "匹配结果说明"
}
```
**要求**:
1. 逐一比较当前特征与人设标签
2. 找到所有相似度≥80的配对
3. 按相似度降序排列匹配对
4. 最佳匹配为相似度最高的配对
""".strip()
# System prompt for the category-matching agent. It instructs the model to pick
# the candidate category closest to the current features, rate the reasoning
# difficulty on a 0-10 scale, derive a score as (10 - difficulty) / 10, and
# answer with a strict JSON object. match_to_categories reads the "匹配成功",
# "最佳分类" and "推理难度得分" keys of that object, so the key names below
# must stay in sync with it.
CATEGORY_MATCH_SYSTEM_PROMPT = """
# 任务
为"当前特征列表"在"候选分类"中找到语义最接近的分类,并评估推理难度。
## 推理难度评估标准(0-10分)
- **0-2分**: 几乎直接对应,推理非常容易
- **3-4分**: 需要简单推理,难度较低
- **5-6分**: 需要中等程度的推理
- **7-8分**: 需要较复杂的推理
- **9-10分**: 推理非常困难,关联很弱
## 推理难度得分计算
```
推理难度得分 = (10 - 推理难度) / 10
```
例如:推理难度=3,则得分=(10-3)/10=0.7
## 输出格式(严格JSON)
```json
{
"匹配成功": true/false,
"最佳分类": "分类名称" or null,
"推理难度": 3,
"推理难度得分": 0.7,
"推理路径": "从该分类如何推理到当前特征的说明",
"说明": "为什么选择这个分类"
}
```
**要求**:
1. 判断当前特征整体的主题/领域
2. 在候选分类中找到最符合的分类
3. 评估推理难度(0-10)
4. 计算推理难度得分
5. 只有推理难度得分≥0.5时,判定为匹配成功
""".strip()
def create_tag_match_agent(model_name: str) -> Agent:
    """Build the agent that performs tag matching.

    Args:
        model_name: Name handed to ``get_model`` to resolve the backing model.

    Returns:
        An ``Agent`` named "Tag Match Expert" that is driven by
        ``TAG_MATCH_SYSTEM_PROMPT`` and has no tools attached.
    """
    backing_model = get_model(model_name)
    # Sampling temperature is pinned to 0.0 and the output budget to 65536 tokens.
    settings = ModelSettings(temperature=0.0, max_tokens=65536)
    agent_kwargs = {
        "name": "Tag Match Expert",
        "instructions": TAG_MATCH_SYSTEM_PROMPT,
        "model": backing_model,
        "model_settings": settings,
        "tools": [],
    }
    return Agent(**agent_kwargs)
def create_category_match_agent(model_name: str) -> Agent:
    """Build the agent that performs category matching.

    Args:
        model_name: Name handed to ``get_model`` to resolve the backing model.

    Returns:
        An ``Agent`` named "Category Match Expert" that is driven by
        ``CATEGORY_MATCH_SYSTEM_PROMPT`` and has no tools attached.
    """
    backing_model = get_model(model_name)
    # Sampling temperature is pinned to 0.0 and the output budget to 65536 tokens.
    settings = ModelSettings(temperature=0.0, max_tokens=65536)
    agent_kwargs = {
        "name": "Category Match Expert",
        "instructions": CATEGORY_MATCH_SYSTEM_PROMPT,
        "model": backing_model,
        "model_settings": settings,
        "tools": [],
    }
    return Agent(**agent_kwargs)
async def match_current_features_to_persona_tags(
    current_features: List[str],
    persona_combination: List[Dict],
    model_name: Optional[str] = None
) -> Dict:
    """
    Tier-one match: semantically compare the current features with the
    feature names ("特征名称") of a persona combination.

    Args:
        current_features: Current feature list, e.g. ["立冬", "教资查分", "时间巧合"].
        persona_combination: Persona feature dicts, e.g.
            [
                {"特征名称": "猫孩子", "所属分类": ["宠物亲子化", "宠物情感", "实质"]},
                {"特征名称": "被拿捏住的无奈感", "所属分类": ["宠物关系主导", "宠物情感", "实质"]}
            ]
            Every entry must carry a "特征名称" key.
        model_name: Model name; when None, ``lib.client.MODEL_NAME`` is used.

    Returns:
        {
            "匹配成功": bool,
            "匹配的特征": str or None,   # matched persona tag
            "得分": 1 or 0,
            "详细结果": {...}            # parsed model output, or a short explanation
        }

    Raises:
        KeyError: If a persona entry has no "特征名称" key.
    """
    persona_tags = [f["特征名称"] for f in persona_combination]

    # With nothing on one side there is nothing to compare, so skip the model call.
    if not current_features or not persona_tags:
        return {
            "匹配成功": False,
            "匹配的特征": None,
            "得分": 0,
            "详细结果": {"说明": "当前特征或人设标签为空"}
        }

    if model_name is None:
        from lib.client import MODEL_NAME
        model_name = MODEL_NAME

    # Create the agent
    agent = create_tag_match_agent(model_name)

    # Build the task description
    task_description = f"""## 本次匹配任务
<当前特征列表>
{', '.join(current_features)}
</当前特征列表>
<人设特征标签>
{', '.join(persona_tags)}
</人设特征标签>
请判断当前特征列表中是否有与人设标签语义相同或高度接近的(相似度≥80),输出JSON格式结果。
"""
    messages = [{
        "role": "user",
        "content": [{"type": "input_text", "text": task_description}]
    }]

    with custom_span(
        name=f"标签匹配: {current_features[:2]} vs {len(persona_tags)}个标签",
        data={
            "current_features": current_features,
            "persona_tags": persona_tags
        }
    ):
        result = await Runner.run(agent, input=messages)

    # Parse the response; anything that is not a non-empty JSON object is a parse failure.
    parsed_result = parse_json_from_text(result.final_output)
    if not parsed_result or not isinstance(parsed_result, dict):
        return {
            "匹配成功": False,
            "匹配的特征": None,
            "得分": 0,
            "详细结果": {"说明": "解析失败"}
        }

    # Convert to the standard result shape
    if not parsed_result.get("匹配成功"):
        return {
            "匹配成功": False,
            "匹配的特征": None,
            "得分": 0,
            "详细结果": parsed_result
        }

    # The prompt allows "最佳匹配" to be null, so dict.get's default is not
    # enough: an explicit null would come back as None and break `.get` below.
    best_match = parsed_result.get("最佳匹配")
    if not isinstance(best_match, dict):
        # Fall back to the first pair; the prompt asks for pairs sorted by
        # similarity in descending order.
        pairs = parsed_result.get("匹配对")
        first_pair = pairs[0] if isinstance(pairs, list) and pairs else None
        best_match = first_pair if isinstance(first_pair, dict) else {}

    return {
        "匹配成功": True,
        "匹配的特征": best_match.get("人设标签"),
        "得分": 1,
        "详细结果": parsed_result
    }
def _resolve_difficulty_score(parsed: Dict) -> float:
    """Return the reasoning-difficulty score from a parsed reply as a float in [0, 1].

    Uses "推理难度得分" when it is numeric; otherwise derives the score from
    "推理难度" with the prompt's formula (10 - difficulty) / 10; otherwise 0.0.
    """
    try:
        score = float(parsed.get("推理难度得分"))
    except (TypeError, ValueError):
        try:
            score = (10 - float(parsed.get("推理难度"))) / 10
        except (TypeError, ValueError):
            return 0.0
    # Clamp so callers can rely on the documented 0-1 range.
    return min(1.0, max(0.0, score))


async def match_to_categories(
    current_features: List[str],
    persona_combination: List[Dict],
    layer: str,  # "first" or "second"
    model_name: Optional[str] = None
) -> Dict:
    """
    Category match for either the first layer or the second (generic) layer.

    Args:
        current_features: Current feature list.
        persona_combination: Persona feature dicts; categories are read from
            "所属分类" and feature names from "特征名称".
        layer: "first" uses every category except "实质"/"形式"; "second" uses
            only "实质"/"形式".
        model_name: Model name; when None, ``lib.client.MODEL_NAME`` is used.

    Returns:
        {
            "匹配成功": bool,
            "匹配的分类": str or None,
            "推理难度得分": float (0-1),
            "详细结果": {...}
        }

    Raises:
        KeyError: If candidates exist and a persona entry has no "特征名称" key.
    """
    generic_categories = ("实质", "形式")

    # Collect candidate categories for the requested layer
    all_categories = set()
    for feature in persona_combination:
        categories = feature.get("所属分类", [])
        if layer == "first":
            all_categories.update(c for c in categories if c not in generic_categories)
        elif layer == "second":
            all_categories.update(c for c in categories if c in generic_categories)

    if not all_categories and layer == "first":
        # Degrade to every category when filtering left nothing for the first layer
        for feature in persona_combination:
            all_categories.update(feature.get("所属分类", []))

    if not all_categories:
        # No candidates at all: fail without spending a model call
        explanation = "没有可用的分类" if layer == "first" else "没有可用的上位分类"
        return {
            "匹配成功": False,
            "匹配的分类": None,
            "推理难度得分": 0,
            "详细结果": {"说明": explanation}
        }

    # Sort so the prompt text does not depend on set iteration order,
    # which varies between interpreter runs for strings.
    categories_list = sorted(all_categories)
    persona_tags = [f["特征名称"] for f in persona_combination]

    if model_name is None:
        from lib.client import MODEL_NAME
        model_name = MODEL_NAME

    # Create the agent
    agent = create_category_match_agent(model_name)

    # Build the task description
    layer_desc = "第一层分类(具体领域分类)" if layer == "first" else "第二层上位分类(实质/形式)"
    task_description = f"""## 本次匹配任务 - {layer_desc}
<当前特征列表>
{', '.join(current_features)}
</当前特征列表>
<候选分类>
{', '.join(categories_list)}
</候选分类>
<人设组合特征>
{', '.join(persona_tags)}
</人设组合特征>
请为当前特征列表在候选分类中找到最接近的分类,并评估推理难度(0-10),输出JSON格式结果。
"""
    messages = [{
        "role": "user",
        "content": [{"type": "input_text", "text": task_description}]
    }]

    layer_name = "第一层分类" if layer == "first" else "第二层上位分类"
    with custom_span(
        name=f"{layer_name}匹配: {current_features[:2]} vs {len(categories_list)}个分类",
        data={
            "current_features": current_features,
            "categories": categories_list,
            "layer": layer
        }
    ):
        result = await Runner.run(agent, input=messages)

    # Parse the response; anything that is not a non-empty JSON object is a parse failure.
    parsed_result = parse_json_from_text(result.final_output)
    if not parsed_result or not isinstance(parsed_result, dict):
        return {
            "匹配成功": False,
            "匹配的分类": None,
            "推理难度得分": 0,
            "详细结果": {"说明": "解析失败"}
        }

    return {
        "匹配成功": bool(parsed_result.get("匹配成功", False)),
        "匹配的分类": parsed_result.get("最佳分类"),
        # Always numeric, so callers can compare and format it safely.
        "推理难度得分": _resolve_difficulty_score(parsed_result),
        "详细结果": parsed_result
    }
async def hierarchical_match(
    current_features: List[str],
    persona_combination: List[Dict],
    model_name: Optional[str] = None
) -> Dict:
    """
    Entry point for tiered matching.

    Tiers are tried in order and the first success is returned:
    1. Tag match against the persona feature names.
    2. First-layer category match (additionally requires a score >= 0.5).
    3. Second-layer generic category match.

    Args:
        current_features: Current feature list.
        persona_combination: Persona feature dicts (with categories).
        model_name: Model name, forwarded unchanged to each tier.

    Returns:
        {
            "最终得分": 1.0 for a tag match, the tier's "推理难度得分" for a
                        category match, 0 when nothing matched,
            "匹配层级": "标签匹配" | "第一层分类匹配" | "第二层上位分类匹配" | "无匹配",
            "匹配结果": matched tag / category name, or None,
            "分层结果": results of every tier that was actually run,
            "综合说明": human-readable summary
        }
    """
    # Accumulates the outcome of each tier that has been executed so far.
    tier_results = {}

    # Tier 1: tag match
    tag_outcome = await match_current_features_to_persona_tags(
        current_features, persona_combination, model_name
    )
    tier_results["标签匹配"] = tag_outcome
    if tag_outcome["匹配成功"]:
        matched_tag = tag_outcome["匹配的特征"]
        return {
            "最终得分": 1.0,
            "匹配层级": "标签匹配",
            "匹配结果": matched_tag,
            "分层结果": tier_results,
            "综合说明": f"在标签层级找到完全匹配: {matched_tag}"
        }

    # Tier 2: first-layer category match
    first_outcome = await match_to_categories(
        current_features, persona_combination, "first", model_name
    )
    tier_results["第一层分类匹配"] = first_outcome
    if first_outcome["匹配成功"] and first_outcome["推理难度得分"] >= 0.5:
        first_score = first_outcome["推理难度得分"]
        first_category = first_outcome["匹配的分类"]
        return {
            "最终得分": first_score,
            "匹配层级": "第一层分类匹配",
            "匹配结果": first_category,
            "分层结果": tier_results,
            "综合说明": f"在第一层分类找到匹配: {first_category}, 推理难度得分: {first_score:.2f}"
        }

    # Tier 3: second-layer generic category match
    second_outcome = await match_to_categories(
        current_features, persona_combination, "second", model_name
    )
    tier_results["第二层上位分类匹配"] = second_outcome
    if second_outcome["匹配成功"]:
        second_score = second_outcome["推理难度得分"]
        second_category = second_outcome["匹配的分类"]
        return {
            "最终得分": second_score,
            "匹配层级": "第二层上位分类匹配",
            "匹配结果": second_category,
            "分层结果": tier_results,
            "综合说明": f"在第二层上位分类找到匹配: {second_category}, 推理难度得分: {second_score:.2f}"
        }

    # No tier matched
    return {
        "最终得分": 0,
        "匹配层级": "无匹配",
        "匹配结果": None,
        "分层结果": tier_results,
        "综合说明": "在所有层级都未找到合适的匹配"
    }
|