|
|
@@ -35,6 +35,7 @@ def get_semaphore():
|
|
|
async def match_single_pair(
|
|
|
feature_name: str,
|
|
|
persona_name: str,
|
|
|
+ category_mapping: Dict = None,
|
|
|
model_name: str = None
|
|
|
) -> Dict:
|
|
|
"""
|
|
|
@@ -43,12 +44,15 @@ async def match_single_pair(
|
|
|
Args:
|
|
|
feature_name: 要匹配的特征名称
|
|
|
persona_name: 人设特征名称
|
|
|
+ category_mapping: 特征分类映射字典
|
|
|
model_name: 使用的模型名称
|
|
|
|
|
|
Returns:
|
|
|
单个匹配结果,格式:
|
|
|
{
|
|
|
"人设特征名称": "xxx",
|
|
|
+ "特征类型": "标签",
|
|
|
+ "特征分类": ["分类1", "分类2"],
|
|
|
"匹配结果": {
|
|
|
"相似度": 0.75,
|
|
|
"说明": "..."
|
|
|
@@ -63,8 +67,45 @@ async def match_single_pair(
|
|
|
phrase_b=persona_name,
|
|
|
)
|
|
|
|
|
|
+ # 判断该特征是标签还是分类
|
|
|
+ feature_type = "分类" # 默认为分类
|
|
|
+ categories = []
|
|
|
+
|
|
|
+ if category_mapping:
|
|
|
+ # 先在标签特征中查找(灵感点、关键点、目的点)
|
|
|
+ is_tag_feature = False
|
|
|
+ for ft in ["灵感点", "关键点", "目的点"]:
|
|
|
+ if ft in category_mapping:
|
|
|
+ type_mapping = category_mapping[ft]
|
|
|
+ if persona_name in type_mapping:
|
|
|
+ # 找到了,说明是标签特征
|
|
|
+ feature_type = "标签"
|
|
|
+ categories = type_mapping[persona_name].get("所属分类", [])
|
|
|
+ is_tag_feature = True
|
|
|
+ break
|
|
|
+
|
|
|
+ # 如果不是标签特征,检查是否是分类特征
|
|
|
+ if not is_tag_feature:
|
|
|
+ # 收集所有分类
|
|
|
+ all_categories = set()
|
|
|
+ for ft in ["灵感点", "关键点", "目的点"]:
|
|
|
+ if ft in category_mapping:
|
|
|
+ for fname, fdata in category_mapping[ft].items():
|
|
|
+ cats = fdata.get("所属分类", [])
|
|
|
+ all_categories.update(cats)
|
|
|
+
|
|
|
+ # 如果当前特征名在分类列表中,则是分类特征
|
|
|
+ if persona_name in all_categories:
|
|
|
+ feature_type = "分类"
|
|
|
+ categories = [] # 分类特征本身没有所属分类
|
|
|
+
|
|
|
+ # 去重分类
|
|
|
+ unique_categories = list(dict.fromkeys(categories))
|
|
|
+
|
|
|
return {
|
|
|
"人设特征名称": persona_name,
|
|
|
+ "特征类型": feature_type,
|
|
|
+ "特征分类": unique_categories,
|
|
|
"匹配结果": similarity_result
|
|
|
}
|
|
|
|
|
|
@@ -72,6 +113,7 @@ async def match_single_pair(
|
|
|
async def match_feature_with_persona(
|
|
|
feature_name: str,
|
|
|
persona_features: List[Dict],
|
|
|
+ category_mapping: Dict = None,
|
|
|
model_name: str = None
|
|
|
) -> List[Dict]:
|
|
|
"""
|
|
|
@@ -80,6 +122,7 @@ async def match_feature_with_persona(
|
|
|
Args:
|
|
|
feature_name: 要匹配的特征名称
|
|
|
persona_features: 人设特征列表
|
|
|
+ category_mapping: 特征分类映射字典
|
|
|
model_name: 使用的模型名称
|
|
|
|
|
|
Returns:
|
|
|
@@ -87,7 +130,7 @@ async def match_feature_with_persona(
|
|
|
"""
|
|
|
# 创建所有匹配任务
|
|
|
tasks = [
|
|
|
- match_single_pair(feature_name, persona_feature["特征名称"], model_name)
|
|
|
+ match_single_pair(feature_name, persona_feature["特征名称"], category_mapping, model_name)
|
|
|
for persona_feature in persona_features
|
|
|
]
|
|
|
|
|
|
@@ -100,6 +143,7 @@ async def match_feature_with_persona(
|
|
|
async def match_single_feature(
|
|
|
feature_name: str,
|
|
|
persona_features: List[Dict],
|
|
|
+ category_mapping: Dict = None,
|
|
|
model_name: str = None
|
|
|
) -> Dict:
|
|
|
"""
|
|
|
@@ -108,6 +152,7 @@ async def match_single_feature(
|
|
|
Args:
|
|
|
feature_name: 特征名称
|
|
|
persona_features: 人设特征列表
|
|
|
+ category_mapping: 特征分类映射字典
|
|
|
model_name: 使用的模型名称
|
|
|
|
|
|
Returns:
|
|
|
@@ -117,6 +162,7 @@ async def match_single_feature(
|
|
|
match_results = await match_feature_with_persona(
|
|
|
feature_name=feature_name,
|
|
|
persona_features=persona_features,
|
|
|
+ category_mapping=category_mapping,
|
|
|
model_name=model_name
|
|
|
)
|
|
|
|
|
|
@@ -129,6 +175,7 @@ async def match_single_feature(
|
|
|
async def process_single_inspiration_point(
|
|
|
inspiration_point: Dict,
|
|
|
persona_features: List[Dict],
|
|
|
+ category_mapping: Dict = None,
|
|
|
model_name: str = None
|
|
|
) -> Dict:
|
|
|
"""
|
|
|
@@ -137,6 +184,7 @@ async def process_single_inspiration_point(
|
|
|
Args:
|
|
|
inspiration_point: 灵感点数据
|
|
|
persona_features: 人设灵感特征列表
|
|
|
+ category_mapping: 特征分类映射字典
|
|
|
model_name: 使用的模型名称
|
|
|
|
|
|
Returns:
|
|
|
@@ -150,7 +198,7 @@ async def process_single_inspiration_point(
|
|
|
|
|
|
# 并发匹配所有特征
|
|
|
tasks = [
|
|
|
- match_single_feature(feature_name, persona_features, model_name)
|
|
|
+ match_single_feature(feature_name, persona_features, category_mapping, model_name)
|
|
|
for feature_name in feature_list
|
|
|
]
|
|
|
feature_match_results = await asyncio.gather(*tasks)
|
|
|
@@ -173,6 +221,7 @@ async def process_single_task(
|
|
|
task_index: int,
|
|
|
total_tasks: int,
|
|
|
persona_inspiration_features: List[Dict],
|
|
|
+ category_mapping: Dict = None,
|
|
|
model_name: str = None
|
|
|
) -> Dict:
|
|
|
"""
|
|
|
@@ -183,6 +232,7 @@ async def process_single_task(
|
|
|
task_index: 任务索引(从1开始)
|
|
|
total_tasks: 总任务数
|
|
|
persona_inspiration_features: 人设灵感特征列表
|
|
|
+ category_mapping: 特征分类映射字典
|
|
|
model_name: 使用的模型名称
|
|
|
|
|
|
Returns:
|
|
|
@@ -202,6 +252,7 @@ async def process_single_task(
|
|
|
process_single_inspiration_point(
|
|
|
inspiration_point=inspiration_point,
|
|
|
persona_features=persona_inspiration_features,
|
|
|
+ category_mapping=category_mapping,
|
|
|
model_name=model_name
|
|
|
)
|
|
|
for inspiration_point in inspiration_list
|
|
|
@@ -223,6 +274,7 @@ async def process_single_task(
|
|
|
async def process_task_list(
|
|
|
task_list: List[Dict],
|
|
|
persona_features_dict: Dict,
|
|
|
+ category_mapping: Dict = None,
|
|
|
model_name: str = None
|
|
|
) -> List[Dict]:
|
|
|
"""
|
|
|
@@ -231,13 +283,33 @@ async def process_task_list(
|
|
|
Args:
|
|
|
task_list: 解构任务列表
|
|
|
persona_features_dict: 人设特征字典(包含灵感点、目的点、关键点)
|
|
|
+ category_mapping: 特征分类映射字典
|
|
|
model_name: 使用的模型名称
|
|
|
|
|
|
Returns:
|
|
|
包含 how 解构结果的任务列表
|
|
|
"""
|
|
|
+ # 获取标签特征列表
|
|
|
persona_inspiration_features = persona_features_dict.get("灵感点", [])
|
|
|
- print(f"人设灵感特征数量: {len(persona_inspiration_features)}")
|
|
|
+ print(f"人设标签特征数量: {len(persona_inspiration_features)}")
|
|
|
+
|
|
|
+ # 从分类映射中提取所有唯一的分类作为分类特征(仅从灵感点中提取)
|
|
|
+ category_features = []
|
|
|
+ if category_mapping:
|
|
|
+ all_categories = set()
|
|
|
+ # 只从灵感点中提取分类
|
|
|
+ if "灵感点" in category_mapping:
|
|
|
+ for _, feature_data in category_mapping["灵感点"].items():
|
|
|
+ categories = feature_data.get("所属分类", [])
|
|
|
+ all_categories.update(categories)
|
|
|
+
|
|
|
+ # 转换为特征格式
|
|
|
+ category_features = [{"特征名称": cat} for cat in sorted(all_categories)]
|
|
|
+ print(f"人设分类特征数量: {len(category_features)}")
|
|
|
+
|
|
|
+ # 合并标签特征和分类特征
|
|
|
+ all_features = persona_inspiration_features + category_features
|
|
|
+ print(f"总特征数量(标签+分类): {len(all_features)}")
|
|
|
|
|
|
# 并发处理所有任务
|
|
|
tasks = [
|
|
|
@@ -245,7 +317,8 @@ async def process_task_list(
|
|
|
task=task,
|
|
|
task_index=i,
|
|
|
total_tasks=len(task_list),
|
|
|
- persona_inspiration_features=persona_inspiration_features,
|
|
|
+ persona_inspiration_features=all_features,
|
|
|
+ category_mapping=category_mapping,
|
|
|
model_name=model_name
|
|
|
)
|
|
|
for i, task in enumerate(task_list, 1)
|
|
|
@@ -264,6 +337,7 @@ async def main():
|
|
|
|
|
|
task_list_file = data_dir / "当前帖子_解构任务列表.json"
|
|
|
persona_features_file = data_dir / "特征名称_帖子来源.json"
|
|
|
+ category_mapping_file = data_dir / "特征名称_分类映射.json"
|
|
|
output_dir = data_dir / "当前帖子_how解构结果"
|
|
|
|
|
|
# 创建输出目录
|
|
|
@@ -277,6 +351,10 @@ async def main():
|
|
|
with open(persona_features_file, "r", encoding="utf-8") as f:
|
|
|
persona_features_data = json.load(f)
|
|
|
|
|
|
+ print(f"读取特征分类映射: {category_mapping_file}")
|
|
|
+ with open(category_mapping_file, "r", encoding="utf-8") as f:
|
|
|
+ category_mapping = json.load(f)
|
|
|
+
|
|
|
# 获取任务列表
|
|
|
task_list = task_list_data.get("解构任务列表", [])
|
|
|
print(f"\n总任务数: {len(task_list)}")
|
|
|
@@ -285,6 +363,7 @@ async def main():
|
|
|
updated_task_list = await process_task_list(
|
|
|
task_list=task_list,
|
|
|
persona_features_dict=persona_features_data,
|
|
|
+ category_mapping=category_mapping,
|
|
|
model_name=None # 使用默认模型
|
|
|
)
|
|
|
|