|
|
@@ -1,209 +0,0 @@
|
|
|
-"""
|
|
|
-测试 relation_analyzer 模块
|
|
|
-"""
|
|
|
-import asyncio
|
|
|
-from lib.relation_analyzer import analyze_relation
|
|
|
-
|
|
|
-
|
|
|
async def test_all_relations():
    """Run ``analyze_relation`` on sample phrase pairs for all 7 relation types.

    Each sample pair is sent to ``analyze_relation`` one at a time (awaited
    sequentially), the returned relation is compared with the expected one,
    and a per-case report plus an accuracy summary is printed to stdout.

    Returns:
        A list with one dict per test case, in input order, with the keys
        ``"test_case"`` (the input dict), ``"result"`` (whatever
        ``analyze_relation`` returned) and ``"is_correct"`` (bool).
    """
    # Typical examples for every relation type, as
    # (phrase_a, phrase_b, expected_relation, description) rows.
    case_rows = [
        # 1. same (synonyms)
        ("医生", "大夫", "same", "完全同义"),
        ("计算机", "电脑", "same", "完全同义"),
        # 2. coordinate (siblings at the same level)
        ("轿车", "SUV", "coordinate", "都是汽车的子类"),
        ("苹果", "香蕉", "coordinate", "都是水果"),
        # 3. contains (A includes B)
        ("水果", "苹果", "contains", "水果包含苹果"),
        ("汽车", "轿车", "contains", "汽车包含轿车"),
        # 4. contained_by (A is included in B)
        ("苹果", "水果", "contained_by", "苹果被水果包含"),
        ("轿车", "交通工具", "contained_by", "轿车被交通工具包含"),
        # 5. overlap (partial intersection)
        ("红苹果", "大苹果", "overlap", "有交集(又红又大的苹果)"),
        ("学生", "运动员", "overlap", "有交集(学生运动员)"),
        # 6. related (associated but not hierarchical)
        ("医生", "医院", "related", "工作场所关系"),
        ("阅读", "书籍", "related", "动作-对象关系"),
        # 7. unrelated
        ("医生", "石头", "unrelated", "完全无关"),
        ("苹果", "数学", "unrelated", "完全无关"),
    ]
    test_cases = [
        {
            "phrase_a": phrase_a,
            "phrase_b": phrase_b,
            "expected_relation": expected,
            "description": description,
        }
        for phrase_a, phrase_b, expected, description in case_rows
    ]

    # Model selection (adjust to match your configuration).
    model_name = "google/gemini-2.5-flash"  # default model

    separator = "=" * 80
    print(separator)
    print("开始测试 relation_analyzer 模块")
    print(f"使用模型: {model_name}")
    print(f"测试用例数量: {len(test_cases)}")
    print(separator)
    print()

    results = []

    for i, test_case in enumerate(test_cases, 1):
        phrase_a = test_case["phrase_a"]
        phrase_b = test_case["phrase_b"]
        expected = test_case["expected_relation"]
        description = test_case["description"]

        print(f'[{i}/{len(test_cases)}] 测试: "{phrase_a}" <-> "{phrase_b}"')
        print(f" 说明: {description}")
        print(f" 期望关系: {expected}")

        # Call the analyzer under test.
        result = await analyze_relation(
            phrase_a=phrase_a,
            phrase_b=phrase_b,
            model_name=model_name,
        )

        # Missing fields fall back to neutral defaults so one incomplete
        # answer is reported as a mismatch instead of aborting the run.
        relation = result.get("relation", "unknown")
        score = result.get("score", 0.0)
        explanation = result.get("explanation", "")

        # Check whether the answer matches the expectation.
        is_correct = relation == expected
        status = "✓" if is_correct else "✗"

        print(f" 实际关系: {relation} (score: {score:.2f}) {status}")
        print(f" 解释: {explanation}")
        print()

        results.append({
            "test_case": test_case,
            "result": result,
            "is_correct": is_correct,
        })

    # Summary statistics.
    correct_count = sum(1 for r in results if r["is_correct"])
    total_count = len(results)
    # Guard the division so an emptied case list reports 0% instead of raising.
    accuracy = correct_count / total_count * 100 if total_count else 0.0

    print(separator)
    print("测试完成")
    print(f"正确: {correct_count}/{total_count} ({accuracy:.1f}%)")
    print(separator)

    # List the cases that did not match.
    errors = [r for r in results if not r["is_correct"]]
    if errors:
        print()
        print("错误的测试用例:")
        for error in errors:
            tc = error["test_case"]
            # Same fallback as in the main loop: a result without a
            # "relation" key is exactly the kind of failure listed here, so
            # indexing it directly would raise KeyError mid-summary.
            actual = error["result"].get("relation", "unknown")
            print(f" - \"{tc['phrase_a']}\" <-> \"{tc['phrase_b']}\"")
            print(f" 期望: {tc['expected_relation']}, 实际: {actual}")

    return results
|
|
|
-
|
|
|
-
|
|
|
async def test_single_example():
    """Analyze one fixed phrase pair and print the fields of the answer.

    Awaits ``analyze_relation`` once for the pair "水果" / "苹果" and prints
    the ``relation``, ``score`` and ``explanation`` entries of its result.
    """
    # Header line followed by one blank line.
    print("测试单个例子:", end="\n\n")

    request = {
        "phrase_a": "水果",
        "phrase_b": "苹果",
        "model_name": "google/gemini-2.5-flash",  # default model
    }
    analysis = await analyze_relation(**request)

    print("短语A: 水果")
    print("短语B: 苹果")
    print(f"关系: {analysis['relation']}")
    print(f"分数: {analysis['score']}")
    print(f"解释: {analysis['explanation']}")
|
|
|
-
|
|
|
-
|
|
|
if __name__ == "__main__":
    # Choose which coroutine to run:
    #   test_single_example - a single pair, for a quick sanity check
    #   test_all_relations  - every relation type, the full run (selected)
    selected_test = test_all_relations
    asyncio.run(selected_test())
|