| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- #!/usr/bin/env python3
- """
- 演示 text_embedding.compare_phrases() 接口
- 返回格式与 semantic_similarity.compare_phrases() 完全一致
- """
- import asyncio
- from lib.text_embedding import compare_phrases
_RULE_WIDTH = 60  # width of the "=" / "-" separator rules printed by the demo


def _print_section(title: str) -> None:
    """Print a section title followed by a dashed rule."""
    print(title)
    print("-" * _RULE_WIDTH)


async def _demo_basic_usage() -> None:
    """Example 1: compare a single phrase pair and print the raw result."""
    _print_section("示例 1: 基本使用")

    # Bind the phrases once so the printed labels cannot drift from the
    # arguments actually passed to compare_phrases().
    phrase_a, phrase_b = "深度学习", "神经网络"
    result = await compare_phrases(phrase_a, phrase_b)

    print(f"短语A: {phrase_a}")
    print(f"短语B: {phrase_b}")
    print("\n返回结果:")
    # The similarity is printed unformatted here (no ".3f"), unlike the
    # later examples.
    print(f" 相似度: {result['相似度']}")
    print(f" 说明: {result['说明']}")
    print()


async def _demo_phrase_pairs() -> None:
    """Example 2: compare several phrase pairs one after another."""
    _print_section("示例 2: 对比不同的短语对")

    phrase_pairs = [
        ("机器学习", "人工智能"),
        ("深度学习", "今天吃饭"),
        ("Python编程", "程序设计"),
        ("北京", "上海"),
    ]
    # Calls are awaited sequentially, so output order matches list order.
    for left, right in phrase_pairs:
        outcome = await compare_phrases(left, right)
        print(f"\n【{left}】 vs 【{right}】")
        print(f" 相似度: {outcome['相似度']:.3f}")
        print(f" 说明: {outcome['说明']}")
    print()


async def _demo_methods() -> None:
    """Example 3: compare one phrase pair with each `method` value."""
    _print_section("示例 3: 使用不同的计算方法")

    phrase_a = "自然语言处理"
    phrase_b = "文本分析"
    # Maps the `method` keyword value to the label printed for it.
    methods = {
        "ai_direct": "AI 直接判断(推荐)",
        "cosine": "余弦相似度(基于向量)",
        "euclidean": "欧氏距离相似度",
    }

    print(f"\n短语A: {phrase_a}")
    print(f"短语B: {phrase_b}\n")

    for method, description in methods.items():
        result = await compare_phrases(
            phrase_a,
            phrase_b,
            method=method,
            dim=128,  # smaller dimension to speed up the test run
        )
        print(f"{description:30s}")
        print(f" 相似度: {result['相似度']:.3f}")
        print(f" 说明: {result['说明']}")
        # NOTE(review): indentation was lost in the source; this blank line
        # is assumed to be emitted once per method -- confirm.
        print()


def _demo_format_comparison() -> None:
    """Example 4: print a static comparison with semantic_similarity."""
    _print_section("示例 4: 与 semantic_similarity.compare_phrases() 的对比")

    print("\n两个接口返回的数据格式完全一致:")
    # The flush-left lines below are part of the printed text.
    # NOTE(review): any indentation inside this literal was lost in the
    # source; restore it if the original sample was indented.
    print("""
{
"说明": "相似度判断的理由",
"相似度": 0.85
}
""")

    print("\n主要区别:")
    print(" semantic_similarity:")
    print(" - 只支持 AI 直接判断")
    print(" - 专注于语义相似度分析")
    print()
    print(" text_embedding.compare_phrases:")
    print(" - 支持 AI 直接判断 + 多种向量方法")
    print(" - 可以选择不同的计算方法")
    print(" - 返回格式完全兼容")
    print()


async def _demo_batch_ranking() -> None:
    """Example 5: score candidates against a target and print a ranking."""
    _print_section("示例 5: 实际应用 - 批量比较")

    target = "机器学习"
    candidates = [
        "深度学习算法",
        "神经网络模型",
        "数据分析",
        "今天天气",
        "人工智能",
    ]

    print(f"\n目标短语: {target}")
    print(f"候选短语: {len(candidates)} 个\n")

    # Collect (candidate, similarity, explanation) tuples, one call at a time.
    scored = []
    for candidate in candidates:
        result = await compare_phrases(target, candidate)
        scored.append((candidate, result['相似度'], result['说明']))

    # Highest similarity first; the sort is stable, so ties keep the
    # candidate-list order.
    scored.sort(key=lambda entry: entry[1], reverse=True)

    print("相似度排名:")
    for rank, (phrase, score, explanation) in enumerate(scored, 1):
        print(f"\n {rank}. {phrase} (相似度: {score:.3f})")
        print(f" 说明: {explanation}")
    print()


async def main():
    """Run the five compare_phrases() demo sections in order.

    Prints a banner, runs each example sequentially (every
    compare_phrases() call is awaited before the next one starts), then
    prints a closing banner. Each result is read through its '相似度' and
    '说明' keys. Nothing is returned; all output goes to stdout.
    """
    print("=" * _RULE_WIDTH)
    print("text_embedding.compare_phrases() 接口演示")
    print("返回格式与 semantic_similarity.compare_phrases() 一致")
    print("=" * _RULE_WIDTH)
    print()

    await _demo_basic_usage()
    await _demo_phrase_pairs()
    await _demo_methods()
    _demo_format_comparison()
    await _demo_batch_ranking()

    print("=" * _RULE_WIDTH)
    print("示例完成!")
    print("=" * _RULE_WIDTH)


if __name__ == "__main__":
    # Script entry point: run the demo coroutine on a fresh event loop.
    asyncio.run(main())
|