#!/usr/bin/env python3
"""Use the SentenceModel class (text2vec-style API)."""
import asyncio

from lib.text_embedding import SentenceModel
async def main() -> None:
    """Demonstrate the SentenceModel API.

    Walks through four examples: single-text encoding, batch encoding,
    pairwise similarity, and ranking candidate texts against a query.
    Requires ``lib.text_embedding.SentenceModel``; all model calls are
    awaited (the backend is presumably a remote embedding API — the
    model name suggests OpenAI, TODO confirm).
    """
    print("=" * 60)
    print("使用 SentenceModel 类")
    print("=" * 60)

    # Create the model once and reuse it for every call below.
    model = SentenceModel(
        model_name='openai/gpt-4.1-mini',
        dim=128,         # embedding dimensionality
        use_cache=True,  # cache embeddings to avoid repeat backend calls
    )

    # 1. Encode a single text.
    print("\n1. 单个文本向量化")
    vector = await model.encode("机器学习很有趣")
    print(f"向量维度: {vector.shape}")

    # 2. Encode a batch of texts in one call.
    print("\n2. 批量向量化")
    texts = ["文本1", "文本2", "文本3"]
    vectors = await model.encode(texts)
    print(f"转换了 {len(vectors)} 个文本")

    # 3. Pairwise similarity between two texts.
    print("\n3. 计算相似度")
    score = await model.similarity("深度学习", "神经网络")
    print(f"相似度分数: {score:.3f}")

    # 4. Practical use: rank candidate texts by similarity to a query.
    print("\n4. 实际应用:找出最相似的文本")
    query = "人工智能"
    candidates = [
        "机器学习算法",
        "深度神经网络",
        "今天天气很好",
        "自然语言处理",
        "晚上吃什么饭",
    ]
    print(f"\n查询文本: {query}")
    print(f"候选文本: {candidates}")
    print("\n相似度排名:")

    # Score all candidates concurrently: the similarity calls are
    # independent I/O-bound awaits, so gather() overlaps them instead
    # of serializing one round-trip per candidate.
    sims = await asyncio.gather(
        *(model.similarity(query, candidate) for candidate in candidates)
    )
    scores = list(zip(candidates, sims))

    # Highest similarity first.
    scores.sort(key=lambda pair: pair[1], reverse=True)
    for i, (text, sim) in enumerate(scores, 1):
        print(f" {i}. {text:20s} -> {sim:.3f}")
# Script entry point: run the async demo only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())