123456789101112131415161718192021222324252627282930 |
- import jieba
- from sklearn.feature_extraction.text import TfidfVectorizer
- from sklearn.metrics.pairwise import cosine_similarity
class TitleLike:
    """Score how alike two short titles are using TF-IDF cosine similarity."""

    @classmethod
    def similarity(cls, title1, title2):
        """Return the TF-IDF cosine similarity between two titles.

        Each title is segmented into words with ``jieba.lcut``; the words are
        then joined with single spaces so that ``TfidfVectorizer`` can see the
        word boundaries jieba found. The TF-IDF model is fitted on just these
        two titles, so the score is only meaningful for this pair.

        Args:
            title1: First title string.
            title2: Second title string.

        Returns:
            The cosine similarity of the two TF-IDF vectors as a float.
            ``0.0`` is returned when neither title yields any word token
            (for example both are empty or contain only punctuation).
        """
        # Joining with "" would simply rebuild the original string and throw
        # the segmentation away; a space keeps every jieba word a separate
        # token for the vectorizer.
        documents = [
            " ".join(jieba.lcut(title1)),
            " ".join(jieba.lcut(title2)),
        ]

        # The default token_pattern requires at least two word characters and
        # would discard single-character words, which are common in Chinese.
        vectorizer = TfidfVectorizer(token_pattern=r"(?u)\b\w+\b")

        try:
            tfidf_matrix = vectorizer.fit_transform(documents)
        except ValueError:
            # scikit-learn raises ValueError ("empty vocabulary") when no
            # token survives tokenization; there is nothing to compare.
            return 0.0

        # cosine_similarity returns a 1x1 array for two single-row inputs.
        score = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]
        return float(score)
if __name__ == "__main__":
    # Manual smoke check: print the similarity score of two sample titles
    # that share most of their text.
    first_title = "#发现未来 7月18日(发布)广东(发布)男生满心欢喜准备迎接喜欢的女孩 下一秒"
    second_title = "...7月18日(发布)广东(发布)男生满心欢喜准备迎接喜欢的女孩 下一秒其他出"

    score = TitleLike.similarity(first_title, second_title)
    print(score)
|