12345678910111213141516171819202122232425262728 |
- """
- @author: luojunhui
- """
def title_sim_v2(title_a, title_b, thredhold=0.8):
    """Report whether two titles share enough distinct elements to count as similar.

    Each title is reduced to the set of its elements (individual characters
    when the titles are strings). The score is the size of the intersection
    divided by the size of the smaller set, i.e. the overlap coefficient, so
    a short title fully contained in a longer one scores 1.0.

    Args:
        title_a: First title. ``None`` or an empty value is never similar.
        title_b: Second title. ``None`` or an empty value is never similar.
        thredhold: Minimum score (inclusive) for the titles to be considered
            similar. The misspelled name is kept so that existing keyword
            callers continue to work.

    Returns:
        True if the overlap score is at least ``thredhold``, otherwise False.
    """
    # Missing or empty titles cannot be compared; this guard also guarantees
    # that both sets below are non-empty, so the division is always safe.
    if not title_a or not title_b:
        return False

    set_a = set(title_a)
    set_b = set(title_b)
    shared = set_a & set_b

    # Normalise by the smaller set so that containment yields a score of 1.0.
    rate = len(shared) / min(len(set_a), len(set_b))
    return rate >= thredhold
def title_sim_v2_by_list(title_target, title_list):
    """Report whether ``title_target`` is similar to any title in ``title_list``.

    Every candidate is compared against the target with ``title_sim_v2`` using
    its default threshold; the scan stops at the first similar candidate.

    Args:
        title_target: The title to look for.
        title_list: Iterable of candidate titles to compare against.

    Returns:
        True if at least one candidate is similar to the target, otherwise
        False (including when ``title_list`` is empty).
    """
    # any() short-circuits on the first truthy result, like an early return.
    return any(
        title_sim_v2(title_target, candidate) for candidate in title_list
    )
|