article_tools.py 662 B

12345678910111213141516171819202122232425262728
  1. """
  2. @author: luojunhui
  3. """
  4. def title_sim_v2(title_a, title_b, thredhold=0.8):
  5. if len(title_a) < 1 or len(title_b) < 1:
  6. return False
  7. set_a = set(title_a)
  8. set_b = set(title_b)
  9. set_cross = set_a & set_b
  10. set_union = set_a | set_b
  11. if not set_union:
  12. return False
  13. min_len = max(min(len(set_a), len(set_b)), 1)
  14. rate = len(set_cross) / min_len
  15. if rate >= thredhold:
  16. return True
  17. else:
  18. return False
  19. def title_sim_v2_by_list(title_target, title_list):
  20. for title in title_list:
  21. sim_score = title_sim_v2(title_target, title)
  22. if sim_score:
  23. return True
  24. return False