title_similarity.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. """
  2. @author: luojunhui
  3. """
  4. import aiohttp
  5. empty_list = []
  6. def jcd_title_similarity(ori_title, search_title):
  7. """
  8. simple ways to calculate the similarity of titles
  9. :param ori_title:
  10. :param search_title:
  11. :return:
  12. """
  13. set1 = set(ori_title)
  14. set2 = set(search_title)
  15. intersection = len(set1 & set2)
  16. union = len(set1 | set2)
  17. return intersection / union
  18. async def nlp_title_similarity(url, ori_title, search_title_list):
  19. """
  20. nlp title similarity
  21. """
  22. headers = {"Content-Type": "application/json"}
  23. body = {
  24. "data": {
  25. "text_list_a": [ori_title],
  26. "text_list_b": search_title_list,
  27. },
  28. "function": "similarities_cross",
  29. "use_cache": False
  30. }
  31. async with aiohttp.ClientSession() as session:
  32. async with session.post(url, headers=headers, json=body) as response:
  33. response_text = await response.text()
  34. if response_text and response.status == 200:
  35. res = await response.json()
  36. score_list = res['score_list_list'][0]
  37. return score_list
  38. else:
  39. return empty_list