rank.py 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. """
  2. @author: luojunhui
  3. """
  4. from applications.match_alg.recall import recall_videos
  5. from applications.log import logging
  6. def jac_score(d1, d2):
  7. """
  8. 通过交并集来判断
  9. :param d1:
  10. :param d2:
  11. :return:
  12. """
  13. f1_keys = set(d1["key_words"])
  14. f2_keys = set(d2["key_words"])
  15. keys_union = f1_keys | f2_keys
  16. keys_intersection = f1_keys & f2_keys
  17. f1_search_keys = set(d1["search_keys"])
  18. f2_search_keys = set(d2["search_keys"])
  19. search_keys_union = f1_search_keys | f2_search_keys
  20. search_keys_intersection = f1_search_keys & f2_search_keys
  21. f1_extra_keys = set(d1["extra_keys"])
  22. f2_extra_keys = set(d2["extra_keys"])
  23. extra_keys_union = f1_extra_keys | f2_extra_keys
  24. extra_keys_intersection = f1_extra_keys & f2_extra_keys
  25. score_1 = len(keys_intersection) / len(keys_union)
  26. score_2 = len(search_keys_intersection) / len(search_keys_union)
  27. score_3 = len(extra_keys_intersection) / len(extra_keys_union)
  28. return score_1 * 0.4 + score_2 * 0.4 + score_3 * 0.2, d2['video_id']
  29. async def best_choice(params_obj, trace_id, search_videos):
  30. """
  31. 计算,返回出最合适的 video_id
  32. :return: video_id
  33. """
  34. pq_list, search_list = await recall_videos(trace_id=trace_id, s_videos=search_videos)
  35. def best_video_id(target_list):
  36. """
  37. :param target_list:
  38. :return:
  39. """
  40. score_list = []
  41. for video_obj in target_list:
  42. try:
  43. score, video_id = jac_score(d1=params_obj, d2=video_obj)
  44. score_list.append((video_id, score))
  45. except Exception as e:
  46. print(e)
  47. sorted_list = sorted(score_list, key=lambda x: x[1], reverse=True)
  48. return sorted_list[0] if sorted_list else (0, 0)
  49. if search_list:
  50. logging(
  51. code="1003",
  52. info="Return Best Search Video",
  53. data=search_list,
  54. trace_id=trace_id
  55. )
  56. return search_list[0]
  57. # return best_video_id(search_list)[0]
  58. # best_search_tuple = best_video_id(search_list)
  59. # if best_search_tuple[1] > 0:
  60. # logging(
  61. # code="1003",
  62. # info="search_score---{}".format(best_search_tuple[1]),
  63. # trace_id=trace_id
  64. # )
  65. # return best_search_tuple[0]
  66. # else:
  67. # best_pq_tuple = best_video_id(pq_list)
  68. # if best_pq_tuple[1] > 0:
  69. # logging(
  70. # code="1003",
  71. # info="pq_score---{}".format(best_pq_tuple[1]),
  72. # trace_id=trace_id
  73. # )
  74. # return best_pq_tuple[0]
  75. # else:
  76. # return None
  77. else:
  78. best_pq_tuple = best_video_id(pq_list)
  79. if best_pq_tuple[1] > 0:
  80. logging(
  81. code="1003",
  82. info="pq_score---{}".format(best_pq_tuple[1]),
  83. trace_id=trace_id
  84. )
  85. return best_pq_tuple[0]
  86. else:
  87. return None