# calculate.py
"""
@author: luojunhui
"""
  4. import json
  5. import os
  6. from applications.log import logging
  7. from applications.functions.date import generate_daily_strings, five_days_before
  8. def read_single_file(filename):
  9. """
  10. :param filename:
  11. """
  12. with open(filename, encoding="utf-8") as f:
  13. data = json.loads(f.read())
  14. if data:
  15. return data
  16. else:
  17. return {}
  18. def compute_similarity(file_1, file_2):
  19. """
  20. 计算
  21. :param file_1:
  22. :param file_2:
  23. :return:
  24. """
  25. data_1 = read_single_file(file_1)
  26. data_2 = read_single_file(file_2)
  27. def calculate_v1(d1, d2):
  28. """
  29. 通过交并集来判断
  30. :param d1:
  31. :param d2:
  32. :return:
  33. """
  34. f1_keys = set(d1["key_words"])
  35. f2_keys = set(d2["key_words"])
  36. keys_union = f1_keys | f2_keys
  37. keys_intersection = f1_keys & f2_keys
  38. f1_search_keys = set(d1["search_keys"])
  39. f2_search_keys = set(d2["search_keys"])
  40. search_keys_union = f1_search_keys | f2_search_keys
  41. search_keys_intersection = f1_search_keys & f2_search_keys
  42. f1_extra_keys = set(d1["extra_keys"])
  43. f2_extra_keys = set(d2["extra_keys"])
  44. extra_keys_union = f1_extra_keys | f2_extra_keys
  45. extra_keys_intersection = f1_extra_keys & f2_extra_keys
  46. score_1 = len(keys_intersection) / len(keys_union)
  47. score_2 = len(search_keys_intersection) / len(search_keys_union)
  48. score_3 = len(extra_keys_intersection) / len(extra_keys_union)
  49. return score_1 * 0.4 + score_2 * 0.4 + score_3 * 0.2
  50. def calculate_v2(d1, d2):
  51. """
  52. 计算方法 v2
  53. :param d1:
  54. :param d2:
  55. :return:
  56. """
  57. score = 0
  58. tone_1 = d1["tone"]
  59. tone_2 = d2["tone"]
  60. if tone_1 == tone_2:
  61. score += 0.1
  62. target_audience_1 = d1["target_audience"]
  63. target_audience_2 = d2["target_audience"]
  64. if target_audience_1 == target_audience_2:
  65. score += 0.2
  66. target_age_1 = d1["target_age"]
  67. target_age_2 = d2["target_age"]
  68. if target_age_1 == target_age_2:
  69. score += 0.2
  70. address_1 = d1["address"]
  71. address_2 = d2["address"]
  72. if address_1 == address_2:
  73. score += 0.2
  74. gender_1 = d1["theme"]
  75. gender_2 = d2["theme"]
  76. if gender_1 == gender_2:
  77. score += 0.5
  78. return score
  79. if data_1 and data_2:
  80. try:
  81. score_1 = calculate_v1(data_1, data_2)
  82. return score_1
  83. # score_2 = calculate_v2(data_1, data_2)
  84. # return score_1, score_2
  85. except Exception as e:
  86. return 0
  87. else:
  88. return 0
  89. def title_mix(title_p, dt, trace_id):
  90. """
  91. 执行代码
  92. :param trace_id: 请求唯一 id
  93. :param title_p:
  94. :param dt: dt
  95. """
  96. five_days_ago = five_days_before(ori_dt=dt)
  97. days_list = generate_daily_strings(five_days_ago, dt)
  98. L = []
  99. for day_str in days_list:
  100. json_path = os.path.join(os.getcwd(), 'applications', 'static', day_str)
  101. # 处理标题信息
  102. files = os.listdir(json_path)
  103. for file in files:
  104. if file.endswith(".json"):
  105. L.append(os.path.join(json_path, file))
  106. print("召回的视频量", len(L))
  107. score_list_1 = []
  108. # score_list_2 = []
  109. for file in L:
  110. file_name = file.split('/')[-1].replace(".json", "")
  111. v_id = file_name.split('_')[1]
  112. uid = file_name.split('_')[0]
  113. # score1, score2 = compute_similarity(title_p, file)
  114. score1 = compute_similarity(title_p, file)
  115. score_list_1.append([score1, v_id, uid])
  116. # score_list_2.append([score2, v_id, uid])
  117. s1_list = sorted(score_list_1, key=lambda x: x[0], reverse=True)
  118. # s2_list = sorted(score_list_2, key=lambda x: x[0], reverse=True)
  119. title = title_p.split("/")[-1].replace(".json", "")
  120. obj = {
  121. "title": title,
  122. "s1_vid": s1_list[0][1],
  123. "s1_score": s1_list[0][0],
  124. "s1_uid": s1_list[0][2],
  125. # "s2_vid": s2_list[0][1],
  126. # "s2_score": s2_list[0][0],
  127. # "s2_uid": s2_list[0][2]
  128. }
  129. logging(
  130. code="1003",
  131. info="计算结果得分",
  132. data=obj,
  133. function="title_mix",
  134. trace_id=trace_id
  135. )
  136. return obj