|
@@ -0,0 +1,127 @@
|
|
|
|
+"""
|
|
|
|
+@author: luojunhui
|
|
|
|
+"""
|
|
|
|
+import json
|
|
|
|
+import os
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def read_single_file(filename):
    """
    Load a UTF-8 encoded JSON file and return its parsed content.

    :param filename: path of the JSON file to read
    :return: the parsed JSON value; an empty dict when the parsed value is
        falsy (``null``, ``{}``, ``[]``, ``0``, ``""`` ...)
    :raises OSError: if the file cannot be opened
    :raises json.JSONDecodeError: if the file does not contain valid JSON
    """
    with open(filename, encoding="utf-8") as f:
        # json.load parses straight from the file object; no need to read the
        # text into a temporary string first.
        data = json.load(f)
    # Normalise every "empty" payload to a dict so callers can rely on a
    # simple truthiness check.
    return data or {}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
# (field name, weight) pairs for the set-overlap score (v1).
_SET_FIELD_WEIGHTS = (
    ("key_words", 0.4),
    ("search_keys", 0.4),
    ("extra_keys", 0.2),
)

# (field name, weight) pairs for the exact-match score (v2).
# NOTE(review): these weights add up to 1.2, not 1.0 -- kept exactly as in the
# original implementation; confirm whether that is intended.
_MATCH_FIELD_WEIGHTS = (
    ("tone", 0.1),
    ("target_audience", 0.2),
    ("target_age", 0.2),
    ("address", 0.2),
    ("theme", 0.5),
)


def _jaccard(items_a, items_b):
    """Return |A & B| / |A | B| for two iterables, or 0.0 if both are empty."""
    set_a, set_b = set(items_a), set(items_b)
    union = set_a | set_b
    if not union:
        # Two empty collections share nothing measurable; report "no
        # similarity" instead of dividing by zero.
        return 0.0
    return len(set_a & set_b) / len(union)


def _set_overlap_score(d1, d2):
    """Weighted sum of the Jaccard indices of the keyword-like list fields."""
    return sum(
        _jaccard(d1[field], d2[field]) * weight
        for field, weight in _SET_FIELD_WEIGHTS
    )


def _attribute_match_score(d1, d2):
    """Add up the weight of every scalar field that is equal in both dicts."""
    score = 0
    for field, weight in _MATCH_FIELD_WEIGHTS:
        if d1[field] == d2[field]:
            score += weight
    return score


def compute_similarity(file_1, file_2):
    """
    Compute two similarity scores between the JSON documents in two files.

    The first score (v1) is the weighted Jaccard overlap of the
    ``key_words`` (0.4), ``search_keys`` (0.4) and ``extra_keys`` (0.2) lists.
    The second score (v2) adds a fixed weight for every one of ``tone``,
    ``target_audience``, ``target_age``, ``address`` and ``theme`` whose value
    is equal in both documents.

    :param file_1: path of the first JSON file
    :param file_2: path of the second JSON file
    :return: tuple ``(score_v1, score_v2)``; ``(0, 0)`` when either document
        is empty, lacks one of the required keys, or holds values of an
        unusable type
    :raises Exception: whatever ``read_single_file`` raises while loading
        either file is propagated unchanged
    """
    data_1 = read_single_file(file_1)
    data_2 = read_single_file(file_2)

    if not (data_1 and data_2):
        return 0, 0

    try:
        return (
            _set_overlap_score(data_1, data_2),
            _attribute_match_score(data_1, data_2),
        )
    except (KeyError, TypeError):
        # Best effort: a document that is missing a field (KeyError) or whose
        # field has the wrong shape, e.g. a non-iterable or unhashable items
        # (TypeError), is treated as "not similar" rather than aborting the
        # caller's scan over many files.
        return 0, 0
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def _strip_json_suffix(path):
    """Return the base name of *path* with one trailing ``.json`` removed."""
    name = os.path.basename(path)
    if name.endswith(".json"):
        return name[:-len(".json")]
    return name


def title_mix(title_p, dt):
    """
    Find the stored documents most similar to a title document.

    Every ``<uid>_<vid>.json`` file in ``<cwd>/applications/static/<dt>`` is
    compared against ``title_p`` with ``compute_similarity``; the best match
    is tracked separately for each of the two returned scores.

    :param title_p: path of the title JSON file to compare against
    :param dt: name of the sub-directory of ``applications/static`` to scan
    :return: dict with the keys ``title``, ``s1_vid``, ``s1_score``,
        ``s1_uid``, ``s2_vid``, ``s2_score`` and ``s2_uid``. When the
        directory holds no usable candidate file, the ``*_vid`` / ``*_uid``
        values are ``None`` and the scores are ``0``.
    :raises OSError: if the directory cannot be listed
    """
    json_path = os.path.join(os.getcwd(), 'applications', 'static', dt)

    # Fallback used when no candidate is found: (score, v_id, uid).
    best_s1 = None
    best_s2 = None

    # os.listdir order is arbitrary; sorting makes tie-breaking reproducible.
    for entry in sorted(os.listdir(json_path)):
        if not entry.endswith(".json"):
            # Stray files (editor backups, .DS_Store, ...) are not candidates.
            continue
        name_parts = _strip_json_suffix(entry).split('_')
        if len(name_parts) < 2:
            # File name does not follow the "<uid>_<vid>" convention.
            continue
        uid, v_id = name_parts[0], name_parts[1]

        score1, score2 = compute_similarity(
            title_p, os.path.join(json_path, entry)
        )
        # Strict ">" keeps the first candidate on ties, which is what taking
        # element 0 of a stable descending sort would yield.
        if best_s1 is None or score1 > best_s1[0]:
            best_s1 = (score1, v_id, uid)
        if best_s2 is None or score2 > best_s2[0]:
            best_s2 = (score2, v_id, uid)

    if best_s1 is None:
        best_s1 = (0, None, None)
    if best_s2 is None:
        best_s2 = (0, None, None)

    return {
        "title": _strip_json_suffix(title_p),
        "s1_vid": best_s1[1],
        "s1_score": best_s1[0],
        "s1_uid": best_s1[2],
        "s2_vid": best_s2[1],
        "s2_score": best_s2[0],
        "s2_uid": best_s2[2],
    }
|