123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127 |
- """
- @author: luojunhui
- """
- import json
- import os
def read_single_file(filename):
    """
    Load and parse a JSON file.

    :param filename: path of a UTF-8 encoded JSON file
    :return: the parsed JSON value, or an empty dict when that value is falsy
        (``{}``, ``[]``, ``null``, ``0``, ``""`` ...)
    """
    with open(filename, encoding="utf-8") as handle:
        parsed = json.loads(handle.read())
    # Any falsy payload is normalised to an empty dict.
    return parsed or {}
def compute_similarity(file_1, file_2):
    """
    Score how similar two JSON description files are.

    Two independent scores are produced:

    * v1 -- weighted Jaccard (intersection over union) overlap of the
      ``key_words`` (weight 0.4), ``search_keys`` (0.4) and ``extra_keys``
      (0.2) lists.
    * v2 -- sum of fixed bonuses, one for each attribute whose value is equal
      in both files: ``tone`` 0.1, ``target_audience`` 0.2, ``target_age``
      0.2, ``address`` 0.2, ``theme`` 0.5.

    Errors raised while reading or parsing either file are not handled here
    and propagate to the caller.

    :param file_1: path of the first JSON file
    :param file_2: path of the second JSON file
    :return: tuple ``(score_v1, score_v2)``; ``(0, 0)`` when either file holds
        no data, or when a required field is missing or has an unusable type
    """
    data_1 = read_single_file(file_1)
    data_2 = read_single_file(file_2)
    if not (data_1 and data_2):
        return 0, 0

    def jaccard(items_1, items_2):
        """Return |A & B| / |A | B| for the two collections, 0.0 if both are empty."""
        set_1 = set(items_1)
        set_2 = set(items_2)
        union = set_1 | set_2
        # Two empty lists used to raise ZeroDivisionError, which the caller's
        # blanket handler turned into (0, 0) -- throwing away the v2 score
        # too. Nothing shared means this component simply contributes 0.
        if not union:
            return 0.0
        return len(set_1 & set_2) / len(union)

    def calculate_v1(d1, d2):
        """Weighted keyword-overlap score."""
        return (
            jaccard(d1["key_words"], d2["key_words"]) * 0.4
            + jaccard(d1["search_keys"], d2["search_keys"]) * 0.4
            + jaccard(d1["extra_keys"], d2["extra_keys"]) * 0.2
        )

    def calculate_v2(d1, d2):
        """Exact-match attribute score."""
        # Order matters only for float-summation reproducibility.
        bonuses = (
            ("tone", 0.1),
            ("target_audience", 0.2),
            ("target_age", 0.2),
            ("address", 0.2),
            ("theme", 0.5),
        )
        score = 0
        for field, bonus in bonuses:
            if d1[field] == d2[field]:
                score += bonus
        return score

    try:
        return calculate_v1(data_1, data_2), calculate_v2(data_1, data_2)
    except (KeyError, TypeError):
        # Best effort: a missing field (KeyError) or a payload/field of the
        # wrong type (TypeError) makes the pair incomparable, not fatal.
        return 0, 0
def title_mix(title_p, dt):
    """
    Find the stored candidate files that best match a title file.

    Every entry of ``<cwd>/applications/static/<dt>`` is compared with
    ``title_p`` through ``compute_similarity``. The uid and video id are taken
    from the first and second ``_``-separated parts of the candidate's file
    name (with ``.json`` removed).

    :param title_p: path of the title JSON file to match
    :param dt: name of the sub-directory of ``applications/static`` that holds
        the candidate files
    :return: dict with ``title`` plus, for each of the two scores (``s1_*``,
        ``s2_*``), the best candidate's ``vid``, ``score`` and ``uid``. When
        the directory is empty the ids are ``None`` and the scores ``0``.
    :raises IndexError: if a candidate file name contains no ``_`` separator
    """
    json_path = os.path.join(os.getcwd(), 'applications', 'static', dt)
    # basename instead of split("/") so the separator used by os.path.join
    # on the current platform is honoured.
    title = os.path.basename(title_p).replace(".json", "")

    # Process the candidate files.
    candidates = []
    # os.listdir returns entries in arbitrary order; sorting makes the winner
    # among equally scored candidates the same on every run.
    for entry in sorted(os.listdir(json_path)):
        # ``entry`` is already the bare file name, no path splitting needed.
        name_parts = entry.replace(".json", "").split('_')
        uid = name_parts[0]
        v_id = name_parts[1]
        score1, score2 = compute_similarity(title_p, os.path.join(json_path, entry))
        candidates.append((score1, score2, v_id, uid))

    if not candidates:
        # Nothing to compare against: report "no match" instead of failing
        # with an IndexError on an empty ranking.
        return {
            "title": title,
            "s1_vid": None,
            "s1_score": 0,
            "s1_uid": None,
            "s2_vid": None,
            "s2_score": 0,
            "s2_uid": None
        }

    # max() yields the first highest-scoring candidate -- the same element a
    # stable descending sort would put at index 0 -- without sorting twice.
    best_1 = max(candidates, key=lambda c: c[0])
    best_2 = max(candidates, key=lambda c: c[1])
    return {
        "title": title,
        "s1_vid": best_1[2],
        "s1_score": best_1[0],
        "s1_uid": best_1[3],
        "s2_vid": best_2[2],
        "s2_score": best_2[1],
        "s2_uid": best_2[3]
    }
|