"""
@author: luojunhui
"""
from typing import Dict

from applications.config import nlp_base_url, nlp_aliyun_url
from applications.match_algorithm.title_similarity import jcd_title_similarity
from applications.match_algorithm.title_similarity import nlp_title_similarity

# Kept as a module-level name in case other modules import it. Responses
# below return a copy so callers can never mutate this shared object.
empty_list = []


def _jaccard(left, right):
    """Return |left & right| / |left | right|, or 0.0 when both are empty."""
    first, second = set(left), set(right)
    union = first | second
    if not union:
        # An empty union would otherwise raise ZeroDivisionError.
        return 0.0
    return len(first & second) / len(union)


def _result_title(item):
    """
    Return the title stored under item['result'].

    Reads 'title' when present and falls back to 'video_title', the key
    title_similarity_rank uses for xg_search results.
    :param item: mapping with a 'result' mapping inside
    :return: the title value
    :raises KeyError: when neither key is present
    """
    result = item['result']
    if 'title' in result:
        return result['title']
    return result['video_title']


def jac_score(d1, d2):
    """
    Score two records by the intersection-over-union of their keyword lists.

    Three ratios are computed, one each for "key_words", "search_keys" and
    "extra_keys", and combined with weights 0.4 / 0.4 / 0.2. A field that is
    empty in both records contributes 0.0 instead of raising
    ZeroDivisionError.
    :param d1: mapping with "key_words", "search_keys" and "extra_keys"
    :param d2: mapping with the same three keys plus 'video_id'
    :return: tuple of (weighted score, d2['video_id'])
    """
    score_1 = _jaccard(d1["key_words"], d2["key_words"])
    score_2 = _jaccard(d1["search_keys"], d2["search_keys"])
    score_3 = _jaccard(d1["extra_keys"], d2["extra_keys"])
    return score_1 * 0.4 + score_2 * 0.4 + score_3 * 0.2, d2['video_id']


def title_similarity_rank(content_title, recall_list):
    """
    Rank recalled items by jcd_title_similarity against content_title.

    Items whose platform is not one of 'dy_search', 'baidu_search' or
    'xg_search' are dropped. Each kept item is modified in place: it gains
    a 'title' and a 'score' entry.
    :param content_title: title to compare every recalled title against
    :param recall_list: iterable of mappings with 'result' and 'platform'
    :return: the kept items, sorted by 'score' in descending order
    """
    include_title_list = []
    for item in recall_list:
        video_info = item['result']
        platform = item['platform']
        # The title lives under a platform-specific key.
        if platform in ['dy_search', 'baidu_search']:
            title = video_info['title']
        elif platform in ['xg_search']:
            title = video_info['video_title']
        else:
            continue
        item['title'] = title
        # NOTE(review): presumably a Jaccard-style score, judging by the
        # helper's name; its implementation is not visible here.
        item['score'] = jcd_title_similarity(content_title, title)
        include_title_list.append(item)
    return sorted(include_title_list, key=lambda x: x['score'], reverse=True)


async def title_similarity_with_nlp(content_title, recall_list) -> Dict:
    """
    Use a relevance model to score search titles against the article title.

    The titles are sent to nlp_title_similarity at nlp_base_url first; if
    that returns a falsy result, the request is retried at nlp_aliyun_url.
    :param content_title: the article title
    :param recall_list: sequence of mappings holding a 'result' mapping
    :return: {"alg": "nlp", "result": [...]} with a 'score' added to a copy
        of every item and sorted in descending order, or {"result": []}
        when no scores could be obtained
    """
    title_list = [_result_title(item) for item in recall_list]
    score_list = await nlp_title_similarity(
        url=nlp_base_url,
        ori_title=content_title,
        search_title_list=title_list
    )
    # if local machine is down, use aliyun machine
    if not score_list:
        score_list = await nlp_title_similarity(
            url=nlp_aliyun_url,
            ori_title=content_title,
            search_title_list=title_list
        )
    # Both endpoints failed: return a fresh empty list, not the shared one.
    if not score_list:
        return {"result": list(empty_list)}

    # Scores are paired with items by position; the inputs are not mutated.
    scored_items = (
        {**item, 'score': score}
        for item, score in zip(recall_list, score_list)
    )
    sorted_list = sorted(scored_items, key=lambda x: x['score'], reverse=True)
    return {
        "alg": "nlp",
        "result": sorted_list
    }