""" @author: luojunhui """ import requests import pymysql class Functions(object): """ functions class """ @classmethod def getTitleScore(cls, title_list, account_name): """ 标题打分 :param title_list: :param account_name: :return: """ url = "http://192.168.100.31:6060/score_list" body = { "account_nickname_list": [account_name], "text_list": title_list, "max_time": None, "min_time": None, "interest_type": "avg", "sim_type": "mean", "rate": 0.1 } response = requests.post(url=url, headers={}, json=body).json() return response @classmethod def matchLinkById(cls, channel_content_id): """ Use channelContentId to match articleUrl :param channel_content_id: :return: """ connection = pymysql.connect( host='rm-bp12k5fuh5zyx31d28o.mysql.rds.aliyuncs.com', port=3306, user='wx2023_ad', password='wx2023_adP@assword1234', db='adplatform', charset='utf8mb4' ) sql = f"""select account_id, link, item_index from changwen_article where id = '{channel_content_id}';""" cursor = connection.cursor() cursor.execute(sql) article_link = cursor.fetchone() return article_link @classmethod def matchLinkByIdTuple(cls, channel_id_tuple): """ Use channelContentId to match articleUrl :param channel_id_tuple: :return: """ connection = pymysql.connect( host='rm-bp12k5fuh5zyx31d28o.mysql.rds.aliyuncs.com', port=3306, user='wx2023_ad', password='wx2023_adP@assword1234', db='adplatform', charset='utf8mb4' ) sql = f"""select id, account_id, link, item_index, title from changwen_article where id in {channel_id_tuple};""" cursor = connection.cursor() cursor.execute(sql) article_link = cursor.fetchall() L = {} for line in article_link: key = line[0] value = { "gh_key": "{}_{}".format(line[1], line[3]), "url": line[2], "title": line[4] } L[key] = value return L @classmethod def TitleSimilarity(cls, title_list, target_title): """ 计算标题相似度 :return: """ def title_sim_v2(title_a, title_b, thredhold=0.8): """ :param title_a: :param title_b: :param thredhold: :return: """ if len(title_a) < 1 or len(title_b) < 1: return False set_a = set(title_a) set_b = set(title_b) set_cross = set_a & set_b set_union = set_a | set_b if not set_union: return False min_len = max(min(len(set_a), len(set_b)), 1) rate = len(set_cross) / min_len if rate >= thredhold: return True else: return False for title in title_list: sim_score = title_sim_v2(target_title, title) if sim_score: return True return False