123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147 |
- """
- @author: luojunhui
- """
- import requests
- import pymysql
class Functions(object):
    """
    Class-method helpers for title scoring, article-link lookup and
    title similarity checks.
    """

    # Endpoint of the scoring service used by both scoring methods.
    _SCORE_URL = "http://192.168.100.31:6060/score_list"

    # Connection settings shared by every database lookup.
    # NOTE(review): credentials are hard-coded in source control; consider
    # loading them from configuration or the environment instead.
    _DB_CONFIG = {
        "host": 'rm-bp12k5fuh5zyx31d28o.mysql.rds.aliyuncs.com',
        "port": 3306,
        "user": 'wx2023_ad',
        "password": 'wx2023_adP@assword1234',
        "db": 'adplatform',
        "charset": 'utf8mb4',
    }

    @classmethod
    def _request_scores(cls, account_list, title_list):
        """
        POST a scoring request and return the decoded JSON response.

        :param account_list: account nicknames to score against
        :param title_list: title texts to score
        :return: the service's JSON response, decoded
        """
        body = {
            "account_nickname_list": account_list,
            "text_list": title_list,
            "max_time": None,
            "min_time": None,
            "interest_type": "avg",
            "sim_type": "mean",
            "rate": 0.1
        }
        # NOTE(review): no timeout is passed, so this call blocks for as
        # long as the service takes to answer.
        return requests.post(url=cls._SCORE_URL, headers={}, json=body).json()

    @classmethod
    def _query(cls, sql, args, fetch_all):
        """
        Run one parameterized query on a fresh connection and return rows.

        The cursor and the connection are always closed, even when the
        query raises, so repeated calls do not leak connections.

        :param sql: SQL text using ``%s`` placeholders
        :param args: sequence of values bound to the placeholders
        :param fetch_all: True for ``fetchall()``, False for ``fetchone()``
        :return: whatever the chosen fetch call returns
        """
        connection = pymysql.connect(**cls._DB_CONFIG)
        try:
            with connection.cursor() as cursor:
                cursor.execute(sql, args)
                if fetch_all:
                    return cursor.fetchall()
                return cursor.fetchone()
        finally:
            connection.close()

    @staticmethod
    def _titles_similar(title_a, title_b, threshold=0.8):
        """
        Tell whether two titles overlap enough to count as similar.

        The ratio is the number of distinct elements shared by both titles
        divided by the size of the smaller distinct-element set (for string
        titles, the elements are characters).

        :param title_a: first title
        :param title_b: second title
        :param threshold: minimum ratio that counts as similar
        :return: True when the ratio is at least ``threshold``
        """
        # An empty (or missing) title is never similar to anything.
        if not title_a or not title_b:
            return False
        elements_a = set(title_a)
        elements_b = set(title_b)
        # Both sets are non-empty here, so the divisor is at least 1.
        smaller_size = min(len(elements_a), len(elements_b))
        return len(elements_a & elements_b) / smaller_size >= threshold

    @classmethod
    def getTitleScore(cls, title_list, account_name):
        """
        Score a list of titles against a single account.

        :param title_list: titles to score
        :param account_name: account nickname to score against
        :return: the scoring service's decoded JSON response
        """
        return cls._request_scores([account_name], title_list)

    @classmethod
    def getTitleAccountScore(cls, title, account_list):
        """
        Score a single title against each account in a list.

        :param title: title to score
        :param account_list: account nicknames to score against
        :return: list of ``[account, score]`` pairs, in ``account_list`` order
        :raises KeyError: if the response has no entry for an account
        """
        response = cls._request_scores(account_list, [title])
        # Only one title was sent, so each account's score is at index 0.
        return [
            [account, response[account]['score_list'][0]]
            for account in account_list
        ]

    @classmethod
    def matchLinkById(cls, channel_content_id):
        """
        Look up one article row by its id.

        :param channel_content_id: id of the row in ``changwen_article``
        :return: ``(account_id, link, item_index)`` or None when no row matches
        """
        sql = "select account_id, link, item_index from changwen_article where id = %s;"
        # Bind the id as a string, as the previous quoted literal did, but
        # let the driver escape it so the value cannot inject SQL.
        return cls._query(sql, (str(channel_content_id),), fetch_all=False)

    @classmethod
    def matchLinkByIdTuple(cls, channel_id_tuple):
        """
        Look up several article rows by id.

        :param channel_id_tuple: iterable of ids (not a pre-formatted string)
        :return: dict mapping each found id to a dict with keys ``gh_key``
                 (``"<account_id>_<item_index>"``), ``url`` and ``title``;
                 empty dict when no ids are given
        :raises TypeError: if a string is passed instead of an iterable of ids
        """
        if isinstance(channel_id_tuple, (str, bytes)):
            # Iterating a string would silently query character by character.
            raise TypeError("channel_id_tuple must be an iterable of ids, not a string")
        ids = tuple(channel_id_tuple)
        if not ids:
            # "in ()" is invalid SQL; no ids means no matches.
            return {}
        # One placeholder per id keeps one-element tuples valid and lets the
        # driver escape every value.
        placeholders = ", ".join(["%s"] * len(ids))
        sql = (
            "select id, account_id, link, item_index, title "
            "from changwen_article where id in ({});".format(placeholders)
        )
        rows = cls._query(sql, ids, fetch_all=True)
        return {
            row[0]: {
                "gh_key": "{}_{}".format(row[1], row[3]),
                "url": row[2],
                "title": row[4]
            }
            for row in rows
        }

    @classmethod
    def TitleSimilarity(cls, title_list, target_title):
        """
        Check whether the target title is similar to any title in a list.

        :param title_list: candidate titles to compare against
        :param target_title: title being checked
        :return: True if at least one candidate is similar, otherwise False
        """
        return any(
            cls._titles_similar(target_title, title) for title in title_list
        )
|