# functions.py
"""
@author: luojunhui
"""
from contextlib import closing

import pymysql
import requests
  6. class Functions(object):
  7. """
  8. functions class
  9. """
  10. @classmethod
  11. def getTitleScore(cls, title_list, account_name):
  12. """
  13. 标题打分
  14. :param title_list:
  15. :param account_name:
  16. :return:
  17. """
  18. url = "http://192.168.100.31:6060/score_list"
  19. body = {
  20. "account_nickname_list": [account_name],
  21. "text_list": title_list,
  22. "max_time": None,
  23. "min_time": None,
  24. "interest_type": "avg",
  25. "sim_type": "mean",
  26. "rate": 0.1
  27. }
  28. response = requests.post(url=url, headers={}, json=body).json()
  29. return response
  30. @classmethod
  31. def getTitleAccountScore(cls, title, account_list):
  32. """
  33. 标题打分
  34. :param title:
  35. :param account_list:
  36. :return:
  37. """
  38. url = "http://192.168.100.31:6060/score_list"
  39. body = {
  40. "account_nickname_list": account_list,
  41. "text_list": [title],
  42. "max_time": None,
  43. "min_time": None,
  44. "interest_type": "avg",
  45. "sim_type": "mean",
  46. "rate": 0.1
  47. }
  48. response = requests.post(url=url, headers={}, json=body).json()
  49. L = []
  50. for account in account_list:
  51. account_score = response[account]['score_list'][0]
  52. L.append([account, account_score])
  53. return L
  54. @classmethod
  55. def matchLinkById(cls, channel_content_id):
  56. """
  57. Use channelContentId to match articleUrl
  58. :param channel_content_id:
  59. :return:
  60. """
  61. connection = pymysql.connect(
  62. host='rm-bp12k5fuh5zyx31d28o.mysql.rds.aliyuncs.com',
  63. port=3306,
  64. user='wx2023_ad',
  65. password='wx2023_adP@assword1234',
  66. db='adplatform',
  67. charset='utf8mb4'
  68. )
  69. sql = f"""select account_id, link, item_index from changwen_article where id = '{channel_content_id}';"""
  70. cursor = connection.cursor()
  71. cursor.execute(sql)
  72. article_link = cursor.fetchone()
  73. return article_link
  74. @classmethod
  75. def matchLinkByIdTuple(cls, channel_id_tuple):
  76. """
  77. Use channelContentId to match articleUrl
  78. :param channel_id_tuple:
  79. :return:
  80. """
  81. connection = pymysql.connect(
  82. host='rm-bp12k5fuh5zyx31d28o.mysql.rds.aliyuncs.com',
  83. port=3306,
  84. user='wx2023_ad',
  85. password='wx2023_adP@assword1234',
  86. db='adplatform',
  87. charset='utf8mb4'
  88. )
  89. sql = f"""select id, account_id, link, item_index, title from changwen_article where id in {channel_id_tuple};"""
  90. cursor = connection.cursor()
  91. cursor.execute(sql)
  92. article_link = cursor.fetchall()
  93. L = {}
  94. for line in article_link:
  95. key = line[0]
  96. value = {
  97. "gh_key": "{}_{}".format(line[1], line[3]),
  98. "url": line[2],
  99. "title": line[4]
  100. }
  101. L[key] = value
  102. return L
  103. @classmethod
  104. def TitleSimilarity(cls, title_list, target_title):
  105. """
  106. 计算标题相似度
  107. :return:
  108. """
  109. def title_sim_v2(title_a, title_b, thredhold=0.8):
  110. """
  111. :param title_a:
  112. :param title_b:
  113. :param thredhold:
  114. :return:
  115. """
  116. if len(title_a) < 1 or len(title_b) < 1:
  117. return False
  118. set_a = set(title_a)
  119. set_b = set(title_b)
  120. set_cross = set_a & set_b
  121. set_union = set_a | set_b
  122. if not set_union:
  123. return False
  124. min_len = max(min(len(set_a), len(set_b)), 1)
  125. rate = len(set_cross) / min_len
  126. if rate >= thredhold:
  127. return True
  128. else:
  129. return False
  130. for title in title_list:
  131. sim_score = title_sim_v2(target_title, title)
  132. if sim_score:
  133. return True
  134. return False