functions.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. """
  2. @author: luojunhui
  3. """
  4. import requests
  5. import pymysql
  6. class Functions(object):
  7. """
  8. functions class
  9. """
  10. @classmethod
  11. def getTitleScore(cls, title_list, account_name):
  12. """
  13. 标题打分
  14. :param title_list:
  15. :param account_name:
  16. :return:
  17. """
  18. url = "http://192.168.100.31:6060/score_list"
  19. body = {
  20. "account_nickname_list": [account_name],
  21. "text_list": title_list,
  22. "max_time": None,
  23. "min_time": None,
  24. "interest_type": "avg",
  25. "sim_type": "mean",
  26. "rate": 0.1
  27. }
  28. response = requests.post(url=url, headers={}, json=body).json()
  29. return response
  30. @classmethod
  31. def matchLinkById(cls, channel_content_id):
  32. """
  33. Use channelContentId to match articleUrl
  34. :param channel_content_id:
  35. :return:
  36. """
  37. connection = pymysql.connect(
  38. host='rm-bp12k5fuh5zyx31d28o.mysql.rds.aliyuncs.com',
  39. port=3306,
  40. user='wx2023_ad',
  41. password='wx2023_adP@assword1234',
  42. db='adplatform',
  43. charset='utf8mb4'
  44. )
  45. sql = f"""select account_id, link, item_index from changwen_article where id = '{channel_content_id}';"""
  46. cursor = connection.cursor()
  47. cursor.execute(sql)
  48. article_link = cursor.fetchone()
  49. return article_link
  50. @classmethod
  51. def matchLinkByIdTuple(cls, channel_id_tuple):
  52. """
  53. Use channelContentId to match articleUrl
  54. :param channel_id_tuple:
  55. :return:
  56. """
  57. connection = pymysql.connect(
  58. host='rm-bp12k5fuh5zyx31d28o.mysql.rds.aliyuncs.com',
  59. port=3306,
  60. user='wx2023_ad',
  61. password='wx2023_adP@assword1234',
  62. db='adplatform',
  63. charset='utf8mb4'
  64. )
  65. sql = f"""select id, account_id, link, item_index, title from changwen_article where id in {channel_id_tuple};"""
  66. cursor = connection.cursor()
  67. cursor.execute(sql)
  68. article_link = cursor.fetchall()
  69. L = {}
  70. for line in article_link:
  71. key = line[0]
  72. value = {
  73. "gh_key": "{}_{}".format(line[1], line[3]),
  74. "url": line[2],
  75. "title": line[4]
  76. }
  77. L[key] = value
  78. return L
  79. @classmethod
  80. def TitleSimilarity(cls, title_list, target_title):
  81. """
  82. 计算标题相似度
  83. :return:
  84. """
  85. def title_sim_v2(title_a, title_b, thredhold=0.8):
  86. """
  87. :param title_a:
  88. :param title_b:
  89. :param thredhold:
  90. :return:
  91. """
  92. if len(title_a) < 1 or len(title_b) < 1:
  93. return False
  94. set_a = set(title_a)
  95. set_b = set(title_b)
  96. set_cross = set_a & set_b
  97. set_union = set_a | set_b
  98. if not set_union:
  99. return False
  100. min_len = max(min(len(set_a), len(set_b)), 1)
  101. rate = len(set_cross) / min_len
  102. if rate >= thredhold:
  103. return True
  104. else:
  105. return False
  106. for title in title_list:
  107. sim_score = title_sim_v2(target_title, title)
  108. if sim_score:
  109. return True
  110. return False