functions.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. """
  2. @author: luojunhui
  3. """
  4. from datetime import datetime, timezone
  5. import hashlib
  6. import requests
  7. import pymysql
  8. class Functions(object):
  9. """
  10. functions class
  11. """
  12. @classmethod
  13. def getTitleScore(cls, title_list, account_name):
  14. """
  15. 标题打分
  16. :param title_list:
  17. :param account_name:
  18. :return:
  19. """
  20. url = "http://192.168.100.31:6060/score_list"
  21. body = {
  22. "account_nickname_list": [account_name],
  23. "text_list": title_list,
  24. "max_time": None,
  25. "min_time": None,
  26. "interest_type": "avg",
  27. "sim_type": "mean",
  28. "rate": 0.1
  29. }
  30. response = requests.post(url=url, headers={}, json=body).json()
  31. return response
  32. @classmethod
  33. def getTitleAccountScore(cls, title, account_list):
  34. """
  35. 标题打分
  36. :param title:
  37. :param account_list:
  38. :return:
  39. """
  40. url = "http://192.168.100.31:6060/score_list"
  41. body = {
  42. "account_nickname_list": account_list,
  43. "text_list": [title],
  44. "max_time": None,
  45. "min_time": None,
  46. "interest_type": "avg",
  47. "sim_type": "mean",
  48. "rate": 0.1
  49. }
  50. response = requests.post(url=url, headers={}, json=body).json()
  51. L = []
  52. for account in account_list:
  53. account_score = response[account]['score_list'][0]
  54. L.append([account, account_score])
  55. return L
  56. @classmethod
  57. def matchLinkById(cls, channel_content_id):
  58. """
  59. Use channelContentId to match articleUrl
  60. :param channel_content_id:
  61. :return:
  62. """
  63. connection = pymysql.connect(
  64. host='rm-bp12k5fuh5zyx31d28o.mysql.rds.aliyuncs.com',
  65. port=3306,
  66. user='wx2023_ad',
  67. password='wx2023_adP@assword1234',
  68. db='adplatform',
  69. charset='utf8mb4'
  70. )
  71. sql = f"""select account_id, link, item_index from changwen_article where id = '{channel_content_id}';"""
  72. cursor = connection.cursor()
  73. cursor.execute(sql)
  74. article_link = cursor.fetchone()
  75. return article_link
  76. @classmethod
  77. def matchLinkByIdTuple(cls, channel_id_tuple):
  78. """
  79. Use channelContentId to match articleUrl
  80. :param channel_id_tuple:
  81. :return:
  82. """
  83. connection = pymysql.connect(
  84. host='rm-bp12k5fuh5zyx31d28o.mysql.rds.aliyuncs.com',
  85. port=3306,
  86. user='wx2023_ad',
  87. password='wx2023_adP@assword1234',
  88. db='adplatform',
  89. charset='utf8mb4'
  90. )
  91. sql = f"""select id, account_id, link, item_index, title from changwen_article where id in {channel_id_tuple};"""
  92. cursor = connection.cursor()
  93. cursor.execute(sql)
  94. article_link = cursor.fetchall()
  95. L = {}
  96. for line in article_link:
  97. key = line[0]
  98. value = {
  99. "gh_key": "{}_{}".format(line[1], line[3]),
  100. "url": line[2],
  101. "title": line[4]
  102. }
  103. L[key] = value
  104. return L
  105. @classmethod
  106. def TitleSimilarity(cls, title_list, target_title):
  107. """
  108. 计算标题相似度
  109. :return:
  110. """
  111. def title_sim_v2(title_a, title_b, thredhold=0.8):
  112. """
  113. :param title_a:
  114. :param title_b:
  115. :param thredhold:
  116. :return:
  117. """
  118. if len(title_a) < 1 or len(title_b) < 1:
  119. return False
  120. set_a = set(title_a)
  121. set_b = set(title_b)
  122. set_cross = set_a & set_b
  123. set_union = set_a | set_b
  124. if not set_union:
  125. return False
  126. min_len = max(min(len(set_a), len(set_b)), 1)
  127. rate = len(set_cross) / min_len
  128. if rate >= thredhold:
  129. return True
  130. else:
  131. return False
  132. for title in title_list:
  133. sim_score = title_sim_v2(target_title, title)
  134. if sim_score:
  135. return True
  136. return False
  137. @classmethod
  138. def show_desc_to_sta(cls, show_desc):
  139. """
  140. :return:
  141. """
  142. def decode_show_v(show_v):
  143. """
  144. :param show_v:
  145. :return:
  146. """
  147. foo = show_v.replace('千', 'e3').replace('万', 'e4').replace('亿', 'e8')
  148. foo = eval(foo)
  149. return int(foo)
  150. def decode_show_k(show_k):
  151. """
  152. :param show_k:
  153. :return:
  154. """
  155. this_dict = {
  156. '阅读': 'show_view_count', # 文章
  157. '看过': 'show_view_count', # 图文
  158. '观看': 'show_view_count', # 视频
  159. '赞': 'show_like_count',
  160. '付费': 'show_pay_count',
  161. '赞赏': 'show_zs_count',
  162. }
  163. if show_k not in this_dict:
  164. print(f'error from decode_show_k, show_k not found: {show_k}')
  165. return this_dict.get(show_k, 'show_unknown')
  166. show_desc = show_desc.replace('+', '')
  167. sta = {}
  168. for show_kv in show_desc.split('\u2004\u2005'):
  169. if not show_kv:
  170. continue
  171. show_k, show_v = show_kv.split('\u2006')
  172. k = decode_show_k(show_k)
  173. v = decode_show_v(show_v)
  174. sta[k] = v
  175. res = {
  176. 'show_view_count': sta.get('show_view_count', 0),
  177. 'show_like_count': sta.get('show_like_count', 0),
  178. 'show_pay_count': sta.get('show_pay_count', 0),
  179. 'show_zs_count': sta.get('show_zs_count', 0),
  180. }
  181. return res
  182. @classmethod
  183. def generateGzhId(cls, url):
  184. """
  185. generate url
  186. :param url:
  187. :return:
  188. """
  189. biz = url.split("biz=")[1].split("&")[0]
  190. idx = url.split("&idx=")[1].split("&")[0]
  191. sn = url.split("&sn=")[1].split("&")[0]
  192. url_bit = "{}-{}-{}".format(biz, idx, sn).encode()
  193. md5_hash = hashlib.md5()
  194. md5_hash.update(url_bit)
  195. md5_value = md5_hash.hexdigest()
  196. return md5_value
  197. @classmethod
  198. def time_stamp_to_str(cls, timestamp):
  199. """
  200. :param timestamp:
  201. """
  202. dt_object = datetime.utcfromtimestamp(timestamp).replace(tzinfo=timezone.utc).astimezone()
  203. date_string = dt_object.strftime('%Y-%m-%d %H:%M:%S')
  204. return date_string