common.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. # encoding: utf-8
  2. """
  3. @author: luojunhui
  4. """
  5. import json
  6. import time
  7. import uuid
  8. import requests
  9. import pymysql
  10. import urllib.parse
  11. from applications.functions.log import logging
  12. class Functions(object):
  13. """
  14. 通用工具代码
  15. """
  16. # 自动加入白名单逻辑
  17. @classmethod
  18. def auto_white(cls, root_share_id):
  19. """
  20. 自动加入白名单, 保证公众号百分百出广告
  21. :param root_share_id:
  22. :return:
  23. """
  24. def get_cookie():
  25. """
  26. 获取 cookie
  27. :return:
  28. """
  29. url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
  30. payload = {}
  31. headers = {
  32. 'accept': 'application/json, text/plain, */*',
  33. 'accept-language': 'en',
  34. 'priority': 'u=1, i',
  35. 'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
  36. 'sec-ch-ua-mobile': '?0',
  37. 'sec-ch-ua-platform': '"macOS"',
  38. 'sec-fetch-dest': 'empty',
  39. 'sec-fetch-mode': 'cors',
  40. 'sec-fetch-site': 'same-origin',
  41. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
  42. }
  43. response = requests.request("GET", url, headers=headers, data=payload)
  44. return response.cookies.values()[0]
  45. url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
  46. dd = {
  47. "rootShareId": root_share_id,
  48. "commit": "算法自动加入白名单--"
  49. }
  50. payload = json.dumps(dd)
  51. cookie = get_cookie()
  52. headers = {
  53. 'accept': 'application/json',
  54. 'accept-language': 'en',
  55. 'content-type': 'application/json;',
  56. 'cookie': "SESSION=" + cookie,
  57. 'origin': 'https://admin.piaoquantv.com',
  58. 'priority': 'u=1, i',
  59. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
  60. }
  61. response = requests.request("POST", url, headers=headers, data=payload)
  62. return response.json()['content']
  63. # 创建公众号分享卡片
  64. @classmethod
  65. def create_gzh_path(cls, video_id, shared_uid):
  66. """
  67. :param video_id: 视频 id
  68. :param shared_uid: 分享 id
  69. """
  70. root_share_id = str(uuid.uuid4())
  71. url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
  72. # 自动把 root_share_id 加入到白名单
  73. cls.auto_white(root_share_id)
  74. return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"
  75. # 从票圈请求视频
  76. @classmethod
  77. def request_for_info(cls, video_id):
  78. """
  79. 请求数据
  80. :param video_id:
  81. :return:
  82. """
  83. url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
  84. data = {
  85. "videoIdList": [video_id]
  86. }
  87. header = {
  88. "Content-Type": "application/json",
  89. }
  90. response = requests.post(url, headers=header, data=json.dumps(data))
  91. return response.json()
  92. # 清理标题
  93. @classmethod
  94. def clean_title(cls, strings):
  95. """
  96. :param strings:
  97. :return:
  98. """
  99. return (
  100. strings.strip()
  101. .replace("\n", "")
  102. .replace("/", "")
  103. .replace("\r", "")
  104. .replace("#", "")
  105. .replace(".", "。")
  106. .replace("\\", "")
  107. .replace("&NBSP", "")
  108. .replace(":", "")
  109. .replace("*", "")
  110. .replace("?", "")
  111. .replace("?", "")
  112. .replace('"', "")
  113. .replace("<", "")
  114. .replace(">", "")
  115. .replace("|", "")
  116. .replace(" ", "")
  117. .replace('"', "")
  118. .replace("'", "")
  119. )
  120. class MySQLServer(object):
  121. """
  122. MySql 服务
  123. """
  124. @classmethod
  125. def select_download_videos(cls, trace_id):
  126. """
  127. 查询
  128. :param trace_id:
  129. :return:
  130. """
  131. sql = "select video_id from crawler_video where out_user_id = '{}' and video_title = '{}';".format(trace_id,
  132. trace_id)
  133. connection = pymysql.connect(
  134. host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  135. port=3306, # 端口号
  136. user="crawler", # mysql用户名
  137. passwd="crawler123456@", # mysql用户登录密码
  138. db="piaoquan-crawler", # 数据库名
  139. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  140. )
  141. cursor = connection.cursor()
  142. cursor.execute(sql)
  143. out_video_list = cursor.fetchall()
  144. if len(out_video_list) > 0:
  145. if out_video_list[0][0] == 0:
  146. video_id = cls.search_id_to_video(trace_id)
  147. else:
  148. video_id = out_video_list[0][0]
  149. vid_list = [video_id]
  150. logging(
  151. code="2003",
  152. trace_id=trace_id,
  153. info="recall_search_list",
  154. function="find_videos_in_mysql",
  155. data=vid_list
  156. )
  157. return {
  158. "search_videos": "success",
  159. "trace_id": trace_id,
  160. "video_list": vid_list
  161. }
  162. else:
  163. return {
  164. "search_videos": "failed",
  165. "trace_id": trace_id,
  166. "video_list": []
  167. }
  168. @classmethod
  169. def select_pq_videos(cls):
  170. """
  171. 查询
  172. :return: info_list
  173. """
  174. connection = pymysql.connect(
  175. host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  176. port=3306, # 端口号
  177. user="wx2016_longvideo", # mysql用户名
  178. passwd="wx2016_longvideoP@assword1234", # mysql用户登录密码
  179. db="incentive", # 数据库名
  180. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  181. )
  182. sql = "select video_id, key_words, search_keys, extra_keys from video_content"
  183. cursor = connection.cursor()
  184. cursor.execute(sql)
  185. data = cursor.fetchall()
  186. result = [
  187. {
  188. "video_id": line[0],
  189. "key_words": json.loads(line[1]),
  190. "search_keys": json.loads(line[2]),
  191. "extra_keys": json.loads(line[3]),
  192. }
  193. for line in data
  194. ]
  195. return result
  196. # 敏感词
  197. @classmethod
  198. def select_sensitive_words(cls):
  199. """
  200. sensitive words
  201. :return:
  202. """
  203. connection = pymysql.connect(
  204. host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  205. port=3306, # 端口号
  206. user="wx2016_longvideo", # mysql用户名
  207. passwd="wx2016_longvideoP@assword1234", # mysql用户登录密码
  208. db="longvideo", # 数据库名
  209. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  210. )
  211. sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
  212. cursor = connection.cursor()
  213. cursor.execute(sql)
  214. data = cursor.fetchall()
  215. result = [line[0] for line in data]
  216. return result
  217. @classmethod
  218. def search_id_to_video(cls, trace_id):
  219. """
  220. 通过 search_id 返回 video_id
  221. :param trace_id:
  222. :return:
  223. """
  224. sql = "select video_id from crawler_video where out_user_id = '{}' and video_title = '{}';".format(trace_id,
  225. trace_id)
  226. connection = pymysql.connect(
  227. host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  228. port=3306, # 端口号
  229. user="crawler", # mysql用户名
  230. passwd="crawler123456@", # mysql用户登录密码
  231. db="piaoquan-crawler", # 数据库名
  232. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  233. )
  234. cursor = connection.cursor()
  235. cursor.execute(sql)
  236. out_video_list = cursor.fetchall()
  237. if int(out_video_list[0][0]) == 0:
  238. time.sleep(1)
  239. return cls.search_id_to_video(trace_id)
  240. else:
  241. return out_video_list[0][0]