common.py

# encoding: utf-8
"""
@author: luojunhui
"""
import json
import time
import uuid
import urllib.parse

import pymysql
import requests

from applications.functions.log import logging


class Functions(object):
    """
    Common utility functions
    """

    # Auto-whitelist logic
    @classmethod
    def auto_white(cls, root_share_id):
        """
        Automatically add root_share_id to the ad whitelist so the official account always serves ads
        :param root_share_id:
        :return:
        """

        def get_cookie():
            """
            Log in to the admin console and return the session cookie
            :return:
            """
            url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
            payload = {}
            headers = {
                'accept': 'application/json, text/plain, */*',
                'accept-language': 'en',
                'priority': 'u=1, i',
                'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
                'sec-ch-ua-mobile': '?0',
                'sec-ch-ua-platform': '"macOS"',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'cors',
                'sec-fetch-site': 'same-origin',
                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
            }
            response = requests.request("GET", url, headers=headers, data=payload)
            return response.cookies.values()[0]

        url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
        dd = {
            "rootShareId": root_share_id,
            "commit": "算法自动加入白名单--"  # audit note: "added to whitelist automatically by the algorithm"
        }
        payload = json.dumps(dd)
        cookie = get_cookie()
        headers = {
            'accept': 'application/json',
            'accept-language': 'en',
            'content-type': 'application/json;',
            'cookie': "SESSION=" + cookie,
            'origin': 'https://admin.piaoquantv.com',
            'priority': 'u=1, i',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
        }
        response = requests.request("POST", url, headers=headers, data=payload)
        return response.json()['content']

    # Create an official-account (公众号) share card path
    @classmethod
    def create_gzh_path(cls, video_id, shared_uid):
        """
        :param video_id: video id
        :param shared_uid: sharer uid
        """
        root_share_id = str(uuid.uuid4())
        url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
        # Automatically add root_share_id to the whitelist
        cls.auto_white(root_share_id)
        return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"

    # Request video info from Piaoquan
    @classmethod
    def request_for_info(cls, video_id):
        """
        Request video data
        :param video_id:
        :return:
        """
        url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
        data = {
            "videoIdList": [video_id]
        }
        header = {
            "Content-Type": "application/json",
        }
        response = requests.post(url, headers=header, data=json.dumps(data))
        return response.json()

    # Clean up a title
    @classmethod
    def clean_title(cls, strings):
        """
        Strip whitespace and characters that are unsafe in titles / file names
        :param strings:
        :return:
        """
        return (
            strings.strip()
            .replace("\n", "")
            .replace("/", "")
            .replace("\r", "")
            .replace("#", "")
            .replace(".", "。")
            .replace("\\", "")
            .replace("&NBSP", "")
            .replace(":", "")
            .replace("*", "")
            .replace("?", "")
            .replace("?", "")
            .replace('"', "")
            .replace("<", "")
            .replace(">", "")
            .replace("|", "")
            .replace(" ", "")
            .replace('"', "")
            .replace("'", "")
        )

    @classmethod
    def sensitive_flag(cls, s_words, ori_title):
        """
        Return False if the title contains any sensitive word, otherwise True
        :param s_words:
        :param ori_title:
        :return:
        """
        for word in s_words:
            if word in ori_title:
                return False
        return True
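
# Illustrative usage of the Functions helpers above (comment only; the video_id
# and shared_uid values are hypothetical placeholders, and create_gzh_path will
# call the Piaoquan admin API through auto_white):
#
#   title = Functions.clean_title("今日/头条 #标题?")
#   if Functions.sensitive_flag(MySQLServer.select_sensitive_words(), title):
#       root_share_id, share_path = Functions.create_gzh_path(video_id=123456, shared_uid="654321")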


class MySQLServer(object):
    """
    MySQL service
    """

    @classmethod
    def select_download_videos(cls, trace_id):
        """
        Query downloaded videos by trace_id
        :param trace_id:
        :return:
        """
        sql = "select video_id from crawler_video where out_user_id = %s and video_title = %s;"
        connection = pymysql.connect(
            host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (internal address)
            port=3306,  # port
            user="crawler",  # MySQL username
            passwd="crawler123456@",  # MySQL password
            db="piaoquan-crawler",  # database name
            charset="utf8mb4"  # charset for the utf8-encoded text in the database
        )
        cursor = connection.cursor()
        cursor.execute(sql, (trace_id, trace_id))
        out_video_list = cursor.fetchall()
        connection.close()
        if len(out_video_list) > 0:
            if out_video_list[0][0] == 0:
                # video_id not filled in yet, resolve it via the search id
                video_id = cls.search_id_to_video(trace_id)
            else:
                video_id = out_video_list[0][0]
            vid_list = [video_id]
            logging(
                code="2003",
                trace_id=trace_id,
                info="recall_search_list",
                function="find_videos_in_mysql",
                data=vid_list
            )
            return {
                "search_videos": "success",
                "trace_id": trace_id,
                "video_list": vid_list
            }
        else:
            return {
                "search_videos": "failed",
                "trace_id": trace_id,
                "video_list": []
            }

    @classmethod
    def select_pq_videos(cls):
        """
        Query keyword data for Piaoquan videos
        :return: info_list
        """
        connection = pymysql.connect(
            host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host (internal address)
            port=3306,  # port
            user="wx2016_longvideo",  # MySQL username
            passwd="wx2016_longvideoP@assword1234",  # MySQL password
            db="incentive",  # database name
            charset="utf8mb4"  # charset for the utf8-encoded text in the database
        )
        sql = "select video_id, key_words, search_keys, extra_keys from video_content"
        cursor = connection.cursor()
        cursor.execute(sql)
        data = cursor.fetchall()
        connection.close()
        result = [
            {
                "video_id": line[0],
                "key_words": json.loads(line[1]),
                "search_keys": json.loads(line[2]),
                "extra_keys": json.loads(line[3]),
            }
            for line in data
        ]
        return result

    # Sensitive words
    @classmethod
    def select_sensitive_words(cls):
        """
        Query the list of sensitive words
        :return:
        """
        connection = pymysql.connect(
            host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host (internal address)
            port=3306,  # port
            user="wx2016_longvideo",  # MySQL username
            passwd="wx2016_longvideoP@assword1234",  # MySQL password
            db="longvideo",  # database name
            charset="utf8mb4"  # charset for the utf8-encoded text in the database
        )
        sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
        cursor = connection.cursor()
        cursor.execute(sql)
        data = cursor.fetchall()
        connection.close()
        result = [line[0] for line in data]
        return result

    @classmethod
    def search_id_to_video(cls, trace_id):
        """
        Resolve the video_id for a search_id (trace_id)
        :param trace_id:
        :return:
        """
        sql = "select video_id from crawler_video where out_user_id = %s and video_title = %s;"
        connection = pymysql.connect(
            host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (internal address)
            port=3306,  # port
            user="crawler",  # MySQL username
            passwd="crawler123456@",  # MySQL password
            db="piaoquan-crawler",  # database name
            charset="utf8mb4"  # charset for the utf8-encoded text in the database
        )
        cursor = connection.cursor()
        cursor.execute(sql, (trace_id, trace_id))
        out_video_list = cursor.fetchall()
        connection.close()
        if int(out_video_list[0][0]) == 0:
            # The crawler has not written the real video_id yet; wait and retry
            time.sleep(1)
            return cls.search_id_to_video(trace_id)
        else:
            return out_video_list[0][0]
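

# Minimal manual smoke test, kept behind the __main__ guard so importing this
# module stays side-effect free. It assumes network access to the internal RDS
# instances used above; the trace_id below is a hypothetical placeholder.
if __name__ == "__main__":
    sample_trace_id = "search-00000000-0000-0000-0000-000000000000"  # hypothetical value
    sensitive_words = MySQLServer.select_sensitive_words()
    print("loaded {} sensitive words".format(len(sensitive_words)))
    print(json.dumps(MySQLServer.select_download_videos(sample_trace_id), ensure_ascii=False))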