# common.py
  1. """
  2. @author: luojunhui
  3. """
  4. import os
  5. import json
  6. import uuid
  7. import requests
  8. import pymysql
  9. import urllib.parse
  10. from openai import OpenAI
  11. from applications.functions.log import logging
  12. class Functions(object):
  13. """
  14. 通用工具代码
  15. """
  16. # 敏感词逻辑
  17. @classmethod
  18. def sensitive_flag(cls, title):
  19. """
  20. 判断标题是否命中过滤词
  21. :param title:
  22. :return:
  23. """
  24. sensitive_words = MySQLServer().select_sensitive_words()
  25. for word in sensitive_words:
  26. if word in title:
  27. # title = title.replace(word, "*")
  28. return False
  29. return True
  30. # 自动加入白名单逻辑
  31. @classmethod
  32. def auto_white(cls, root_share_id):
  33. """
  34. 自动加入白名单, 保证公众号百分百出广告
  35. :param root_share_id:
  36. :return:
  37. """
  38. def get_cookie():
  39. """
  40. 获取 cookie
  41. :return:
  42. """
  43. url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
  44. payload = {}
  45. headers = {
  46. 'accept': 'application/json, text/plain, */*',
  47. 'accept-language': 'en',
  48. 'priority': 'u=1, i',
  49. 'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
  50. 'sec-ch-ua-mobile': '?0',
  51. 'sec-ch-ua-platform': '"macOS"',
  52. 'sec-fetch-dest': 'empty',
  53. 'sec-fetch-mode': 'cors',
  54. 'sec-fetch-site': 'same-origin',
  55. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
  56. }
  57. response = requests.request("GET", url, headers=headers, data=payload)
  58. return response.cookies.values()[0]
  59. url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
  60. dd = {
  61. "rootShareId": root_share_id,
  62. "commit": "算法自动加入白名单--"
  63. }
  64. payload = json.dumps(dd)
  65. cookie = get_cookie()
  66. headers = {
  67. 'accept': 'application/json',
  68. 'accept-language': 'en',
  69. 'content-type': 'application/json;',
  70. 'cookie': "SESSION=" + cookie,
  71. 'origin': 'https://admin.piaoquantv.com',
  72. 'priority': 'u=1, i',
  73. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
  74. }
  75. response = requests.request("POST", url, headers=headers, data=payload)
  76. return response.json()['content']
  77. # 创建公众号分享卡片
  78. @classmethod
  79. def create_gzh_path(cls, video_id, shared_uid):
  80. """
  81. :param video_id: 视频 id
  82. :param shared_uid: 分享 id
  83. """
  84. root_share_id = str(uuid.uuid4())
  85. url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
  86. # 自动把 root_share_id 加入到白名单
  87. cls.auto_white(root_share_id)
  88. return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"
  89. # 从票圈请求视频
  90. @classmethod
  91. def request_for_info(cls, video_id):
  92. """
  93. 请求数据
  94. :param video_id:
  95. :return:
  96. """
  97. url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
  98. data = {
  99. "videoIdList": [video_id]
  100. }
  101. header = {
  102. "Content-Type": "application/json",
  103. }
  104. response = requests.post(url, headers=header, data=json.dumps(data))
  105. return response.json()
  106. # 清理标题
  107. @classmethod
  108. def clean_title(cls, strings):
  109. """
  110. :param strings:
  111. :return:
  112. """
  113. return (
  114. strings.strip()
  115. .replace("\n", "")
  116. .replace("/", "")
  117. .replace("\r", "")
  118. .replace("#", "")
  119. .replace(".", "。")
  120. .replace("\\", "")
  121. .replace("&NBSP", "")
  122. .replace(":", "")
  123. .replace("*", "")
  124. .replace("?", "")
  125. .replace("?", "")
  126. .replace('"', "")
  127. .replace("<", "")
  128. .replace(">", "")
  129. .replace("|", "")
  130. .replace(" ", "")
  131. .replace('"', "")
  132. .replace("'", "")
  133. )
  134. class MySQLServer(object):
  135. """
  136. MySql 服务
  137. """
  138. @classmethod
  139. def select_download_videos(cls, trace_id):
  140. """
  141. 查询
  142. :param trace_id:
  143. :return:
  144. """
  145. sql = "select video_id, video_title from crawler_video where out_user_id = '{}' limit 5;".format(trace_id)
  146. connection = pymysql.connect(
  147. host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  148. port=3306, # 端口号
  149. user="crawler", # mysql用户名
  150. passwd="crawler123456@", # mysql用户登录密码
  151. db="piaoquan-crawler", # 数据库名
  152. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  153. )
  154. cursor = connection.cursor()
  155. cursor.execute(sql)
  156. out_video_list = cursor.fetchall()
  157. if len(out_video_list) > 0:
  158. vid_list = [i[0] for i in out_video_list if i[0] != 0]
  159. vid_list = [vid_list[0]]
  160. # dir_path = os.path.join(os.getcwd(), 'applications', 'static', "out_videos")
  161. # os.makedirs(os.path.dirname(dir_path), exist_ok=True)
  162. # done_list = os.listdir(dir_path)
  163. # process_list = [
  164. # (
  165. # i[1],
  166. # trace_id,
  167. # os.path.join(dir_path, "{}.json".format(i[0]))
  168. # ) for i in out_video_list if not "{}.json".format(i[0]) in done_list
  169. # ]
  170. # if process_list:
  171. # ask_kimi_and_save_to_local(process_list[0])
  172. logging(
  173. code="2003",
  174. trace_id=trace_id,
  175. info="recall_search_list",
  176. function="find_videos_in_mysql",
  177. data=vid_list
  178. )
  179. return {
  180. "search_videos": "success",
  181. "trace_id": trace_id,
  182. "video_list": vid_list
  183. }
  184. else:
  185. return {
  186. "search_videos": "failed",
  187. "trace_id": trace_id,
  188. "video_list": []
  189. }
  190. @classmethod
  191. def select_pq_videos(cls):
  192. """
  193. 查询
  194. :return: info_list
  195. """
  196. connection = pymysql.connect(
  197. host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  198. port=3306, # 端口号
  199. user="wx2016_longvideo", # mysql用户名
  200. passwd="wx2016_longvideoP@assword1234", # mysql用户登录密码
  201. db="incentive", # 数据库名
  202. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  203. )
  204. sql = "select video_id, key_words, search_keys, extra_keys from video_content"
  205. cursor = connection.cursor()
  206. cursor.execute(sql)
  207. data = cursor.fetchall()
  208. result = [
  209. {
  210. "video_id": line[0],
  211. "key_words": json.loads(line[1]),
  212. "search_keys": json.loads(line[2]),
  213. "extra_keys": json.loads(line[3]),
  214. }
  215. for line in data
  216. ]
  217. return result
  218. # 敏感词
  219. @classmethod
  220. def select_sensitive_words(cls):
  221. """
  222. sensitive words
  223. :return:
  224. """
  225. connection = pymysql.connect(
  226. host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  227. port=3306, # 端口号
  228. user="wx2016_longvideo", # mysql用户名
  229. passwd="wx2016_longvideoP@assword1234", # mysql用户登录密码
  230. db="longvideo", # 数据库名
  231. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  232. )
  233. sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
  234. cursor = connection.cursor()
  235. cursor.execute(sql)
  236. data = cursor.fetchall()
  237. result = [line[0] for line in data]
  238. return result
  239. class KimiServer(object):
  240. """
  241. Kimi Server
  242. """
  243. @classmethod
  244. def ask_kimi(cls, question):
  245. """
  246. Ask Kimi for information
  247. :param question: tiny text
  248. :return: "{}"
  249. """
  250. single_title_prompt = """
  251. 我会给你一个视频标题,需要你帮我用你所学的知识来帮我分析出以下信息,信息我都写到 json 里面了
  252. {
  253. "key_words": [], # 返回三个关键词
  254. "search_keys": [], # 标题可能的搜索关键词,返回 3 个
  255. "extra_keys": [], # 关心这个视频的用户还会关心哪些关键词, 返回 3 个
  256. "theme": 标题的主题, 用一个词概括
  257. }
  258. 只需要返回一个 json,key 和上面的一样,
  259. 我给你的标题是:
  260. """
  261. client = OpenAI(
  262. api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
  263. base_url="https://api.moonshot.cn/v1"
  264. )
  265. chat_completion = client.chat.completions.create(
  266. messages=[
  267. {
  268. "role": "user",
  269. "content": single_title_prompt + question,
  270. }
  271. ],
  272. model="moonshot-v1-8k",
  273. )
  274. response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
  275. try:
  276. response = json.loads(response)
  277. return response
  278. except:
  279. return {}
  280. @classmethod
  281. def ask_kimi_and_save_to_local(cls, info_tuple):
  282. """
  283. save file to local
  284. :return:
  285. """
  286. title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
  287. if os.path.exists(save_path):
  288. logging(
  289. code="2001",
  290. info="该 video 信息已经挖掘完成---{}".format(title),
  291. function="ask_kimi_and_save_to_local",
  292. trace_id=trace_id,
  293. )
  294. else:
  295. os.makedirs(os.path.dirname(save_path), exist_ok=True)
  296. if not title:
  297. result = {}
  298. else:
  299. result = cls.ask_kimi(title)
  300. logging(
  301. code="2001",
  302. info="kimi-result",
  303. data=result,
  304. trace_id=trace_id,
  305. function="ask_kimi_and_save_to_local"
  306. )
  307. with open(save_path, "w", encoding="utf-8") as f:
  308. f.write(json.dumps(result, ensure_ascii=False))