common.py

  1. """
  2. @author: luojunhui
  3. """
  4. import os
  5. import json
  6. import time
  7. import uuid
  8. import requests
  9. import pymysql
  10. import urllib.parse
  11. from openai import OpenAI
  12. from applications.functions.log import logging


class Functions(object):
    """
    Common utility functions.
    """

    # Auto-whitelist logic
    @classmethod
    def auto_white(cls, root_share_id):
        """
        Automatically add the rootShareId to the ad whitelist so that
        official-account shares always serve ads.
        :param root_share_id:
        :return:
        """

        def get_cookie():
            """
            Log in to the admin console and return the session cookie.
            :return:
            """
            url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
            headers = {
                'accept': 'application/json, text/plain, */*',
                'accept-language': 'en',
                'priority': 'u=1, i',
                'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
                'sec-ch-ua-mobile': '?0',
                'sec-ch-ua-platform': '"macOS"',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'cors',
                'sec-fetch-site': 'same-origin',
                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
            }
            response = requests.request("GET", url, headers=headers)
            return response.cookies.values()[0]

        url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
        payload = json.dumps({
            "rootShareId": root_share_id,
            "commit": "算法自动加入白名单--"
        })
        cookie = get_cookie()
        headers = {
            'accept': 'application/json',
            'accept-language': 'en',
            'content-type': 'application/json;',
            'cookie': "SESSION=" + cookie,
            'origin': 'https://admin.piaoquantv.com',
            'priority': 'u=1, i',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
        }
        response = requests.request("POST", url, headers=headers, data=payload)
        return response.json()['content']

    # Create an official-account share card
    @classmethod
    def create_gzh_path(cls, video_id, shared_uid):
        """
        :param video_id: video id
        :param shared_uid: share uid
        """
        root_share_id = str(uuid.uuid4())
        url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
        # Automatically add the root_share_id to the whitelist
        cls.auto_white(root_share_id)
        # The inner path is URL-encoded and passed to pages/category as jumpPage
        return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"

    # Fetch video info from Piaoquan
    @classmethod
    def request_for_info(cls, video_id):
        """
        Request video data.
        :param video_id:
        :return:
        """
        url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
        data = {
            "videoIdList": [video_id]
        }
        header = {
            "Content-Type": "application/json",
        }
        response = requests.post(url, headers=header, data=json.dumps(data))
        return response.json()

    # Clean up a title
    @classmethod
    def clean_title(cls, strings):
        """
        Strip whitespace and remove characters that break file names and URLs.
        :param strings:
        :return:
        """
        return (
            strings.strip()
            .replace("\n", "")
            .replace("/", "")
            .replace("\r", "")
            .replace("#", "")
            .replace(".", "。")
            .replace("\\", "")
            .replace("&NBSP", "")
            .replace(":", "")
            .replace("*", "")
            .replace("?", "")
            .replace('"', "")
            .replace("<", "")
            .replace(">", "")
            .replace("|", "")
            .replace(" ", "")
            .replace("'", "")
        )
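
    # Illustrative example only (the input title below is made up for
    # demonstration): clean_title drops slashes, question marks, quotes,
    # spaces and similar characters, e.g.
    #   Functions.clean_title(' 同学聚会/最感人?一幕 ') -> '同学聚会最感人一幕'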


class MySQLServer(object):
    """
    MySQL helpers.
    """

    @classmethod
    def select_download_videos(cls, trace_id):
        """
        Query downloaded videos for a trace_id.
        :param trace_id:
        :return:
        """
        sql = "select video_id from crawler_video where out_user_id = %s and video_title = %s;"
        connection = pymysql.connect(
            host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (internal/VPC address)
            port=3306,  # port
            user="crawler",  # MySQL user
            passwd="crawler123456@",  # MySQL password
            db="piaoquan-crawler",  # database name
            charset="utf8mb4"  # charset for utf8-encoded text
        )
        cursor = connection.cursor()
        cursor.execute(sql, (trace_id, trace_id))
        out_video_list = cursor.fetchall()
        connection.close()
        if len(out_video_list) > 0:
            if out_video_list[0][0] == 0:
                # The crawler has not written the final video_id yet; poll for it
                video_id = cls.search_id_to_video(trace_id)
            else:
                video_id = out_video_list[0][0]
            vid_list = [video_id]
            logging(
                code="2003",
                trace_id=trace_id,
                info="recall_search_list",
                function="find_videos_in_mysql",
                data=vid_list
            )
            return {
                "search_videos": "success",
                "trace_id": trace_id,
                "video_list": vid_list
            }
        else:
            return {
                "search_videos": "failed",
                "trace_id": trace_id,
                "video_list": []
            }

    @classmethod
    def select_pq_videos(cls):
        """
        Query video keyword info.
        :return: info_list
        """
        connection = pymysql.connect(
            host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host (internal/VPC address)
            port=3306,  # port
            user="wx2016_longvideo",  # MySQL user
            passwd="wx2016_longvideoP@assword1234",  # MySQL password
            db="incentive",  # database name
            charset="utf8mb4"  # charset for utf8-encoded text
        )
        sql = "select video_id, key_words, search_keys, extra_keys from video_content"
        cursor = connection.cursor()
        cursor.execute(sql)
        data = cursor.fetchall()
        connection.close()
        result = [
            {
                "video_id": line[0],
                "key_words": json.loads(line[1]),
                "search_keys": json.loads(line[2]),
                "extra_keys": json.loads(line[3]),
            }
            for line in data
        ]
        return result

    # Sensitive words
    @classmethod
    def select_sensitive_words(cls):
        """
        Fetch the active sensitive-word list.
        :return:
        """
        connection = pymysql.connect(
            host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host (internal/VPC address)
            port=3306,  # port
            user="wx2016_longvideo",  # MySQL user
            passwd="wx2016_longvideoP@assword1234",  # MySQL password
            db="longvideo",  # database name
            charset="utf8mb4"  # charset for utf8-encoded text
        )
        sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
        cursor = connection.cursor()
        cursor.execute(sql)
        data = cursor.fetchall()
        connection.close()
        result = [line[0] for line in data]
        return result

    @classmethod
    def search_id_to_video(cls, trace_id):
        """
        Return the video_id for a search_id (trace_id), retrying until the
        crawler has written a non-zero id.
        :param trace_id:
        :return:
        """
        sql = "select video_id from crawler_video where out_user_id = %s and video_title = %s;"
        connection = pymysql.connect(
            host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host (internal/VPC address)
            port=3306,  # port
            user="crawler",  # MySQL user
            passwd="crawler123456@",  # MySQL password
            db="piaoquan-crawler",  # database name
            charset="utf8mb4"  # charset for utf8-encoded text
        )
        cursor = connection.cursor()
        cursor.execute(sql, (trace_id, trace_id))
        out_video_list = cursor.fetchall()
        connection.close()
        if int(out_video_list[0][0]) == 0:
            # video_id not ready yet: wait and retry
            time.sleep(1)
            return cls.search_id_to_video(trace_id)
        else:
            return out_video_list[0][0]


class KimiServer(object):
    """
    Kimi Server
    """

    @classmethod
    def ask_kimi(cls, question):
        """
        Ask Kimi for keyword information about a title.
        :param question: tiny text
        :return: parsed dict, or {} if Kimi's answer is not valid JSON
        """
        single_title_prompt = """
我会给你一个视频标题,需要你帮我用你所学的知识来帮我分析出以下信息,信息我都写到 json 里面了
{
"key_words": [], # 返回三个关键词
"search_keys": [], # 标题可能的搜索关键词,返回 3 个
"extra_keys": [], # 关心这个视频的用户还会关心哪些关键词, 返回 3 个
"theme": 标题的主题, 用一个词概括
}
只需要返回一个 json,key 和上面的一样,
我给你的标题是:
"""
        client = OpenAI(
            api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
            base_url="https://api.moonshot.cn/v1"
        )
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": single_title_prompt + question,
                }
            ],
            model="moonshot-v1-8k",
        )
        # Strip the optional markdown fence around the JSON answer
        response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
        try:
            return json.loads(response)
        except json.JSONDecodeError:
            return {}

    @classmethod
    def ask_kimi_and_save_to_local(cls, info_tuple):
        """
        Ask Kimi about a title and save the result to a local JSON file.
        :return:
        """
        title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
        if os.path.exists(save_path):
            # Already mined for this video, skip
            logging(
                code="2001",
                info="该 video 信息已经挖掘完成---{}".format(title),
                function="ask_kimi_and_save_to_local",
                trace_id=trace_id,
            )
        else:
            os.makedirs(os.path.dirname(save_path), exist_ok=True)
            if not title:
                result = {}
            else:
                result = cls.ask_kimi(title)
            logging(
                code="2001",
                info="kimi-result",
                data=result,
                trace_id=trace_id,
                function="ask_kimi_and_save_to_local"
            )
            with open(save_path, "w", encoding="utf-8") as f:
                f.write(json.dumps(result, ensure_ascii=False))

    @classmethod
    def kimi_title(cls, ori_title):
        """
        Generate a new mini-program title from ori_title via a prompt to Kimi.
        :param ori_title:
        :return:
        """
        single_title_prompt = """
请将以上标题改写成适合小程序点击和传播的小程序标题,小程序标题的写作规范如下,请学习后进行小程序标题的编写。直接输出最终的小程序标题
小程序标题写作规范:
1.要点前置:将最重要的信息放在标题的最前面,以快速吸引读者的注意力。例如,“5月一辈子同学,三辈子亲,送给我的老同学,听哭无数人!”中的“5月”和“一辈子同学,三辈子亲”都是重要的信息点。
2.激发情绪:使用能够触动人心的语言,激发读者的情感共鸣。如“只剩两人同学聚会,看后感动落泪。”使用“感动落泪”激发读者的同情和怀旧情绪。
3.使用数字和特殊符号:数字可以提供具体性,而特殊符号如“🔴”、“😄”、“🔥”等可以吸引视觉注意力,增加点击率。
4.悬念和好奇心:创建悬念或提出问题,激发读者的好奇心。例如,“太神奇了!长江水位下降,重庆出现惊奇一幕!”中的“惊奇一幕”就是一个悬念。
5.名人效应:如果内容与知名人士相关,提及他们的名字可以增加标题的吸引力。
6.社会价值观:触及读者的文化和社会价值观,如家庭、友情、国家荣誉等。
7.标点符号的运用:使用感叹号、问号等标点来增强语气和情感表达。
8.直接的语言:使用直白、口语化的语言,易于理解,如“狗屁股,笑死我了!”。
9.热点人物或事件:提及当前的热点人物或事件,利用热点效应吸引读者。
10.字数适中:保持标题在10-20个字之间,既不过长也不过短,确保信息的完整性和吸引力。
11.适当的紧迫感:使用“最新”、“首次”、“紧急”等词汇,创造一种紧迫感,促使读者立即行动。
12.情感或价值诉求:使用如“感动”、“泪目”、“经典”等词汇,直接与读者的情感或价值观产生共鸣。
避免误导:确保标题准确反映内容,避免夸大或误导读者。
"""
        client = OpenAI(
            api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
            base_url="https://api.moonshot.cn/v1"
        )
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": ori_title + "\n" + single_title_prompt,
                }
            ],
            model="moonshot-v1-8k",
        )
        response = chat_completion.choices[0].message.content
        return response
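

# Illustrative usage sketch, not part of the original module: the ids below
# (12345, 67890) and the sample title are made-up placeholders. Note that
# create_gzh_path and the KimiServer calls hit the live Piaoquan admin API and
# the Moonshot (Kimi) API, so they only work inside that environment.
if __name__ == "__main__":
    title = Functions.clean_title(" 太神奇了!长江水位下降/重庆出现惊奇一幕? ")
    print(title)

    # Build a shareable mini-program path and whitelist its rootShareId.
    # root_share_id, share_path = Functions.create_gzh_path(video_id=12345, shared_uid=67890)

    # Keyword mining and title rewriting via Kimi.
    # print(KimiServer.ask_kimi(title))
    # print(KimiServer.kimi_title(title))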