"""
@author: luojunhui
"""
  4. import os
  5. import json
  6. import uuid
  7. import requests
  8. import pymysql
  9. import urllib.parse
  10. from openai import OpenAI
  11. from applications.functions.log import logging
  12. class Functions(object):
  13. """
  14. 通用工具代码
  15. """
  16. # 敏感词逻辑
  17. @classmethod
  18. def sensitive_flag(cls, title):
  19. """
  20. 判断标题是否命中过滤词
  21. :param title:
  22. :return:
  23. """
  24. sensitive_words = MySQLServer().select_sensitive_words()
  25. for word in sensitive_words:
  26. if word in title:
  27. # title = title.replace(word, "*")
  28. return False
  29. return True
  30. # 自动加入白名单逻辑
  31. @classmethod
  32. def auto_white(cls, root_share_id):
  33. """
  34. 自动加入白名单, 保证公众号百分百出广告
  35. :param root_share_id:
  36. :return:
  37. """
  38. def get_cookie():
  39. """
  40. 获取 cookie
  41. :return:
  42. """
  43. url = "https://admin.piaoquantv.com/manager/login?account=luojunhui&passWd=e10adc3949ba59abbe56e057f20f883e&muid=7"
  44. payload = {}
  45. headers = {
  46. 'accept': 'application/json, text/plain, */*',
  47. 'accept-language': 'en',
  48. 'priority': 'u=1, i',
  49. 'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
  50. 'sec-ch-ua-mobile': '?0',
  51. 'sec-ch-ua-platform': '"macOS"',
  52. 'sec-fetch-dest': 'empty',
  53. 'sec-fetch-mode': 'cors',
  54. 'sec-fetch-site': 'same-origin',
  55. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
  56. }
  57. response = requests.request("GET", url, headers=headers, data=payload)
  58. return response.cookies.values()[0]
  59. url = "https://admin.piaoquantv.com/manager/ad/own/white/rootShare/save"
  60. dd = {
  61. "rootShareId": root_share_id,
  62. "commit": "算法自动加入白名单--"
  63. }
  64. payload = json.dumps(dd)
  65. cookie = get_cookie()
  66. headers = {
  67. 'accept': 'application/json',
  68. 'accept-language': 'en',
  69. 'content-type': 'application/json;',
  70. 'cookie': "SESSION=" + cookie,
  71. 'origin': 'https://admin.piaoquantv.com',
  72. 'priority': 'u=1, i',
  73. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
  74. }
  75. response = requests.request("POST", url, headers=headers, data=payload)
  76. return response.json()['content']
  77. # 创建公众号分享卡片
  78. @classmethod
  79. def create_gzh_path(cls, video_id, shared_uid):
  80. """
  81. :param video_id: 视频 id
  82. :param shared_uid: 分享 id
  83. """
  84. root_share_id = str(uuid.uuid4())
  85. url = f"pages/user-videos?id={video_id}&su={shared_uid}&fromGzh=1&rootShareId={root_share_id}&shareId={root_share_id}"
  86. # 自动把 root_share_id 加入到白名单
  87. cls.auto_white(root_share_id)
  88. return root_share_id, f"pages/category?jumpPage={urllib.parse.quote(url, safe='')}"
  89. # 从票圈请求视频
  90. @classmethod
  91. def request_for_info(cls, video_id):
  92. """
  93. 请求数据
  94. :param video_id:
  95. :return:
  96. """
  97. url = "https://longvideoapi.piaoquantv.com/longvideoapi/openapi/video/batchSelectVideoInfo"
  98. data = {
  99. "videoIdList": [video_id]
  100. }
  101. header = {
  102. "Content-Type": "application/json",
  103. }
  104. response = requests.post(url, headers=header, data=json.dumps(data))
  105. return response.json()
  106. # 清理标题
  107. @classmethod
  108. def clean_title(cls, strings):
  109. """
  110. :param strings:
  111. :return:
  112. """
  113. return (
  114. strings.strip()
  115. .replace("\n", "")
  116. .replace("/", "")
  117. .replace("\r", "")
  118. .replace("#", "")
  119. .replace(".", "。")
  120. .replace("\\", "")
  121. .replace("&NBSP", "")
  122. .replace(":", "")
  123. .replace("*", "")
  124. .replace("?", "")
  125. .replace("?", "")
  126. .replace('"', "")
  127. .replace("<", "")
  128. .replace(">", "")
  129. .replace("|", "")
  130. .replace(" ", "")
  131. .replace('"', "")
  132. .replace("'", "")
  133. )
  134. class MySQLServer(object):
  135. """
  136. MySql 服务
  137. """
  138. @classmethod
  139. def select_download_videos(cls, trace_id):
  140. """
  141. 查询
  142. :param trace_id:
  143. :return:
  144. """
  145. sql = "select video_id from crawler_video where out_user_id = '{}' and video_title = '{}';".format(trace_id, trace_id)
  146. connection = pymysql.connect(
  147. host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  148. port=3306, # 端口号
  149. user="crawler", # mysql用户名
  150. passwd="crawler123456@", # mysql用户登录密码
  151. db="piaoquan-crawler", # 数据库名
  152. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  153. )
  154. cursor = connection.cursor()
  155. cursor.execute(sql)
  156. out_video_list = cursor.fetchall()
  157. if len(out_video_list) > 0:
  158. vid_list = [i[0] for i in out_video_list if i[0] != 0]
  159. vid_list = [vid_list[0]]
  160. # dir_path = os.path.join(os.getcwd(), 'applications', 'static', "out_videos")
  161. # os.makedirs(os.path.dirname(dir_path), exist_ok=True)
  162. # done_list = os.listdir(dir_path)
  163. # process_list = [
  164. # (
  165. # i[1],
  166. # trace_id,
  167. # os.path.join(dir_path, "{}.json".format(i[0]))
  168. # ) for i in out_video_list if not "{}.json".format(i[0]) in done_list
  169. # ]
  170. # if process_list:
  171. # ask_kimi_and_save_to_local(process_list[0])
  172. logging(
  173. code="2003",
  174. trace_id=trace_id,
  175. info="recall_search_list",
  176. function="find_videos_in_mysql",
  177. data=vid_list
  178. )
  179. return {
  180. "search_videos": "success",
  181. "trace_id": trace_id,
  182. "video_list": vid_list
  183. }
  184. else:
  185. return {
  186. "search_videos": "failed",
  187. "trace_id": trace_id,
  188. "video_list": []
  189. }
  190. @classmethod
  191. def select_pq_videos(cls):
  192. """
  193. 查询
  194. :return: info_list
  195. """
  196. connection = pymysql.connect(
  197. host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  198. port=3306, # 端口号
  199. user="wx2016_longvideo", # mysql用户名
  200. passwd="wx2016_longvideoP@assword1234", # mysql用户登录密码
  201. db="incentive", # 数据库名
  202. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  203. )
  204. sql = "select video_id, key_words, search_keys, extra_keys from video_content"
  205. cursor = connection.cursor()
  206. cursor.execute(sql)
  207. data = cursor.fetchall()
  208. result = [
  209. {
  210. "video_id": line[0],
  211. "key_words": json.loads(line[1]),
  212. "search_keys": json.loads(line[2]),
  213. "extra_keys": json.loads(line[3]),
  214. }
  215. for line in data
  216. ]
  217. return result
  218. # 敏感词
  219. @classmethod
  220. def select_sensitive_words(cls):
  221. """
  222. sensitive words
  223. :return:
  224. """
  225. connection = pymysql.connect(
  226. host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com", # 数据库IP地址,内网地址
  227. port=3306, # 端口号
  228. user="wx2016_longvideo", # mysql用户名
  229. passwd="wx2016_longvideoP@assword1234", # mysql用户登录密码
  230. db="longvideo", # 数据库名
  231. charset="utf8mb4" # 如果数据库里面的文本是utf8编码的,charset指定是utf8
  232. )
  233. sql = "select `keyword` from wx_sensitive_word where `data_status` = 0"
  234. cursor = connection.cursor()
  235. cursor.execute(sql)
  236. data = cursor.fetchall()
  237. result = [line[0] for line in data]
  238. return result
  239. class KimiServer(object):
  240. """
  241. Kimi Server
  242. """
  243. @classmethod
  244. def ask_kimi(cls, question):
  245. """
  246. Ask Kimi for information
  247. :param question: tiny text
  248. :return: "{}"
  249. """
  250. single_title_prompt = """
  251. 我会给你一个视频标题,需要你帮我用你所学的知识来帮我分析出以下信息,信息我都写到 json 里面了
  252. {
  253. "key_words": [], # 返回三个关键词
  254. "search_keys": [], # 标题可能的搜索关键词,返回 3 个
  255. "extra_keys": [], # 关心这个视频的用户还会关心哪些关键词, 返回 3 个
  256. "theme": 标题的主题, 用一个词概括
  257. }
  258. 只需要返回一个 json,key 和上面的一样,
  259. 我给你的标题是:
  260. """
  261. client = OpenAI(
  262. api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
  263. base_url="https://api.moonshot.cn/v1"
  264. )
  265. chat_completion = client.chat.completions.create(
  266. messages=[
  267. {
  268. "role": "user",
  269. "content": single_title_prompt + question,
  270. }
  271. ],
  272. model="moonshot-v1-8k",
  273. )
  274. response = chat_completion.choices[0].message.content.replace('```json', '').replace('```', '')
  275. try:
  276. response = json.loads(response)
  277. return response
  278. except:
  279. return {}
  280. @classmethod
  281. def ask_kimi_and_save_to_local(cls, info_tuple):
  282. """
  283. save file to local
  284. :return:
  285. """
  286. title, trace_id, save_path = info_tuple[0], info_tuple[1], info_tuple[2]
  287. if os.path.exists(save_path):
  288. logging(
  289. code="2001",
  290. info="该 video 信息已经挖掘完成---{}".format(title),
  291. function="ask_kimi_and_save_to_local",
  292. trace_id=trace_id,
  293. )
  294. else:
  295. os.makedirs(os.path.dirname(save_path), exist_ok=True)
  296. if not title:
  297. result = {}
  298. else:
  299. result = cls.ask_kimi(title)
  300. logging(
  301. code="2001",
  302. info="kimi-result",
  303. data=result,
  304. trace_id=trace_id,
  305. function="ask_kimi_and_save_to_local"
  306. )
  307. with open(save_path, "w", encoding="utf-8") as f:
  308. f.write(json.dumps(result, ensure_ascii=False))
  309. @classmethod
  310. def kimi_title(cls, ori_title):
  311. """
  312. prompt + kimi + ori_title generate new title
  313. :param ori_title:
  314. :return:
  315. """
  316. single_title_prompt = """
  317. 请将以上标题改写成适合小程序点击和传播的小程序标题,小程序标题的写作规范如下,请学习后进行小程序标题的编写。直接输出最终的小程序标题
  318. 小程序标题写作规范:
  319. 1.要点前置:将最重要的信息放在标题的最前面,以快速吸引读者的注意力。例如,“5月一辈子同学,三辈子亲,送给我的老同学,听哭无数人!”中的“5月”和“一辈子同学,三辈子亲”都是重要的信息点。
  320. 2.激发情绪:使用能够触动人心的语言,激发读者的情感共鸣。如“只剩两人同学聚会,看后感动落泪。”使用“感动落泪”激发读者的同情和怀旧情绪。
  321. 3.使用数字和特殊符号:数字可以提供具体性,而特殊符号如“🔴”、“😄”、“🔥”等可以吸引视觉注意力,增加点击率。
  322. 4.悬念和好奇心:创建悬念或提出问题,激发读者的好奇心。例如,“太神奇了!长江水位下降,重庆出现惊奇一幕!”中的“惊奇一幕”就是一个悬念。
  323. 5.名人效应:如果内容与知名人士相关,提及他们的名字可以增加标题的吸引力。
  324. 6.社会价值观:触及读者的文化和社会价值观,如家庭、友情、国家荣誉等。
  325. 7.标点符号的运用:使用感叹号、问号等标点来增强语气和情感表达。
  326. 8.直接的语言:使用直白、口语化的语言,易于理解,如“狗屁股,笑死我了!”。
  327. 9.热点人物或事件:提及当前的热点人物或事件,利用热点效应吸引读者。
  328. 10.字数适中:保持标题在10-20个字之间,既不过长也不过短,确保信息的完整性和吸引力。
  329. 11.适当的紧迫感:使用“最新”、“首次”、“紧急”等词汇,创造一种紧迫感,促使读者立即行动。
  330. 12.情感或价值诉求:使用如“感动”、“泪目”、“经典”等词汇,直接与读者的情感或价值观产生共鸣。
  331. 避免误导:确保标题准确反映内容,避免夸大或误导读者。
  332. """
  333. client = OpenAI(
  334. api_key='sk-tz1VaKqksTzk0F8HxlU4YVGwj7oa1g0c0puGNUZrdn9MDtzm',
  335. base_url="https://api.moonshot.cn/v1"
  336. )
  337. chat_completion = client.chat.completions.create(
  338. messages=[
  339. {
  340. "role": "user",
  341. "content": ori_title + "\n" + single_title_prompt,
  342. }
  343. ],
  344. model="moonshot-v1-8k",
  345. )
  346. response = chat_completion.choices[0].message.content
  347. return response