music_album_recommend.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/7/12
  4. import os
  5. import random
  6. import shutil
  7. import sys
  8. import time
  9. import ffmpeg
  10. import requests
  11. import urllib3
  12. sys.path.append(os.getcwd())
  13. from main.common import Common
  14. from main.feishu_lib import Feishu
  15. from main.publish import Publish
  16. class Recommend:
  17. wechat_sheet = Feishu.get_values_batch("log", "music_album", "L7Y9vz")
  18. wechat_token = wechat_sheet[0][0]
  19. # 过滤词库
  20. @classmethod
  21. def sensitive_words(cls, log_type):
  22. try:
  23. # 敏感词库列表
  24. word_list = []
  25. # 从云文档读取所有敏感词,添加到词库列表
  26. lists = Feishu.get_values_batch(log_type, "music_album", "kNTEno")
  27. for i in lists:
  28. for j in i:
  29. # 过滤空的单元格内容
  30. if j is None:
  31. pass
  32. else:
  33. word_list.append(j)
  34. return word_list
  35. except Exception as e:
  36. Common.logger(log_type).error("获取过滤词库异常:{}", e)
  37. # 抓取规则
  38. @classmethod
  39. def get_rule(cls, play_cnt, share_cnt, duration):
  40. """
  41. - 播放量≥20000
  42. - 分享量≥200
  43. - 视频时长≥1分钟
  44. :return:
  45. """
  46. if int(play_cnt) >= 20000:
  47. if int(share_cnt) >= 200:
  48. if int(duration) >= 60:
  49. return True
  50. else:
  51. return False
  52. else:
  53. return False
  54. else:
  55. return False
  56. # 下载规则
  57. @classmethod
  58. def download_rule(cls, share_cnt, play_cnt):
  59. """
  60. - 标题为空的视频,不进行下载。
  61. - 视频时长<1分钟,不进行下载。
  62. - 分享率<1%的视频,不进行下载(分享量/播放量)
  63. - 标题中含有以下词汇的视频,直接过滤掉,不进行下载。
  64. :return:
  65. """
  66. if int(share_cnt) / int(play_cnt) >= 0.01:
  67. return True
  68. else:
  69. return False
  70. # 获取已下载视频宽高、时长等信息
  71. @classmethod
  72. def get_video_info_from_local(cls, video_path):
  73. probe = ffmpeg.probe(video_path)
  74. # print('video_path: {}'.format(video_path))
  75. # format1 = probe['format']
  76. # bit_rate = int(format1['bit_rate']) / 1000
  77. # duration = format['duration']
  78. # size = int(format1['size']) / 1024 / 1024
  79. video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
  80. if video_stream is None:
  81. print('No video stream found!')
  82. return
  83. width = int(video_stream['width'])
  84. height = int(video_stream['height'])
  85. # num_frames = int(video_stream['nb_frames'])
  86. # fps = int(video_stream['r_frame_rate'].split('/')[0]) / int(video_stream['r_frame_rate'].split('/')[1])
  87. duration = float(video_stream['duration'])
  88. # print('width: {}'.format(width))
  89. # print('height: {}'.format(height))
  90. # print('num_frames: {}'.format(num_frames))
  91. # print('bit_rate: {}k'.format(bit_rate))
  92. # print('fps: {}'.format(fps))
  93. # print('size: {}MB'.format(size))
  94. # print('duration: {}'.format(duration))
  95. return width, height, duration
  96. # 获取视频ID
  97. @classmethod
  98. def get_video_list(cls, log_type):
  99. try:
  100. # 翻50页
  101. for num in range(1, 51):
  102. Common.logger(log_type).info("正在抓取第{}页\n", num)
  103. url = "https://pro.yaoman.net/api/work/list/%E6%8E%A8%E8%8D%90/" + str(num)
  104. headers = {
  105. "accept": "*/*",
  106. "content-type": "application/json",
  107. "token": cls.wechat_token,
  108. "accept-language": "zh-CN,zh-Hans;q=0.9",
  109. "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X)"
  110. " AppleWebKit/605.1.15 (KHTML, like Gecko)"
  111. " Mobile/15E217 MicroMessenger/6.8.0(0x16080000)"
  112. " NetType/WIFI Language/en Branch/Br_trunk MiniProgramEnv/Mac",
  113. "accept-encoding": "gzip, deflate, br",
  114. "referer": "https://servicewechat.com/wx6d6f1348072452e9/21/page-frame.html"
  115. }
  116. urllib3.disable_warnings()
  117. r = requests.get(url=url, headers=headers, verify=False)
  118. if "data" not in r.json()["d"] or len(r.json()["d"]["data"]) == 0:
  119. Common.logger(log_type).warning("response:{}", r.text)
  120. data = r.json()["d"]["data"]
  121. for i in range(len(data)):
  122. if "id" in data[i]:
  123. video_id = data[i]["id"]
  124. else:
  125. video_id = 0
  126. if "cover_url" in data[i]:
  127. cover_url = data[i]["cover_url"]
  128. else:
  129. cover_url = 0
  130. cls.get_video_info(log_type, video_id, cover_url)
  131. except Exception as e:
  132. Common.logger(log_type).error("get_video_list异常:{}", e)
  133. # 获取视频详情
  134. @classmethod
  135. def get_video_info(cls, log_type, video_id, cover_url):
  136. try:
  137. url = "https://pro.yaoman.net/api/work/detail/" + str(video_id)
  138. headers = {
  139. "accept": "*/*",
  140. "content-type": "application/json",
  141. "token": cls.wechat_token,
  142. "accept-language": "zh-CN,zh-Hans;q=0.9",
  143. "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X)"
  144. " AppleWebKit/605.1.15 (KHTML, like Gecko)"
  145. " Mobile/15E217 MicroMessenger/6.8.0(0x16080000)"
  146. " NetType/WIFI Language/en Branch/Br_trunk MiniProgramEnv/Mac",
  147. "accept-encoding": "gzip, deflate, br",
  148. "referer": "https://servicewechat.com/wx6d6f1348072452e9/21/page-frame.html"
  149. }
  150. urllib3.disable_warnings()
  151. r = requests.get(url=url, headers=headers, verify=False)
  152. if r.json()["m"] != "success":
  153. Common.logger(log_type).warning("response:{}", r.text)
  154. # video_title
  155. if "name" in r.json()["d"]:
  156. video_title = r.json()["d"]["name"][:30]
  157. else:
  158. video_title = 0
  159. # video_id
  160. if "id" in r.json()["d"]:
  161. video_id = r.json()["d"]["id"]
  162. else:
  163. video_id = 0
  164. # play_cnt
  165. if "view_number" in r.json()["d"]:
  166. play_cnt = r.json()["d"]["view_number"]
  167. else:
  168. play_cnt = 0
  169. # like_cnt
  170. if "flower_number" in r.json()["d"]:
  171. like_cnt = r.json()["d"]["flower_number"]
  172. else:
  173. like_cnt = 0
  174. # share_cnt
  175. if "share_number" in r.json()["d"]:
  176. share_cnt = r.json()["d"]["share_number"]
  177. else:
  178. share_cnt = 0
  179. # comment_cnt
  180. if "comment_number" in r.json()["d"]:
  181. comment_cnt = r.json()["d"]["comment_number"]
  182. else:
  183. comment_cnt = 0
  184. # send_time
  185. if "updated_at" in r.json()["d"]:
  186. send_time = r.json()["d"]["updated_at"]
  187. else:
  188. send_time = 0
  189. # user_id
  190. if "user" not in r.json()["d"]:
  191. user_id = 0
  192. elif "id" not in r.json()["d"]["user"]:
  193. user_id = 0
  194. else:
  195. user_id = r.json()["d"]["user"]["id"]
  196. # user_name
  197. if "user" not in r.json()["d"]:
  198. user_name = 0
  199. elif "nickname" not in r.json()["d"]["user"]:
  200. user_name = 0
  201. else:
  202. user_name = r.json()["d"]["user"]["nickname"]
  203. # head_url
  204. if "user" not in r.json()["d"]:
  205. head_url = 0
  206. elif "avatar" not in r.json()["d"]["user"]:
  207. head_url = 0
  208. else:
  209. head_url = r.json()["d"]["user"]["avatar"]
  210. # video_url
  211. if "video_url" in r.json()["d"]:
  212. video_url = r.json()["d"]["video_url"]
  213. else:
  214. video_url = 0
  215. Common.logger(log_type).info("video_title:{}", video_title)
  216. Common.logger(log_type).info("video_id:{}", video_id)
  217. Common.logger(log_type).info("play_cnt:{}", play_cnt)
  218. Common.logger(log_type).info("like_cnt:{}", like_cnt)
  219. Common.logger(log_type).info("share_cnt:{}", share_cnt)
  220. Common.logger(log_type).info("comment_cnt:{}", comment_cnt)
  221. Common.logger(log_type).info("send_time:{}", send_time)
  222. Common.logger(log_type).info("user_name:{}", user_name)
  223. Common.logger(log_type).info("user_id:{}", user_id)
  224. Common.logger(log_type).info("head_url:{}", head_url)
  225. Common.logger(log_type).info("cover_url:{}", cover_url)
  226. Common.logger(log_type).info("video_url:{}", video_url)
  227. # 过滤无效视频
  228. if video_title == 0 or video_id == 0 or send_time == 0 or head_url == 0 or cover_url == 0 or video_url == 0:
  229. Common.logger(log_type).info("无效视频\n")
  230. # 抓取规则
  231. elif cls.get_rule(play_cnt, share_cnt, "60") is False:
  232. Common.logger(log_type).info("不满足抓取规则\n")
  233. # 已下载视频表去重
  234. elif str(video_id) in [n for m in Feishu.get_values_batch(log_type, "music_album", "f5a76e") for n in m]:
  235. Common.logger(log_type).info("该视频已下载\n")
  236. # recommend_feeds去重
  237. elif str(video_id) in [n for m in Feishu.get_values_batch(log_type, "music_album", "69UxPo") for n in m]:
  238. Common.logger(log_type).info("该视频已在recommend_feeds中\n")
  239. # 竖版视频表去重
  240. elif str(video_id) in [n for m in Feishu.get_values_batch(log_type, "music_album", "hYSZsW") for n in m]:
  241. Common.logger(log_type).info("该视频已在竖版视频表中\n")
  242. else:
  243. time.sleep(1)
  244. Feishu.insert_columns(log_type, "music_album", "69UxPo", "ROWS", 1, 2)
  245. get_feeds_time = int(time.time())
  246. values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time))),
  247. "推荐榜",
  248. str(video_id),
  249. video_title,
  250. play_cnt,
  251. like_cnt,
  252. share_cnt,
  253. comment_cnt,
  254. send_time,
  255. user_name,
  256. user_id,
  257. head_url,
  258. cover_url,
  259. video_url]]
  260. time.sleep(1)
  261. Feishu.update_values(log_type, "music_album", "69UxPo", "A2:N2", values)
  262. Common.logger(log_type).info("添加至recommend_feeds成功\n")
  263. except Exception as e:
  264. Common.logger(log_type).error("get_video_info异常:{}", e)
  265. # 下载/上传视频
  266. @classmethod
  267. def download_publish(cls, log_type, env):
  268. try:
  269. recommend_feeds_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
  270. for i in range(1, len(recommend_feeds_sheet)):
  271. download_video_id = recommend_feeds_sheet[i][2]
  272. download_video_title = recommend_feeds_sheet[i][3]
  273. download_video_play_cnt = recommend_feeds_sheet[i][4]
  274. download_video_comment_cnt = recommend_feeds_sheet[i][7]
  275. download_video_like_cnt = recommend_feeds_sheet[i][5]
  276. download_video_share_cnt = recommend_feeds_sheet[i][6]
  277. download_video_send_time = recommend_feeds_sheet[i][8]
  278. download_user_name = recommend_feeds_sheet[i][9]
  279. download_user_id = recommend_feeds_sheet[i][10]
  280. download_head_url = recommend_feeds_sheet[i][11]
  281. download_cover_url = recommend_feeds_sheet[i][12]
  282. download_video_url = recommend_feeds_sheet[i][13]
  283. Common.logger(log_type).info("正在判断第{}行", i + 1)
  284. Common.logger(log_type).info("download_video_title:{}", download_video_title)
  285. Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
  286. # Common.logger(log_type).info("download_video_id:{}", download_video_id)
  287. # Common.logger(log_type).info("download_video_comment_cnt:{}", download_video_comment_cnt)
  288. # Common.logger(log_type).info("download_video_like_cnt:{}", download_video_like_cnt)
  289. Common.logger(log_type).info("download_video_share_cnt:{}", download_video_share_cnt)
  290. # Common.logger(log_type).info("download_video_send_time:{}", download_video_send_time)
  291. # Common.logger(log_type).info("download_user_name:{}", download_user_name)
  292. # Common.logger(log_type).info("download_user_id:{}", download_user_id)
  293. # Common.logger(log_type).info("download_head_url:{}", download_head_url)
  294. # Common.logger(log_type).info("download_cover_url:{}", download_cover_url)
  295. Common.logger(log_type).info("download_video_url:{}", download_video_url)
  296. # 过滤空行
  297. if download_video_id is None or download_video_title is None or download_video_play_cnt is None:
  298. Common.logger(log_type).warning("空行,略过\n")
  299. # 过滤敏感词
  300. elif any(word if word in download_video_title else False for word in
  301. cls.sensitive_words(log_type)) is True:
  302. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  303. Common.logger(log_type).info("视频已中敏感词,删除成功\n")
  304. return
  305. # # 下载规则
  306. # elif cls.download_rule(download_video_share_cnt, download_video_play_cnt) is False:
  307. # Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  308. # Common.logger(log_type).info("不满足下载规则,删除成功\n")
  309. # return
  310. # 已下载视频表去重
  311. elif str(download_video_id) in [n for m in Feishu.get_values_batch(log_type, "music_album", "f5a76e")
  312. for n in m]:
  313. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  314. Common.logger(log_type).info("该视频已下载,删除成功\n")
  315. return
  316. # 满足下载规则
  317. else:
  318. # 下载视频
  319. Common.download_method(log_type=log_type, text="video",
  320. d_name=str(download_video_title), d_url=str(download_video_url))
  321. # 获取视频时长
  322. video_info = cls.get_video_info_from_local("./videos/" + download_video_title + "/video.mp4")
  323. download_video_resolution = str(video_info[0]) + "*" + str(video_info[1])
  324. download_video_duration = video_info[2]
  325. # 视频时长<60s,直接删除
  326. if int(download_video_duration) < 60:
  327. shutil.rmtree("./videos/" + download_video_title + "/")
  328. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  329. Common.logger(log_type).info("时长:{}<60秒,删除成功\n", int(download_video_duration))
  330. return
  331. # 竖版视频不下载,写入竖版视频表
  332. elif int(video_info[0]) < int(video_info[1]):
  333. shutil.rmtree("./videos/" + download_video_title + "/")
  334. # 删除在 recommend_feeds 的记录
  335. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  336. Common.logger(log_type).info("宽:{}<高:{},删除成功", int(video_info[0]), int(video_info[1]))
  337. # 添加到竖版视频表
  338. time.sleep(1)
  339. Feishu.insert_columns(log_type, "music_album", "hYSZsW", "ROWS", 1, 2)
  340. # 视频ID工作表,首行写入数据
  341. upload_time = int(time.time())
  342. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  343. "推荐榜",
  344. str(download_video_id),
  345. str(download_video_title),
  346. download_video_play_cnt,
  347. download_video_comment_cnt,
  348. download_video_like_cnt,
  349. download_video_share_cnt,
  350. int(download_video_duration),
  351. str(download_video_resolution),
  352. str(download_video_send_time),
  353. str(download_user_name),
  354. str(download_user_id),
  355. str(download_head_url),
  356. str(download_cover_url),
  357. str(download_video_url)]]
  358. time.sleep(1)
  359. Feishu.update_values(log_type, "music_album", "hYSZsW", "A2:P2", values)
  360. Common.logger(log_type).info("写入竖版视频表成功\n")
  361. return
  362. else:
  363. # 下载封面
  364. Common.download_method(log_type=log_type, text="cover",
  365. d_name=str(download_video_title), d_url=str(download_cover_url))
  366. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  367. with open("./videos/" + download_video_title
  368. + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  369. f_a.write(str(download_video_id) + "\n" +
  370. str(download_video_title) + "\n" +
  371. str(int(download_video_duration)) + "\n" +
  372. str(download_video_play_cnt) + "\n" +
  373. str(download_video_comment_cnt) + "\n" +
  374. str(download_video_like_cnt) + "\n" +
  375. str(download_video_share_cnt) + "\n" +
  376. str(download_video_resolution) + "\n" +
  377. str(int(time.mktime(
  378. time.strptime(download_video_send_time, "%Y-%m-%d %H:%M:%S")))) + "\n" +
  379. str(download_user_name) + "\n" +
  380. str(download_head_url) + "\n" +
  381. str(download_video_url) + "\n" +
  382. str(download_cover_url) + "\n" +
  383. "YINYUEXIANGCE")
  384. Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
  385. # 上传视频
  386. Common.logger(log_type).info("开始上传视频:{}".format(download_video_title))
  387. our_video_id = Publish.upload_and_publish(log_type, env, "play")
  388. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
  389. Common.logger(log_type).info("视频上传完成:{}", download_video_title)
  390. # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g?sheet=f5a76e
  391. Common.logger(log_type).info("保存视频ID至云文档:{}", download_video_title)
  392. # 视频ID工作表,插入首行
  393. Feishu.insert_columns(log_type, "music_album", "f5a76e", "ROWS", 1, 2)
  394. # 视频ID工作表,首行写入数据
  395. upload_time = int(time.time())
  396. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  397. "推荐榜",
  398. str(download_video_id),
  399. str(download_video_title),
  400. our_video_link,
  401. download_video_play_cnt,
  402. download_video_comment_cnt,
  403. download_video_like_cnt,
  404. download_video_share_cnt,
  405. int(download_video_duration),
  406. str(download_video_resolution),
  407. str(download_video_send_time),
  408. str(download_user_name),
  409. str(download_user_id),
  410. str(download_head_url),
  411. str(download_cover_url),
  412. str(download_video_url)]]
  413. time.sleep(1)
  414. Feishu.update_values(log_type, "music_album", "f5a76e", "F2:V2", values)
  415. # 删除行或列,可选 ROWS、COLUMNS
  416. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  417. Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
  418. return
  419. except Exception as e:
  420. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", 2, 2)
  421. Common.logger(log_type).error("download_publish异常:{},删除成功", e)
  422. # 执行下载/上传
  423. @classmethod
  424. def run_download_publish(cls, log_type, env):
  425. try:
  426. while True:
  427. time.sleep(1)
  428. recommend_feeds_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
  429. if len(recommend_feeds_sheet) == 1:
  430. Common.logger(log_type).info("下载/上传完成\n")
  431. break
  432. else:
  433. cls.download_publish(log_type, env)
  434. time.sleep(random.randint(1, 3))
  435. except Exception as e:
  436. Common.logger(log_type).error("run_download_publish异常:{}", e)
  437. if __name__ == "__main__":
  438. recommend = Recommend()
  439. # recommend.get_video_list("recommend")
  440. # recommend.get_video_info("recommend", "16911678")
  441. recommend.download_publish("recommend", "dev")