recommend_list.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/7/12
  4. import os
  5. import shutil
  6. import sys
  7. import time
  8. import ffmpeg
  9. import requests
  10. import urllib3
  11. sys.path.append(os.getcwd())
  12. from main.common import Common
  13. from main.feishu_lib import Feishu
  14. from main.publish import Publish
  15. class Recommend:
  16. # 过滤词库
  17. @classmethod
  18. def sensitive_words(cls, log_type):
  19. try:
  20. # 敏感词库列表
  21. word_list = []
  22. # 从云文档读取所有敏感词,添加到词库列表
  23. lists = Feishu.get_values_batch(log_type, "music_album", "kNTEno")
  24. for i in lists:
  25. for j in i:
  26. # 过滤空的单元格内容
  27. if j is None:
  28. pass
  29. else:
  30. word_list.append(j)
  31. return word_list
  32. except Exception as e:
  33. Common.logger(log_type).error("获取过滤词库异常:{}", e)
  34. # 抓取规则
  35. @classmethod
  36. def get_rule(cls, play_cnt, share_cnt, duration):
  37. """
  38. - 播放量≥20000
  39. - 分享量≥200
  40. - 视频时长≥1分钟
  41. :return:
  42. """
  43. if int(play_cnt) >= 20000:
  44. if int(share_cnt) >= 200:
  45. if int(duration) >= 60:
  46. return True
  47. else:
  48. return False
  49. else:
  50. return False
  51. else:
  52. return False
  53. # 下载规则
  54. @classmethod
  55. def download_rule(cls, share_cnt, play_cnt):
  56. """
  57. - 标题为空的视频,不进行下载。
  58. - 视频时长<1分钟,不进行下载。
  59. - 分享率<1%的视频,不进行下载(分享量/播放量)
  60. - 标题中含有以下词汇的视频,直接过滤掉,不进行下载。
  61. :return:
  62. """
  63. if int(share_cnt) / int(play_cnt) >= 0.01:
  64. return True
  65. else:
  66. return False
  67. # 获取已下载视频宽高、时长等信息
  68. @classmethod
  69. def get_video_info_from_local(cls, video_path):
  70. probe = ffmpeg.probe(video_path)
  71. # print('video_path: {}'.format(video_path))
  72. # format1 = probe['format']
  73. # bit_rate = int(format1['bit_rate']) / 1000
  74. # duration = format['duration']
  75. # size = int(format1['size']) / 1024 / 1024
  76. video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
  77. if video_stream is None:
  78. print('No video stream found!')
  79. return
  80. width = int(video_stream['width'])
  81. height = int(video_stream['height'])
  82. # num_frames = int(video_stream['nb_frames'])
  83. # fps = int(video_stream['r_frame_rate'].split('/')[0]) / int(video_stream['r_frame_rate'].split('/')[1])
  84. duration = float(video_stream['duration'])
  85. # print('width: {}'.format(width))
  86. # print('height: {}'.format(height))
  87. # print('num_frames: {}'.format(num_frames))
  88. # print('bit_rate: {}k'.format(bit_rate))
  89. # print('fps: {}'.format(fps))
  90. # print('size: {}MB'.format(size))
  91. # print('duration: {}'.format(duration))
  92. return width, height, duration
  93. # 获取视频ID
  94. @classmethod
  95. def get_video_list(cls, log_type):
  96. try:
  97. # 翻十页
  98. for num in range(1, 11):
  99. Common.logger(log_type).info("正在抓取第{}页\n", num)
  100. url = "https://pro.yaoman.net/api/work/list/%E6%8E%A8%E8%8D%90/" + str(num)
  101. headers = {
  102. "accept": "*/*",
  103. "content-type": "application/json",
  104. "token": "%2FaEAzNLTTD%2FsGLazE6FsaaNUpTrgL%2FcNPVKzHD%2BgvhLiY"
  105. "ypZm%2BQWA2OQFvQEcPYPZXJNJ8Cm79KY2ueyjn"
  106. "5laR9RKc0D33DWK7ErpVFst4tVT%2BnQpxzdA1Fg%2F0Eop8LY",
  107. "accept-language": "zh-CN,zh-Hans;q=0.9",
  108. "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X)"
  109. " AppleWebKit/605.1.15 (KHTML, like Gecko)"
  110. " Mobile/15E217 MicroMessenger/6.8.0(0x16080000)"
  111. " NetType/WIFI Language/en Branch/Br_trunk MiniProgramEnv/Mac",
  112. "accept-encoding": "gzip, deflate, br",
  113. "referer": "https://servicewechat.com/wx6d6f1348072452e9/21/page-frame.html"
  114. }
  115. urllib3.disable_warnings()
  116. r = requests.get(url=url, headers=headers, verify=False)
  117. if "data" not in r.json()["d"] or len(r.json()["d"]["data"]) == 0:
  118. Common.logger(log_type).warning("response:{}", r.text)
  119. data = r.json()["d"]["data"]
  120. for i in range(len(data)):
  121. if "id" in data[i]:
  122. video_id = data[i]["id"]
  123. else:
  124. video_id = 0
  125. if "cover_url" in data[i]:
  126. cover_url = data[i]["cover_url"]
  127. else:
  128. cover_url = 0
  129. cls.get_video_info(log_type, video_id, cover_url)
  130. except Exception as e:
  131. Common.logger(log_type).error("get_video_list异常:{}", e)
  132. # 获取视频详情
  133. @classmethod
  134. def get_video_info(cls, log_type, video_id, cover_url):
  135. try:
  136. url = "https://pro.yaoman.net/api/work/detail/" + str(video_id)
  137. headers = {
  138. "accept": "*/*",
  139. "content-type": "application/json",
  140. "token": "%2FaEAzNLTTD%2FsGLazE6FsaaNUpTrgL%2FcNPVKzHD%2BgvhLiYypZm%2BQWA2OQFvQEcPYPZXJNJ8Cm79KY2ueyjn"
  141. "5laR9RKc0D33DWK7ErpVFst4tVT%2BnQpxzdA1Fg%2F0Eop8LY",
  142. "accept-language": "zh-CN,zh-Hans;q=0.9",
  143. "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X)"
  144. " AppleWebKit/605.1.15 (KHTML, like Gecko)"
  145. " Mobile/15E217 MicroMessenger/6.8.0(0x16080000)"
  146. " NetType/WIFI Language/en Branch/Br_trunk MiniProgramEnv/Mac",
  147. "accept-encoding": "gzip, deflate, br",
  148. "referer": "https://servicewechat.com/wx6d6f1348072452e9/21/page-frame.html"
  149. }
  150. urllib3.disable_warnings()
  151. r = requests.get(url=url, headers=headers, verify=False)
  152. if r.json()["m"] != "success":
  153. Common.logger(log_type).warning("response:{}", r.text)
  154. # video_title
  155. if "name" in r.json()["d"]:
  156. video_title = r.json()["d"]["name"][:30]
  157. else:
  158. video_title = 0
  159. # video_id
  160. if "id" in r.json()["d"]:
  161. video_id = r.json()["d"]["id"]
  162. else:
  163. video_id = 0
  164. # play_cnt
  165. if "view_number" in r.json()["d"]:
  166. play_cnt = r.json()["d"]["view_number"]
  167. else:
  168. play_cnt = 0
  169. # like_cnt
  170. if "flower_number" in r.json()["d"]:
  171. like_cnt = r.json()["d"]["flower_number"]
  172. else:
  173. like_cnt = 0
  174. # share_cnt
  175. if "share_number" in r.json()["d"]:
  176. share_cnt = r.json()["d"]["share_number"]
  177. else:
  178. share_cnt = 0
  179. # comment_cnt
  180. if "comment_number" in r.json()["d"]:
  181. comment_cnt = r.json()["d"]["comment_number"]
  182. else:
  183. comment_cnt = 0
  184. # send_time
  185. if "updated_at" in r.json()["d"]:
  186. send_time = r.json()["d"]["updated_at"]
  187. else:
  188. send_time = 0
  189. # user_id
  190. if "user" not in r.json()["d"]:
  191. user_id = 0
  192. elif "id" not in r.json()["d"]["user"]:
  193. user_id = 0
  194. else:
  195. user_id = r.json()["d"]["user"]["id"]
  196. # user_name
  197. if "user" not in r.json()["d"]:
  198. user_name = 0
  199. elif "nickname" not in r.json()["d"]["user"]:
  200. user_name = 0
  201. else:
  202. user_name = r.json()["d"]["user"]["nickname"]
  203. # head_url
  204. if "user" not in r.json()["d"]:
  205. head_url = 0
  206. elif "avatar" not in r.json()["d"]["user"]:
  207. head_url = 0
  208. else:
  209. head_url = r.json()["d"]["user"]["avatar"]
  210. # video_url
  211. if "video_url" in r.json()["d"]:
  212. video_url = r.json()["d"]["video_url"]
  213. else:
  214. video_url = 0
  215. Common.logger(log_type).info("video_title:{}", video_title)
  216. Common.logger(log_type).info("video_id:{}", video_id)
  217. Common.logger(log_type).info("play_cnt:{}", play_cnt)
  218. Common.logger(log_type).info("like_cnt:{}", like_cnt)
  219. Common.logger(log_type).info("share_cnt:{}", share_cnt)
  220. Common.logger(log_type).info("comment_cnt:{}", comment_cnt)
  221. Common.logger(log_type).info("send_time:{}", send_time)
  222. Common.logger(log_type).info("user_name:{}", user_name)
  223. Common.logger(log_type).info("user_id:{}", user_id)
  224. Common.logger(log_type).info("head_url:{}", head_url)
  225. Common.logger(log_type).info("cover_url:{}", cover_url)
  226. Common.logger(log_type).info("video_url:{}", video_url)
  227. # 过滤无效视频
  228. if video_title == 0 or video_id == 0 or send_time == 0 or head_url == 0 or cover_url == 0 or video_url == 0:
  229. Common.logger(log_type).info("无效视频\n")
  230. # 抓取规则
  231. elif cls.get_rule(play_cnt, share_cnt, "60") is False:
  232. Common.logger(log_type).info("不满足抓取规则\n")
  233. # 已下载视频表去重
  234. elif video_id in [n for m in Feishu.get_values_batch(log_type, "music_album", "f5a76e") for n in m]:
  235. Common.logger(log_type).info("该视频已下载\n")
  236. # recommend_feeds去重
  237. elif video_id in [n for m in Feishu.get_values_batch(log_type, "music_album", "69UxPo") for n in m]:
  238. Common.logger(log_type).info("该视频已在recommend_feeds中\n")
  239. else:
  240. time.sleep(1)
  241. Feishu.insert_columns(log_type, "music_album", "69UxPo", "ROWS", 1, 2)
  242. get_feeds_time = int(time.time())
  243. values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time))),
  244. "推荐榜",
  245. video_id,
  246. video_title,
  247. play_cnt,
  248. like_cnt,
  249. share_cnt,
  250. comment_cnt,
  251. send_time,
  252. user_name,
  253. user_id,
  254. head_url,
  255. cover_url,
  256. video_url]]
  257. time.sleep(1)
  258. Feishu.update_values(log_type, "music_album", "69UxPo", "A2:N2", values)
  259. Common.logger(log_type).info("添加至recommend_feeds成功\n")
  260. except Exception as e:
  261. Common.logger(log_type).error("get_video_info异常:{}", e)
  262. # 下载/上传视频
  263. @classmethod
  264. def download_publish(cls, log_type, env):
  265. try:
  266. recommend_feeds_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
  267. for i in range(1, len(recommend_feeds_sheet)):
  268. download_video_id = recommend_feeds_sheet[i][2]
  269. download_video_title = recommend_feeds_sheet[i][3]
  270. download_video_play_cnt = recommend_feeds_sheet[i][4]
  271. download_video_comment_cnt = recommend_feeds_sheet[i][7]
  272. download_video_like_cnt = recommend_feeds_sheet[i][5]
  273. download_video_share_cnt = recommend_feeds_sheet[i][6]
  274. download_video_send_time = recommend_feeds_sheet[i][8]
  275. download_user_name = recommend_feeds_sheet[i][9]
  276. download_user_id = recommend_feeds_sheet[i][10]
  277. download_head_url = recommend_feeds_sheet[i][11]
  278. download_cover_url = recommend_feeds_sheet[i][12]
  279. download_video_url = recommend_feeds_sheet[i][13]
  280. Common.logger(log_type).info("正在判断第{}行", i + 1)
  281. Common.logger(log_type).info("download_video_title:{}", download_video_title)
  282. Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
  283. # Common.logger(log_type).info("download_video_id:{}", download_video_id)
  284. # Common.logger(log_type).info("download_video_comment_cnt:{}", download_video_comment_cnt)
  285. # Common.logger(log_type).info("download_video_like_cnt:{}", download_video_like_cnt)
  286. Common.logger(log_type).info("download_video_share_cnt:{}", download_video_share_cnt)
  287. # Common.logger(log_type).info("download_video_send_time:{}", download_video_send_time)
  288. # Common.logger(log_type).info("download_user_name:{}", download_user_name)
  289. # Common.logger(log_type).info("download_user_id:{}", download_user_id)
  290. # Common.logger(log_type).info("download_head_url:{}", download_head_url)
  291. # Common.logger(log_type).info("download_cover_url:{}", download_cover_url)
  292. Common.logger(log_type).info("download_video_url:{}", download_video_url)
  293. # 过滤空行
  294. if download_video_id is None or download_video_title is None or download_video_play_cnt is None:
  295. Common.logger(log_type).warning("空行,略过\n")
  296. # 过滤敏感词
  297. elif any(word if word in download_video_title else False for word in
  298. cls.sensitive_words(log_type)) is True:
  299. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  300. Common.logger(log_type).info("视频已中敏感词,删除成功\n")
  301. return
  302. # 下载规则
  303. elif cls.download_rule(download_video_share_cnt, download_video_play_cnt) is False:
  304. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  305. Common.logger(log_type).info("不满足下载规则,删除成功\n")
  306. return
  307. # 已下载视频表去重
  308. elif download_video_id in [n for m in Feishu.get_values_batch(log_type, "music_album", "f5a76e") for n
  309. in m]:
  310. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  311. Common.logger(log_type).info("该视频已下载,删除成功\n")
  312. return
  313. # 满足下载规则
  314. else:
  315. # 下载视频
  316. Common.download_method(log_type=log_type, text="video",
  317. d_name=str(download_video_title), d_url=str(download_video_url))
  318. # 获取视频时长
  319. video_info = cls.get_video_info_from_local("./videos/" + download_video_title + "/video.mp4")
  320. download_video_resolution = str(video_info[0])+"*"+str(video_info[1])
  321. download_video_duration = video_info[2]
  322. # 视频时长<60s,直接删除
  323. if int(download_video_duration) < 60:
  324. shutil.rmtree("./videos/" + download_video_title + "/")
  325. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  326. Common.logger(log_type).info("时长:{}<60秒,删除成功\n", int(download_video_duration))
  327. return
  328. else:
  329. # 下载封面
  330. Common.download_method(log_type=log_type, text="cover",
  331. d_name=str(download_video_title), d_url=str(download_cover_url))
  332. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  333. with open("./videos/" + download_video_title
  334. + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  335. f_a.write(str(download_video_id) + "\n" +
  336. str(download_video_title) + "\n" +
  337. str(int(download_video_duration)) + "\n" +
  338. str(download_video_play_cnt) + "\n" +
  339. str(download_video_comment_cnt) + "\n" +
  340. str(download_video_like_cnt) + "\n" +
  341. str(download_video_share_cnt) + "\n" +
  342. str(download_video_resolution) + "\n" +
  343. str(int(time.mktime(
  344. time.strptime(download_video_send_time, "%Y-%m-%d %H:%M:%S")))) + "\n" +
  345. str(download_user_name) + "\n" +
  346. str(download_head_url) + "\n" +
  347. str(download_video_url) + "\n" +
  348. str(download_cover_url) + "\n" +
  349. "YINYUEXIANGCE")
  350. Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
  351. # 上传视频
  352. Common.logger(log_type).info("开始上传视频:{}".format(download_video_title))
  353. our_video_id = Publish.upload_and_publish(log_type, env, "play")
  354. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
  355. Common.logger(log_type).info("视频上传完成:{}", download_video_title)
  356. # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g?sheet=f5a76e
  357. Common.logger(log_type).info("保存视频ID至云文档:{}", download_video_title)
  358. # 视频ID工作表,插入首行
  359. Feishu.insert_columns(log_type, "music_album", "f5a76e", "ROWS", 1, 2)
  360. # 视频ID工作表,首行写入数据
  361. upload_time = int(time.time())
  362. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  363. "推荐榜",
  364. str(download_video_id),
  365. str(download_video_title),
  366. our_video_link,
  367. download_video_play_cnt,
  368. download_video_comment_cnt,
  369. download_video_like_cnt,
  370. download_video_share_cnt,
  371. int(download_video_duration),
  372. str(download_video_resolution),
  373. str(download_video_send_time),
  374. str(download_user_name),
  375. str(download_user_id),
  376. str(download_head_url),
  377. str(download_cover_url),
  378. str(download_video_url)]]
  379. time.sleep(1)
  380. Feishu.update_values(log_type, "music_album", "f5a76e", "F2:V2", values)
  381. # 删除行或列,可选 ROWS、COLUMNS
  382. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", i + 1, i + 1)
  383. Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
  384. return
  385. except Exception as e:
  386. Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", 2, 2)
  387. Common.logger(log_type).error("download_publish异常:{},删除成功", e)
  388. # 执行下载/上传
  389. @classmethod
  390. def run_download_publish(cls, log_type, env):
  391. try:
  392. while True:
  393. recommend_feeds_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
  394. if len(recommend_feeds_sheet) == 1:
  395. Common.logger(log_type).info("下载/上传完成\n")
  396. break
  397. else:
  398. cls.download_publish(log_type, env)
  399. except Exception as e:
  400. Common.logger(log_type).error("run_download_publish异常:{}", e)
  401. if __name__ == "__main__":
  402. recommend = Recommend()
  403. # recommend.get_video_list("recommend")
  404. # recommend.get_video_info("recommend", "16911678")
  405. recommend.download_publish("recommend", "dev")