# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/7/12
import os
import shutil
import sys
import time
import ffmpeg
import requests
import urllib3
sys.path.append(os.getcwd())
from main.common import Common
from main.feishu_lib import Feishu
from main.publish import Publish


class Recommend:
    """Crawl the recommend feed, stage candidates in Feishu sheets, then download and publish.

    Worksheets of the "music_album" spreadsheet used by this class:

    - "L7Y9vz": cell A1 holds the request token.
    - "kNTEno": filter-word list.
    - "69UxPo": recommend_feeds staging sheet (row 1 is treated as the header).
    - "f5a76e": videos that have already been downloaded/published.
    - "hYSZsW": portrait (height > width) videos that were skipped.
    """

    # NOTE: evaluated once when the class body is executed, i.e. importing this
    # module already performs a Feishu request.
    wechat_sheet = Feishu.get_values_batch("log", "music_album", "L7Y9vz")
    wechat_token = wechat_sheet[0][0]

    @classmethod
    def _request_headers(cls):
        """Build the HTTP headers shared by the list and detail requests."""
        return {
            "accept": "*/*",
            "content-type": "application/json",
            "token": cls.wechat_token,
            "accept-language": "zh-CN,zh-Hans;q=0.9",
            "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X)"
                          " AppleWebKit/605.1.15 (KHTML, like Gecko)"
                          " Mobile/15E217 MicroMessenger/6.8.0(0x16080000)"
                          " NetType/WIFI Language/en Branch/Br_trunk MiniProgramEnv/Mac",
            "accept-encoding": "gzip, deflate, br",
            "referer": "https://servicewechat.com/wx6d6f1348072452e9/21/page-frame.html"
        }

    @classmethod
    def _sheet_cells(cls, log_type, sheet_id):
        """Return every cell of one "music_album" worksheet as a single flat list."""
        rows = Feishu.get_values_batch(log_type, "music_album", sheet_id)
        return [cell for row in rows for cell in row]

    @classmethod
    def _delete_feed_row(cls, log_type, row_number):
        """Delete one row (1-based sheet row number) from the recommend_feeds sheet."""
        Feishu.dimension_range(log_type, "music_album", "69UxPo", "ROWS", row_number, row_number)

    # Filter-word list
    @classmethod
    def sensitive_words(cls, log_type):
        """Read the filter-word sheet and return all non-empty cells.

        :param log_type: logger name, also forwarded to the Feishu helper
        :return: flat list of cell values, or None when reading the sheet failed
            (the failure is logged). None is returned on purpose instead of an
            empty list so that callers cannot mistake "could not load the list"
            for "nothing to filter".
        """
        try:
            lists = Feishu.get_values_batch(log_type, "music_album", "kNTEno")
            # Skip empty cells.
            return [cell for row in lists for cell in row if cell is not None]
        except Exception as e:
            Common.logger(log_type).error("获取过滤词库异常:{}", e)
            return None

    # Crawl rule
    @classmethod
    def get_rule(cls, play_cnt, share_cnt, duration):
        """Decide whether a video is worth staging.

        All three conditions must hold:

        - play count >= 20000
        - share count >= 200
        - duration >= 60 seconds

        :param play_cnt: play count (anything accepted by ``int()``)
        :param share_cnt: share count (anything accepted by ``int()``)
        :param duration: duration in seconds (anything accepted by ``int()``)
        :return: True when every threshold is met, else False
        """
        return int(play_cnt) >= 20000 and int(share_cnt) >= 200 and int(duration) >= 60

    # Download rule
    @classmethod
    def download_rule(cls, share_cnt, play_cnt):
        """Decide whether a staged video should be downloaded.

        The share rate (share count / play count) must be at least 1%.

        :param share_cnt: share count (anything accepted by ``int()``)
        :param play_cnt: play count (anything accepted by ``int()``)
        :return: True when the share rate is >= 0.01; False otherwise, including
            when the play count is 0 (the rate is undefined, so the rule fails)
        """
        shares = int(share_cnt)
        plays = int(play_cnt)
        if plays == 0:
            # Avoid ZeroDivisionError: no plays means the rule cannot be met.
            return False
        return shares / plays >= 0.01

    # Width, height and duration of a downloaded video
    @classmethod
    def get_video_info_from_local(cls, video_path):
        """Probe a local video file.

        :param video_path: path of the video file
        :return: ``(width, height, duration)`` with int width/height and float
            duration, or None when the file has no video stream
        """
        probe = ffmpeg.probe(video_path)
        video_stream = next(
            (stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
        if video_stream is None:
            print('No video stream found!')
            return None
        width = int(video_stream['width'])
        height = int(video_stream['height'])
        # The stream entry does not always carry its own duration; fall back to
        # the container-level value in that case.
        raw_duration = video_stream.get('duration')
        if raw_duration is None:
            raw_duration = probe['format']['duration']
        return width, height, float(raw_duration)

    # Walk the recommend list
    @classmethod
    def get_video_list(cls, log_type):
        """Page through the recommend list and hand every entry to get_video_info.

        Pages 1..50 are requested. Paging stops early when a response carries no
        "data" field; an empty "data" list only produces a warning. Any exception
        is logged and ends the crawl.

        :param log_type: logger name, also forwarded to the Feishu helper
        """
        try:
            for num in range(1, 51):
                Common.logger(log_type).info("正在抓取第{}页\n", num)
                url = "https://pro.yaoman.net/api/work/list/%E6%8E%A8%E8%8D%90/" + str(num)
                # NOTE(review): certificate verification is disabled and the
                # request has no timeout - confirm both are intended.
                urllib3.disable_warnings()
                r = requests.get(url=url, headers=cls._request_headers(), verify=False)
                payload = r.json()["d"]
                if "data" not in payload:
                    Common.logger(log_type).warning("response:{}", r.text)
                    return
                data = payload["data"]
                if len(data) == 0:
                    Common.logger(log_type).warning("response:{}", r.text)
                for item in data:
                    # 0 marks a missing field; get_video_info rejects such entries.
                    video_id = item["id"] if "id" in item else 0
                    cover_url = item["cover_url"] if "cover_url" in item else 0
                    cls.get_video_info(log_type, video_id, cover_url)
        except Exception as e:
            Common.logger(log_type).error("get_video_list异常:{}", e)

    # Fetch video details
    @classmethod
    def get_video_info(cls, log_type, video_id, cover_url):
        """Fetch one video's details and stage it in recommend_feeds when eligible.

        A video is staged only when all required fields are present, it passes
        get_rule, and its id is in neither the downloaded sheet nor the
        recommend_feeds sheet. Any exception is logged and swallowed.

        :param log_type: logger name, also forwarded to the Feishu helper
        :param video_id: id used to build the detail URL
        :param cover_url: cover URL taken from the list response (0 when missing)
        """
        try:
            url = "https://pro.yaoman.net/api/work/detail/" + str(video_id)
            # NOTE(review): certificate verification is disabled and the
            # request has no timeout - confirm both are intended.
            urllib3.disable_warnings()
            r = requests.get(url=url, headers=cls._request_headers(), verify=False)
            body = r.json()
            if body["m"] != "success":
                Common.logger(log_type).warning("response:{}", r.text)

            # A missing/non-dict payload leaves every field at 0, which the
            # validity check below reports as an invalid video.
            detail = body.get("d")
            if not isinstance(detail, dict):
                detail = {}
            user = detail["user"] if "user" in detail else {}

            # 0 marks a missing field.
            video_title = detail["name"][:30] if "name" in detail else 0
            video_id = detail.get("id", 0)
            play_cnt = detail.get("view_number", 0)
            like_cnt = detail.get("flower_number", 0)
            share_cnt = detail.get("share_number", 0)
            comment_cnt = detail.get("comment_number", 0)
            send_time = detail.get("updated_at", 0)
            user_id = user["id"] if "id" in user else 0
            user_name = user["nickname"] if "nickname" in user else 0
            head_url = user["avatar"] if "avatar" in user else 0
            video_url = detail.get("video_url", 0)

            for template, value in (
                    ("video_title:{}", video_title),
                    ("video_id:{}", video_id),
                    ("play_cnt:{}", play_cnt),
                    ("like_cnt:{}", like_cnt),
                    ("share_cnt:{}", share_cnt),
                    ("comment_cnt:{}", comment_cnt),
                    ("send_time:{}", send_time),
                    ("user_name:{}", user_name),
                    ("user_id:{}", user_id),
                    ("head_url:{}", head_url),
                    ("cover_url:{}", cover_url),
                    ("video_url:{}", video_url),
            ):
                Common.logger(log_type).info(template, value)

            # Reject videos with missing required fields.
            if video_title == 0 or video_id == 0 or send_time == 0 \
                    or head_url == 0 or cover_url == 0 or video_url == 0:
                Common.logger(log_type).info("无效视频\n")
                return
            # Crawl rule. The duration is not checked at this stage, so "60"
            # (the minimum) is passed; the real duration is checked after download.
            if cls.get_rule(play_cnt, share_cnt, "60") is False:
                Common.logger(log_type).info("不满足抓取规则\n")
                return
            # De-duplicate against the downloaded-videos sheet.
            if str(video_id) in cls._sheet_cells(log_type, "f5a76e"):
                Common.logger(log_type).info("该视频已下载\n")
                return
            # De-duplicate against recommend_feeds.
            if str(video_id) in cls._sheet_cells(log_type, "69UxPo"):
                Common.logger(log_type).info("该视频已在recommend_feeds中\n")
                return

            # Insert a blank row 2 and fill it with the new record.
            time.sleep(1)
            Feishu.insert_columns(log_type, "music_album", "69UxPo", "ROWS", 1, 2)
            get_feeds_time = int(time.time())
            values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time))),
                       "推荐榜",
                       video_id,
                       video_title,
                       play_cnt,
                       like_cnt,
                       share_cnt,
                       comment_cnt,
                       send_time,
                       user_name,
                       user_id,
                       head_url,
                       cover_url,
                       video_url]]
            time.sleep(1)
            Feishu.update_values(log_type, "music_album", "69UxPo", "A2:N2", values)
            Common.logger(log_type).info("添加至recommend_feeds成功\n")
        except Exception as e:
            Common.logger(log_type).error("get_video_info异常:{}", e)

    # Download / upload one video
    @classmethod
    def download_publish(cls, log_type, env):
        """Process the first non-empty row of recommend_feeds, then return.

        The row is removed from recommend_feeds in every outcome: filtered by a
        filter word, failing download_rule, already downloaded, too short,
        portrait orientation (recorded in the portrait sheet instead), or
        successfully downloaded and published (recorded in the downloaded sheet).
        On any exception the row being processed is removed as well, so that a
        permanently failing record cannot block the queue.

        :param log_type: logger name, also forwarded to the helpers
        :param env: environment name forwarded to Publish.upload_and_publish
        """
        # Sheet row currently being handled; row 2 is the first data row and is
        # the fallback when the failure happens before a row was selected.
        current_row = 2
        try:
            recommend_feeds_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
            for i in range(1, len(recommend_feeds_sheet)):
                current_row = i + 1
                row = recommend_feeds_sheet[i]
                download_video_id = row[2]
                download_video_title = row[3]
                download_video_play_cnt = row[4]
                download_video_like_cnt = row[5]
                download_video_share_cnt = row[6]
                download_video_comment_cnt = row[7]
                download_video_send_time = row[8]
                download_user_name = row[9]
                download_user_id = row[10]
                download_head_url = row[11]
                download_cover_url = row[12]
                download_video_url = row[13]

                Common.logger(log_type).info("正在判断第{}行", i + 1)
                Common.logger(log_type).info("download_video_title:{}", download_video_title)
                Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
                Common.logger(log_type).info("download_video_share_cnt:{}", download_video_share_cnt)
                Common.logger(log_type).info("download_video_url:{}", download_video_url)

                # Skip empty rows and look at the next one.
                if download_video_id is None or download_video_title is None \
                        or download_video_play_cnt is None:
                    Common.logger(log_type).warning("空行,略过\n")
                    continue

                # Filter words. If the word list could not be loaded this raises
                # and the handler below removes the row.
                if any(word if word in download_video_title else False
                       for word in cls.sensitive_words(log_type)):
                    cls._delete_feed_row(log_type, current_row)
                    Common.logger(log_type).info("视频已中敏感词,删除成功\n")
                    return

                # Download rule.
                if cls.download_rule(download_video_share_cnt, download_video_play_cnt) is False:
                    cls._delete_feed_row(log_type, current_row)
                    Common.logger(log_type).info("不满足下载规则,删除成功\n")
                    return

                # De-duplicate against the downloaded-videos sheet.
                if str(download_video_id) in cls._sheet_cells(log_type, "f5a76e"):
                    cls._delete_feed_row(log_type, current_row)
                    Common.logger(log_type).info("该视频已下载,删除成功\n")
                    return

                # All rules passed: download the video.
                video_dir = "./videos/" + str(download_video_title) + "/"
                Common.download_method(log_type=log_type, text="video",
                                       d_name=str(download_video_title),
                                       d_url=str(download_video_url))

                # Probe the downloaded file.
                video_info = cls.get_video_info_from_local(video_dir + "video.mp4")
                if video_info is None:
                    # No video stream: nothing usable was downloaded.
                    shutil.rmtree(video_dir, ignore_errors=True)
                    cls._delete_feed_row(log_type, current_row)
                    Common.logger(log_type).info("未找到视频流,删除成功\n")
                    return
                download_video_resolution = str(video_info[0]) + "*" + str(video_info[1])
                download_video_duration = video_info[2]

                # Shorter than 60s: discard.
                if int(download_video_duration) < 60:
                    shutil.rmtree(video_dir)
                    cls._delete_feed_row(log_type, current_row)
                    Common.logger(log_type).info("时长:{}<60秒,删除成功\n", int(download_video_duration))
                    return

                # Portrait videos are not published; record them in the portrait sheet.
                if int(video_info[0]) < int(video_info[1]):
                    # NOTE(review): the downloaded files under ./videos/ are left
                    # on disk in this branch - confirm whether that is intended.
                    cls._delete_feed_row(log_type, current_row)
                    Common.logger(log_type).info("宽:{}<高:{},删除成功",
                                                 int(video_info[0]), int(video_info[1]))
                    # Insert a blank row 2 in the portrait sheet and fill it.
                    time.sleep(1)
                    Feishu.insert_columns(log_type, "music_album", "hYSZsW", "ROWS", 1, 2)
                    upload_time = int(time.time())
                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
                               "推荐榜",
                               str(download_video_id),
                               str(download_video_title),
                               download_video_play_cnt,
                               download_video_comment_cnt,
                               download_video_like_cnt,
                               download_video_share_cnt,
                               int(download_video_duration),
                               str(download_video_resolution),
                               str(download_video_send_time),
                               str(download_user_name),
                               str(download_user_id),
                               str(download_head_url),
                               str(download_cover_url),
                               str(download_video_url)]]
                    time.sleep(1)
                    Feishu.update_values(log_type, "music_album", "hYSZsW", "A2:P2", values)
                    Common.logger(log_type).info("写入竖版视频表成功\n")
                    return

                # Download the cover.
                Common.download_method(log_type=log_type, text="cover",
                                       d_name=str(download_video_title),
                                       d_url=str(download_cover_url))

                # Save the metadata to "./videos/{download_video_title}/info.txt",
                # one value per line, in this fixed order.
                send_timestamp = int(time.mktime(
                    time.strptime(download_video_send_time, "%Y-%m-%d %H:%M:%S")))
                info_lines = [
                    str(download_video_id),
                    str(download_video_title),
                    str(int(download_video_duration)),
                    str(download_video_play_cnt),
                    str(download_video_comment_cnt),
                    str(download_video_like_cnt),
                    str(download_video_share_cnt),
                    str(download_video_resolution),
                    str(send_timestamp),
                    str(download_user_name),
                    str(download_head_url),
                    str(download_video_url),
                    str(download_cover_url),
                    "YINYUEXIANGCE",
                ]
                with open(video_dir + "info.txt", "a", encoding="UTF-8") as f_a:
                    f_a.write("\n".join(info_lines))
                Common.logger(log_type).info("==========视频信息已保存至info.txt==========")

                # Upload the video.
                Common.logger(log_type).info("开始上传视频:{}", download_video_title)
                our_video_id = Publish.upload_and_publish(log_type, env, "play")
                our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
                Common.logger(log_type).info("视频上传完成:{}", download_video_title)

                # Record the video in the downloaded-videos sheet:
                # https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g?sheet=f5a76e
                Common.logger(log_type).info("保存视频ID至云文档:{}", download_video_title)
                # Insert a blank row 2, then fill it.
                Feishu.insert_columns(log_type, "music_album", "f5a76e", "ROWS", 1, 2)
                upload_time = int(time.time())
                values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
                           "推荐榜",
                           str(download_video_id),
                           str(download_video_title),
                           our_video_link,
                           download_video_play_cnt,
                           download_video_comment_cnt,
                           download_video_like_cnt,
                           download_video_share_cnt,
                           int(download_video_duration),
                           str(download_video_resolution),
                           str(download_video_send_time),
                           str(download_user_name),
                           str(download_user_id),
                           str(download_head_url),
                           str(download_cover_url),
                           str(download_video_url)]]
                time.sleep(1)
                Feishu.update_values(log_type, "music_album", "f5a76e", "F2:V2", values)

                # Remove the processed row from recommend_feeds.
                cls._delete_feed_row(log_type, current_row)
                Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
                return
        except Exception as e:
            # Drop the row that failed (not blindly row 2) so it is not retried forever.
            cls._delete_feed_row(log_type, current_row)
            Common.logger(log_type).error("download_publish异常:{},删除成功", e)

    # Drain recommend_feeds
    @classmethod
    def run_download_publish(cls, log_type, env):
        """Call download_publish until recommend_feeds holds no data rows.

        The sheet is considered drained when it has at most one row (the header).
        Any exception is logged and ends the loop.

        :param log_type: logger name, also forwarded to the helpers
        :param env: environment name forwarded to download_publish
        """
        try:
            while True:
                recommend_feeds_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
                # "<=" rather than "==": an empty sheet must also end the loop.
                if len(recommend_feeds_sheet) <= 1:
                    Common.logger(log_type).info("下载/上传完成\n")
                    break
                cls.download_publish(log_type, env)
        except Exception as e:
            Common.logger(log_type).error("run_download_publish异常:{}", e)


if __name__ == "__main__":
    recommend = Recommend()
    # recommend.get_video_list("recommend")
    # recommend.get_video_info("recommend", "16911678")
    recommend.download_publish("recommend", "dev")