# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/4/18
"""
Download and upload: videos on the rising list.

Rules:
1. The basic rules (up_rule) must be satisfied.
2. Every hour, re-check the video's play count; if it grew by >= 1000, download and upload the video.
3. After more than 2 hours, delete the video's record.
"""
import json
import os
import random
import sys
import time
import requests
import urllib3

sys.path.append(os.getcwd())
from main.common import Common
from main.get_feeds import get_feeds
from main.publish import Publish


class DownloadUp:
    @staticmethod
    def up_rule(up_width, up_height, up_duration, up_play_cnt):
        """
        Basic download rules:
        1. Resolution: width or height >= 720, or == 0 (unknown)
        2. Duration: 60s <= duration <= 600s
        3. Play count >= 0
        """
        if int(up_width) >= 720 or int(up_height) >= 720 or str(up_width) == "0" or str(up_height) == "0":
            if 600 >= int(up_duration) >= 60:
                if int(up_play_cnt) >= 0:
                    return True
        return False

    @classmethod
    def remove_from_feeds(cls, video_id):
        """Remove every record whose video-id field matches video_id from kanyikan_feeds.txt."""
        with open("./txt/kanyikan_feeds.txt", "r", encoding="utf-8") as f_r:
            feed_lines = f_r.readlines()
        with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
            for feed_line in feed_lines:
                fields = feed_line.split(" + ")
                # Guard against malformed lines that have no video-id field
                if len(fields) > 1 and video_id in fields[1]:
                    continue
                f_w.write(feed_line)

    @classmethod
    def download_up_video(cls, env):
        """
        1. Read video ids from kanyikan_feeds.txt.
        2. For each video id, fetch the latest video info from the videoinfo API.
        3. If the download rules are satisfied, download the video:
           3.1 Append the video id to "./txt/kanyikan_videoid.txt"
           3.2 Write the video info to "./videos/{video title}/info.txt"
        4. After uploading, delete the video's record from "./txt/kanyikan_feeds.txt".
        """
        get_video_info_session = Common.get_session()
        Common.crawler_log().info("Session used for fetching video info: {}".format(get_video_info_session))
        lines = Common.read_txt("kanyikan_feeds.txt")
        for line in lines:
            v_time = line.strip().split(" + ")[0]          # time the video was first recorded
            v_id = line.strip().split(" + ")[1]            # external video id
            v_play_cnt_old = line.strip().split(" + ")[2]  # play count when first recorded
            url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
            param = {
                "session": get_video_info_session,
                "vid": v_id,
                "wxaVersion": "3.9.2",
                "channelid": "208201",
                "scene": "32",
                "subscene": "1089",
                "model": "iPhone 1114.7.1",
                "clientVersion": "8.0.18",
                "sharesearchid": "447665862521758270",
                "sharesource": "-1"
            }
            try:
                urllib3.disable_warnings()
                r = requests.get(url=url, params=param, verify=False)
                response = json.loads(r.content.decode("utf8"))
                if "data" not in response:
                    Common.crawler_log().error("Session expired while fetching video info; sleeping 31-35 seconds")
                    # The response carried no data; sleep a random 31-35 seconds before the next request
                    time.sleep(random.randint(31, 35))
                else:
                    data = response["data"]
                    v_title = data["title"]
                    v_duration = data["duration"]
                    v_play_cnt_up = data["played_cnt"]
                    v_comment_cnt = data["comment_cnt"]
                    v_liked_cnt = data["liked_cnt"]
                    v_shared_cnt = data["shared_cnt"]
                    v_width = data["width"]
                    v_height = data["height"]
                    v_resolution = str(v_width) + "*" + str(v_height)
                    v_send_date = data["upload_time"]
                    v_username = data["user_info"]["nickname"]
                    v_user_cover = data["user_info"]["headimg_url"]
                    v_video_cover = data["cover_url"]
                    # Pick a play URL: prefer the third stream when more than two are returned, otherwise the first
                    if "items" not in data["play_info"]:
                        if len(data["play_info"]) > 2:
                            download_url_up = data["play_info"][2]["play_url"]
                        else:
                            download_url_up = data["play_info"][0]["play_url"]
                    else:
                        if len(data["play_info"]["items"]) > 2:
                            download_url_up = data["play_info"]["items"][2]["play_url"]
                        else:
                            download_url_up = data["play_info"]["items"][0]["play_url"]

                    # Check the basic rules and that no required field is empty
                    if cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True \
                            and v_id != "" and v_title != "" and v_duration != "" \
                            and v_play_cnt_up != "" and v_comment_cnt != "" and v_liked_cnt != "" \
                            and v_shared_cnt != "" and v_width != "" and v_height != "" \
                            and v_send_date != "" and v_username != "" and v_user_cover != "" \
                            and v_video_cover != "" and download_url_up != "":
                        if int(time.time()) - int(v_time) < 3600:
                            Common.crawler_log().info("Minutes since this video was last recorded: {}".format(
                                int((int(time.time()) - int(v_time)) / 60)) + "; {}".format(v_title))
                        elif 7200 >= int(time.time()) - int(v_time) >= 3600:
                            if int(v_play_cnt_up) - int(v_play_cnt_old) >= 1000:
                                Common.crawler_log().info("Video: {}".format(v_title) + " " +
                                                          "gained {} >= 1000 plays within 1 hour".format(
                                                              int(v_play_cnt_up) - int(v_play_cnt_old)))
                                # Download the cover
                                Common.download_method("cover", v_title, v_video_cover)
                                # Download the video
                                Common.download_method("video", v_title, download_url_up)
                                # Append the video id to "./txt/kanyikan_videoid.txt"
                                with open("./txt/kanyikan_videoid.txt", "a", encoding="utf-8") as f_a:
                                    f_a.write(v_id + "\n")
                                # Write the video info to "./videos/{video title}/info.txt"
                                with open("./videos/" + v_title + "/" + "info.txt", "a", encoding="utf-8") as f_a2:
                                    f_a2.write(str(v_id) + "\n" +
                                               str(v_title) + "\n" +
                                               str(v_duration) + "\n" +
                                               str(v_play_cnt_up) + "\n" +
                                               str(v_comment_cnt) + "\n" +
                                               str(v_liked_cnt) + "\n" +
                                               str(v_shared_cnt) + "\n" +
                                               str(v_resolution) + "\n" +
                                               str(v_send_date) + "\n" +
                                               str(v_username) + "\n" +
                                               str(v_user_cover) + "\n" +
                                               str(download_url_up) + "\n" +
                                               str(v_video_cover) + "\n" +
                                               str(get_video_info_session))
                                # Upload the video
                                Common.crawler_log().info("Start uploading video: {}".format(v_title))
                                Publish.upload_and_publish(env, "up")
                                # Delete the video's record from kanyikan_feeds.txt
                                Common.crawler_log().info("Deleting this video's record from kanyikan_feeds.txt: {}".format(v_title))
                                cls.remove_from_feeds(v_id)
                            else:
                                # Delete the previously saved record and write the current info in its place,
                                # resetting the timestamp and the play-count baseline
                                Common.crawler_log().info("Plays gained within 1 hour: {} < 1000".format(
                                    int(v_play_cnt_up) - int(v_play_cnt_old)) + "; " +
                                    "updating this video's record in kanyikan_feeds.txt: {}".format(v_title))
                                cls.remove_from_feeds(v_id)
                                with open("./txt/kanyikan_feeds.txt", "a", encoding="utf-8") as f_a:
                                    f_a.write(str(int(time.time())) + " + " + str(v_id) +
                                              " + " + str(v_play_cnt_up) +
                                              " + " + str(v_title) +
                                              " + " + str(v_duration) +
                                              " + " + str(v_comment_cnt) +
                                              " + " + str(v_liked_cnt) +
                                              " + " + str(v_shared_cnt) +
                                              " + " + str(v_resolution) +
                                              " + " + str(v_send_date) +
                                              " + " + str(v_username) +
                                              " + " + str(v_user_cover) +
                                              " + " + str(v_video_cover) +
                                              " + " + str(download_url_up) +
                                              " + " + str(get_video_info_session) + "\n")
                        elif int(time.time()) - int(v_time) > 7200:
                            Common.crawler_log().info(
                                "Minutes since this video was last recorded: {}. Over 2 hours; deleting the video".format(
                                    int((int(time.time()) - int(v_time)) / 60)) + "; " + "title: {}".format(v_title))
                            # Delete the previously saved record
                            Common.crawler_log().info("Deleting this video's record from kanyikan_feeds.txt: {}".format(v_title))
                            cls.remove_from_feeds(v_id)
                    else:
                        Common.crawler_log().info("Download rules not satisfied: {}".format(v_title))
                        # Delete the previously saved record
                        Common.crawler_log().info("Deleting this video's record from kanyikan_feeds.txt: {}".format(v_title))
                        cls.remove_from_feeds(v_id)
            except Exception as e:
                Common.crawler_log().error("Exception while fetching video info: {}; deleting the video".format(e))
                # Delete the previously saved record
                cls.remove_from_feeds(v_id)


if __name__ == "__main__":
    downloadup = DownloadUp()
    get_feeds()
    downloadup.download_up_video("dev")
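
# Quick sanity checks for up_rule (hypothetical values, for running in a REPL;
# not part of the crawler's control flow):
#   >>> DownloadUp.up_rule(up_width=0, up_height=0, up_duration=120, up_play_cnt=500)
#   True   # width/height of 0 ("unknown") passes the resolution rule
#   >>> DownloadUp.up_rule(up_width=1280, up_height=720, up_duration=30, up_play_cnt=500)
#   False  # 30s is below the 60s minimum duration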