123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197 |
- # -*- coding: utf-8 -*-
- # @Author: wangkun
- # @Time: 2022/4/18
- """
- 下载并上传:发布时间榜
- 规则:
- 1.基本规则:send_time_rule()
- 2.视频发布3日内,播放量大于2万(当前时间 - 发布时间 <= 3 天)
- """
- import json
- import os
- import random
- import sys
- import time
- import requests
- import urllib3
- sys.path.append(os.getcwd())
- from main.common import Common
- from main.get_feeds import get_feeds
- from main.publish import Publish
class DownloadSendtime:
    """Download and publish recently uploaded videos that already have many plays.

    A video listed in the feeds file is downloaded and published when it
    passes ``send_time_rule``, was uploaded at most ``MAX_AGE_SECONDS`` ago
    and has at least ``MIN_PLAY_CNT`` plays.  Every video that reaches a
    verdict (published or rejected) is removed from the feeds file.
    """

    # Upload-age limit: 3 days, in seconds.
    # NOTE: the log messages below mention "3天" / "20000" literally; keep
    # them in sync if these thresholds are ever changed.
    MAX_AGE_SECONDS = 3 * 24 * 60 * 60
    # Minimum play count required inside the age window.
    MIN_PLAY_CNT = 20000
    # Seconds to wait for the video-info endpoint before giving up on a video.
    REQUEST_TIMEOUT = 30
    # Feeds file that is rewritten whenever a video is dropped.
    FEEDS_PATH = "./txt/kanyikan_feeds.txt"
    # Separator between the fields of one feeds line.
    FIELD_SEP = " + "
    VIDEO_INFO_URL = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"

    @staticmethod
    def send_time_rule(send_time_width, send_time_height, send_time_duration, send_time_share_cnt) -> bool:
        """Return True when a video passes the basic download rules.

        Rules:
        1. Resolution: width or height is >= 720, or is 0.
        2. Duration is between 60 and 600 seconds (inclusive).
        3. The count passed as ``send_time_share_cnt`` is > 0.  The caller in
           this class passes the play count here, despite the parameter name.

        All arguments may be ints or numeric strings.

        :raises ValueError: if a value that has to be inspected is not numeric.
        """

        def dimension_ok(value):
            # Compare as integers so that both 0 and "0" count as "zero";
            # comparing against the string "0" missed integer zeros.
            pixels = int(value)
            return pixels >= 720 or pixels == 0

        return (
            (dimension_ok(send_time_width) or dimension_ok(send_time_height))
            and 600 >= int(send_time_duration) >= 60
            and int(send_time_share_cnt) > 0
        )

    @classmethod
    def _feed_video_id(cls, feed_line):
        """Return the video ID (second field) of a feeds line, or None if the line has none."""
        fields = feed_line.strip().split(cls.FIELD_SEP)
        return fields[1] if len(fields) > 1 else None

    @classmethod
    def _remove_from_feeds(cls, v_id):
        """Rewrite the feeds file without the lines whose video ID equals ``v_id``."""
        with open(cls.FEEDS_PATH, "r", encoding="utf8") as f_r:
            feed_lines = f_r.readlines()
        # Filter BEFORE reopening for writing: "w" truncates the file, so any
        # error raised while filtering would otherwise lose the remaining lines.
        # Exact comparison (not substring) keeps an empty or partial ID from
        # deleting unrelated videos; lines without an ID field are preserved.
        kept = [row for row in feed_lines if cls._feed_video_id(row) != v_id]
        with open(cls.FEEDS_PATH, "w", encoding="utf-8") as f_w:
            f_w.writelines(kept)

    @staticmethod
    def _pick_play_url(play_info):
        """Return the download URL from ``play_info``.

        ``play_info`` is either a list of entries or a dict holding that list
        under ``"items"``.  The third entry is used when there are at least
        three, otherwise the first one.

        :raises IndexError: if there are no entries at all.
        """
        items = play_info["items"] if "items" in play_info else play_info
        # The previous check was ``len(...) > 1`` followed by ``[2]``, which
        # raised IndexError for exactly two entries.
        index = 2 if len(items) > 2 else 0
        return items[index]["play_url"]

    @classmethod
    def _process_video(cls, env, v_id, session):
        """Fetch one video's info and either publish it or drop it from the feeds file."""
        param = {
            "session": session,
            "vid": v_id,
            "wxaVersion": "3.9.2",
            "channelid": "208201",
            "scene": "32",
            "subscene": "1089",
            "model": "iPhone 11<iPhone12,1>14.7.1",
            "clientVersion": "8.0.18",
            "sharesearchid": "447665862521758270",
            "sharesource": "-1"
        }
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept as in the original, confirm that this is intentional.
        r = requests.get(url=cls.VIDEO_INFO_URL, params=param, verify=False,
                         timeout=cls.REQUEST_TIMEOUT)
        response = json.loads(r.content.decode("utf8"))
        if "data" not in response:
            Common.crawler_log().info("获取视频info时,session过期,等待 30 秒")
            # No data came back: back off for a random 31-35 seconds and leave
            # the video in the feeds file.
            time.sleep(random.randint(31, 35))
            return

        data = response["data"]
        v_title = data["title"]
        v_duration = data["duration"]
        v_play_cnt_sendtime = data["played_cnt"]
        v_comment_cnt = data["comment_cnt"]
        v_liked_cnt = data["liked_cnt"]
        v_shared_cnt = data["shared_cnt"]
        v_width = data["width"]
        v_height = data["height"]
        v_resolution = str(v_width) + "*" + str(v_height)
        v_send_date = data["upload_time"]
        v_username = data["user_info"]["nickname"]
        v_user_cover = data["user_info"]["headimg_url"]
        v_video_cover = data["cover_url"]
        download_url_up = cls._pick_play_url(data["play_info"])

        # Basic rules, plus: none of the collected fields may be an empty string.
        required = (v_id, v_title, v_duration, v_play_cnt_sendtime, v_comment_cnt,
                    v_liked_cnt, v_shared_cnt, v_width, v_height, v_send_date,
                    v_username, v_user_cover, v_video_cover, download_url_up)
        if not cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) \
                or "" in required:
            Common.crawler_log().info("不满足下载规则:{}".format(v_title))
            cls._remove_from_feeds(v_id)
            return

        # Uploaded too long ago.
        age_seconds = int(time.time()) - int(v_send_date)
        if age_seconds > cls.MAX_AGE_SECONDS:
            Common.crawler_log().info("视频发布时间大于3天:{}天".format(
                int(age_seconds / 86400))
                + ";" + "标题:{}".format(v_title))
            cls._remove_from_feeds(v_id)
            return

        # Recent enough, but not enough plays.
        if int(v_play_cnt_sendtime) < cls.MIN_PLAY_CNT:
            Common.crawler_log().info("该视频3天播放量:{}<20000".format(
                int(v_play_cnt_sendtime)) + ";" + "不满足下载规则:{}".format(v_title))
            cls._remove_from_feeds(v_id)
            return

        Common.crawler_log().info("该视频:{}".format(
            v_title) + " " + "在3天内的播放量{}>=20000".format(v_play_cnt_sendtime))
        # Download the cover, then the video itself.
        Common.download_method("cover", v_title, v_video_cover)
        Common.download_method("video", v_title, download_url_up)
        # Record the video ID in "./txt/kanyikan_videoid.txt".
        with open("./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
            f_a.write(v_id + "\n")
        # Append the video's details to "./videos/{title}/info.txt", one value
        # per line, with no trailing newline after the last value.
        info_fields = (v_id, v_title, v_duration, v_play_cnt_sendtime, v_comment_cnt,
                       v_liked_cnt, v_shared_cnt, v_resolution, v_send_date,
                       v_username, v_user_cover, download_url_up, v_video_cover,
                       session)
        with open("./videos/" + v_title + "/" + "info.txt",
                  "a", encoding="utf8") as f_info:
            f_info.write("\n".join(str(field) for field in info_fields))
        # Upload and publish the video.
        Common.crawler_log().info("开始上传视频:{}".format(v_title))
        Publish.upload_and_publish(env, "send_time")
        # The video is handled: drop it from the feeds file.
        Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
        cls._remove_from_feeds(v_id)

    @classmethod
    def download_sendtime_video(cls, env):
        """Download and publish every qualifying video listed in the feeds file.

        A video qualifies when it passes ``send_time_rule``, was uploaded within
        the last 3 days and has at least 20000 plays.  A failure while handling
        one video is logged and does not stop the remaining videos.

        :param env: target environment, "dev" for testing or "prod" for production
        :return: None
        """
        get_sendtime_session = Common.get_session()
        Common.crawler_log().info("获取视频info时,session:{}".format(get_sendtime_session))
        # Requests below are sent with verify=False; silence the resulting
        # warnings once instead of on every iteration.
        urllib3.disable_warnings()
        # presumably read_txt resolves this name to ./txt/kanyikan_feeds.txt - verify in Common
        for line in Common.read_txt("kanyikan_feeds.txt"):
            v_id = cls._feed_video_id(line)  # external video ID
            if v_id is None:
                # A blank or malformed line must not abort the whole run.
                Common.crawler_log().info(
                    "kanyikan_feeds.txt 中存在格式异常的行,已跳过:{}".format(line.strip()))
                continue
            try:
                cls._process_video(env, v_id, get_sendtime_session)
            except Exception as e:
                # Best effort per video: log the failure and carry on.
                Common.crawler_log().error("获取视频info异常:{}".format(e))
if __name__ == "__main__":
    # Script entry point: run get_feeds() first (presumably it fills the feeds
    # file - verify in main.get_feeds), then process the feeds against "dev".
    get_feeds()
    # download_sendtime_video is a classmethod, so no instance is needed.
    DownloadSendtime.download_sendtime_video("dev")
|