123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133 |
- # -*- coding: utf-8 -*-
- # @Author: wangkun
- # @Time: 2022/4/18
- """
- 下载并上传:播放量视频
- """
- import os
- import sys
- sys.path.append(os.getcwd())
- from main.common import Common
- from main.get_feeds import get_feeds
- from main.publish import Publish
class DownloadPlay:
    """Download feed videos that pass the play-count rule and publish them."""

    # Separator between the fields of one record line in the feeds file.
    _FIELD_SEP = " + "
    # Feeds file that is rewritten whenever a processed video is removed.
    _FEEDS_PATH = "./txt/kanyikan_feeds.txt"
    # File that accumulates the IDs of downloaded videos, one per line.
    _VIDEO_ID_PATH = "./txt/kanyikan_videoid.txt"

    @staticmethod
    def play_rule(play_width, play_height, play_duration, play_play_cnt):
        """
        Decide whether a video qualifies for download.

        All three conditions must hold:
        1. Resolution: width or height is >= 720, or either one is 0.
        2. Duration: between 60 and 600 seconds, inclusive.
        3. Play count: at least 150000.

        :param play_width: video width, as an int or a numeric string
        :param play_height: video height, as an int or a numeric string
        :param play_duration: duration in seconds, int or numeric string
        :param play_play_cnt: play count, int or numeric string
        :return: True when the video satisfies every condition, else False
        :raises ValueError: when a value that has to be inspected is not numeric
        """
        # The `or` chain short-circuits: later values are only converted when
        # the earlier checks did not already accept the resolution.
        resolution_ok = (
            int(play_width) >= 720
            or int(play_height) >= 720
            or int(play_width) == 0
            or int(play_height) == 0
        )
        if not resolution_ok:
            return False
        if not 60 <= int(play_duration) <= 600:
            return False
        return int(play_play_cnt) >= 150000

    @classmethod
    def _remove_from_feeds(cls, video_id):
        """
        Rewrite the feeds file without the record(s) whose ID equals video_id.

        Lines that have no ID field (for example blank lines) are kept as they
        are instead of raising, so one malformed line cannot break the cleanup.

        :param video_id: ID of the video whose record should be dropped
        """
        with open(cls._FEEDS_PATH, "r", encoding="utf8") as f_r:
            lines = f_r.readlines()
        kept = []
        for line in lines:
            fields = line.split(cls._FIELD_SEP)
            # Exact match on the ID field; a substring test would also drop
            # records whose ID merely contains video_id (or every record when
            # video_id is empty).
            if len(fields) > 1 and fields[1].strip() == video_id:
                continue
            kept.append(line)
        with open(cls._FEEDS_PATH, "w", encoding="utf-8") as f_w:
            f_w.writelines(kept)

    @classmethod
    def download_play_video(cls, env):
        """
        Download, record and publish every feed video that passes play_rule.

        Each record is removed from the feeds file once it has been handled,
        whether it was published, rejected by the rule, or failed with an
        exception.

        :param env: target environment, "dev" for testing or "prod" for
            production; passed through to Publish.upload_and_publish
        """
        # NOTE(review): Common.read_txt presumably returns the record lines of
        # ./txt/kanyikan_feeds.txt -- confirm against main.common.
        videos = Common.read_txt("kanyikan_feeds.txt")
        for video in videos:
            fields = video.strip().split(cls._FIELD_SEP)
            if len(fields) < 2:
                # Without an ID there is nothing to look up or remove; skip the
                # line instead of aborting the whole run with an IndexError.
                Common.crawler_log().info(
                    "视频 info 异常,无法解析视频 ID:{}".format(video.strip()))
                continue
            download_video_id = fields[1]
            try:
                download_video_title = fields[3]
                download_video_duration = fields[4]
                download_video_play_cnt = fields[2]
                download_video_comment_cnt = fields[5]
                download_video_like_cnt = fields[6]
                download_video_share_cnt = fields[7]
                download_video_resolution = fields[8]
                # Resolution is stored as "<width>*<height>".
                download_video_width = download_video_resolution.split("*")[0]
                download_video_height = download_video_resolution.split("*")[-1]
                download_video_send_time = fields[9]
                download_user_name = fields[10]
                download_head_url = fields[11]
                download_cover_url = fields[12]
                download_video_url = fields[13]
                download_video_session = fields[-1]

                if cls.play_rule(download_video_width,
                                 download_video_height,
                                 download_video_duration,
                                 download_video_play_cnt):
                    Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
                    # Download the cover image.
                    Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
                    # Download the video itself.
                    Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)

                    # Record the video ID in ./txt/kanyikan_videoid.txt.
                    with open(cls._VIDEO_ID_PATH, "a", encoding="utf8") as f_a:
                        f_a.write(download_video_id + "\n")

                    # Save the video metadata to ./videos/<title>/info.txt,
                    # one value per line, no trailing newline.
                    info_lines = [
                        download_video_id,
                        download_video_title,
                        download_video_duration,
                        download_video_play_cnt,
                        download_video_comment_cnt,
                        download_video_like_cnt,
                        download_video_share_cnt,
                        download_video_resolution,
                        download_video_send_time,
                        download_user_name,
                        download_head_url,
                        download_video_url,
                        download_cover_url,
                        download_video_session,
                    ]
                    with open("./videos/" + download_video_title + "/info.txt", "a", encoding="utf8") as f_a:
                        f_a.write("\n".join(str(item) for item in info_lines))

                    # Upload and publish the downloaded video.
                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
                    Publish.upload_and_publish(env, "play")

                    # The video is done; drop its record from the feeds file.
                    Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
                    cls._remove_from_feeds(download_video_id)
                else:
                    # The video was rejected by the rule; drop its record too.
                    Common.crawler_log().info("该视频不满足下载规则,删除在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
                    cls._remove_from_feeds(download_video_id)
            except Exception as e:
                # Per-video boundary: log the failure (including its cause) and
                # drop the record so the same video is not retried.
                Common.crawler_log().info("视频 info 异常:{}".format(e))
                cls._remove_from_feeds(download_video_id)
if __name__ == "__main__":
    # Script entry point: run get_feeds() first, then process the feeds
    # against the "dev" (test) environment.
    # NOTE(review): get_feeds() presumably populates the feeds file that
    # download_play_video reads -- confirm in main.get_feeds.
    get_feeds()
    # download_play_video is a classmethod, so it is called on the class.
    DownloadPlay.download_play_video("dev")
|