Server
/
crawler_kanyikan


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
							# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/4/18

"""
下载并上传：上升榜视频
规则：
    1.满足基本规则
    2.每隔一小时，检查视频播放量，>=1000，则下载及上传
    3.超过 2 小时，则删除该视频信息
"""

import json
import os
import random
import sys
import time
import requests
import urllib3
sys.path.append(os.getcwd())
from main.common import Common
from main.get_feeds import get_feeds
from main.publish import Publish


class DownloadUp:

    @staticmethod
    def up_rule(up_width, up_height, up_duration, up_play_cnt):
        """
        1.分辨率，宽或者高 >= 720 or ==0
        2.600s >= 时长 >= 60s
        3.视频播放量 >= 0
        """
        if int(up_width) >= 720 or int(up_height) >= 720 or str(up_width) == "0" or str(up_height) == "0":
            if 600 >= int(up_duration) >= 60:
                if int(up_play_cnt) >= 0:
                    return True
                else:
                    return False
            else:
                return False
        else:
            return False

    @classmethod
    def download_up_video(cls, env):
        """
        1.从 kanyikan_feeds.txt 中获取 videoid
        2.根据 videoid，从 videoinfo 接口，获取当前视频最新的信息
        3.根据下载规则判断，符合规则进行下载：
            1 更新视频 ID 到 "./txt/kanyikan_videoid.txt"
            2 视频信息写入文件 "./videos/{d_title}/info.txt"
        4.上传完成：
            1 删除该视频在 "./txt/kanyikan_feeds.txt" 中的信息
        """
        get_video_info_session = Common.get_session()
        Common.crawler_log().info("获取视频info时，session：{}".format(get_video_info_session))
        lines = Common.read_txt("kanyikan_feeds.txt")
        for line in lines:
            v_time = line.strip().split(" + ")[0]  # 第一次获取该视频的时间
            v_id = line.strip().split(" + ")[1]  # 外网视频 ID
            v_play_ctn = line.strip().split(" + ")[2]  # 播放量
            url = "https://search.weixin.qq.com/cgi-bin/recwxa/recwxagetonevideoinfo?"
            param = {
                "session": get_video_info_session,
                "vid": v_id,
                "wxaVersion": "3.9.2",
                "channelid": "208201",
                "scene": "32",
                "subscene": "1089",
                "model": "iPhone 11<iPhone12,1>14.7.1",
                "clientVersion": "8.0.18",
                "sharesearchid": "447665862521758270",
                "sharesource": "-1"
            }
            try:
                urllib3.disable_warnings()
                r = requests.get(url=url, params=param, verify=False)
                response = json.loads(r.content.decode("utf8"))
                if "data" not in response:
                    Common.crawler_log().error("获取视频info时，session过期，等待30秒")
                    # 如果返回空信息，则随机睡眠 31-35 秒
                    time.sleep(random.randint(31, 35))
                else:
                    data = response["data"]
                    v_title = data["title"]
                    v_duration = data["duration"]
                    v_play_cnt_up = data["played_cnt"]
                    v_comment_cnt = data["comment_cnt"]
                    v_liked_cnt = data["liked_cnt"]
                    v_shared_cnt = data["shared_cnt"]
                    v_width = data["width"]
                    v_height = data["height"]
                    v_resolution = str(v_width) + "*" + str(v_height)
                    v_send_date = data["upload_time"]
                    v_username = data["user_info"]["nickname"]
                    v_user_cover = data["user_info"]["headimg_url"]
                    v_video_cover = data["cover_url"]
                    if "items" not in data["play_info"]:
                        if len(data["play_info"]) > 2:
                            download_url_up = data["play_info"][2]["play_url"]
                        else:
                            download_url_up = data["play_info"][0]["play_url"]
                    else:
                        if len(data["play_info"]["items"]) > 2:
                            download_url_up = data["play_info"]["items"][2]["play_url"]
                        else:
                            download_url_up = data["play_info"]["items"][0]["play_url"]

                    # 判断基本规则
                    if cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True \
                            and v_id != "" and v_title != "" and v_duration != "" \
                            and v_play_cnt_up != "" and v_comment_cnt != "" and v_liked_cnt != "" \
                            and v_shared_cnt != "" and v_width != "" and v_height != "" \
                            and v_send_date != "" and v_username != "" and v_user_cover != "" \
                            and v_video_cover != "" and download_url_up != "":
                        if int(time.time()) - int(v_time) < 3600:
                            Common.crawler_log().info("距上次获取该视频时间：{}分钟".format(
                                int(int(int(time.time()) - int(v_time)) / 60)) + "；{}".format(v_title))
                        elif 7200 >= int(time.time()) - int(v_time) >= 3600:
                            if int(v_play_cnt_up) - int(v_play_ctn) >= 1000:
                                Common.crawler_log().info("该视频:{}".format(
                                    v_title) + " " + "在1小时内的播放量{}>=1000".format(int(v_play_cnt_up) - int(v_play_ctn)))
                                # 下载封面
                                Common.download_method("cover", v_title, v_video_cover)
                                # 下载视频
                                Common.download_method("video", v_title, download_url_up)
                                # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
                                with open("./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
                                    f_a.write(v_id + "\n")
                                # 保存视频信息到 "./files/{视频标题}/videoinfo.txt"
                                with open("./videos/" + v_title + "/" + "info.txt",
                                          "a", encoding="utf8") as f_a2:
                                    f_a2.write(str(v_id) + "\n" +
                                               str(v_title) + "\n" +
                                               str(v_duration) + "\n" +
                                               str(v_play_cnt_up) + "\n" +
                                               str(v_comment_cnt) + "\n" +
                                               str(v_liked_cnt) + "\n" +
                                               str(v_shared_cnt) + "\n" +
                                               str(v_resolution) + "\n" +
                                               str(v_send_date) + "\n" +
                                               str(v_username) + "\n" +
                                               str(v_user_cover) + "\n" +
                                               str(download_url_up) + "\n" +
                                               str(v_video_cover) + "\n" +
                                               str(get_video_info_session))

                                # 上传该视频
                                Common.crawler_log().info("开始上传视频:{}".format(v_title))
                                Publish.upload_and_publish(env, "up")

                                # 删除该视频在kanyikan_feeds.txt中的信息
                                Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
                                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f1:
                                    lines = f1.readlines()
                                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w1:
                                    for line1 in lines:
                                        if v_id in line1.split(" + ")[1]:
                                            continue
                                        f_w1.write(line1)
                            else:
                                # 删除之前保存的该视频信息，并把现在的信息保存进去
                                Common.crawler_log().info("该视频1小时内的播放量：{}<1000".format(
                                    int(v_play_cnt_up) - int(v_play_ctn)) + "；"
                                                          + "更新该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
                                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                                    lines = f_r.readlines()
                                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                                    for line2 in lines:
                                        if v_id in line2.split(" + ")[1]:
                                            continue
                                        f_w.write(line2)
                                with open("./txt/kanyikan_feeds.txt", "a", encoding="utf-8") as f_a:
                                    f_a.write(str(time.time()) + " + "
                                              + str(v_id) + " + "
                                              + str(v_play_cnt_up) + " + "
                                              + str(v_title) + " + "
                                              + str(v_duration) + " + "
                                              + str(v_comment_cnt) + " + "
                                              + str(v_liked_cnt) + " + "
                                              + str(v_shared_cnt) + " + "
                                              + str(v_resolution) + " + "
                                              + str(v_send_date) + " + "
                                              + str(v_username) + " + "
                                              + str(v_user_cover) + " + "
                                              + str(v_video_cover) + " + "
                                              + str(download_url_up) + " + "
                                              + str(get_video_info_session) + "\n")

                        elif int(time.time()) - int(v_time) > 7200:
                            Common.crawler_log().info("距上次获取该视频时间：{}分钟。超过2小时，删除该视频".format(
                                int((int(time.time()) - int(v_time)) / 60)) + "；" + "标题：{}".format(v_title))
                            # 删除之前保存的该视频信息
                            Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
                            with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                                lines = f_r.readlines()
                            with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                                for line2 in lines:
                                    if v_id in line2.split(" + ")[1]:
                                        continue
                                    f_w.write(line2)
                    else:
                        Common.crawler_log().info("不满足下载规则：{}".format(v_title))
                        # 删除之前保存的该视频信息
                        Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(v_title))
                        with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                            lines = f_r.readlines()
                        with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
                            for line3 in lines:
                                if v_id in line3.split(" + ")[1]:
                                    continue
                                f_w.write(line3)
            except Exception as e:
                Common.crawler_log().error("获取视频info异常：{}".format(e))


if __name__ == "__main__":
    downloadup = DownloadUp()
    get_feeds()
    downloadup.download_up_video("dev")