# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/28
import datetime
import difflib
import json
import os
import shutil
import sys
import time
from hashlib import md5

import requests
import urllib3
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Make the repo root importable before loading the project's common package
sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
from common.publish import Publish
from common.scheduling_db import MysqlHelper
from common.public import get_config_from_mysql


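# Crawler for videos embedded in WeChat official-account ("公众号") articles, driven by the
# "公众号_1" token config. Flow: get_all_videos -> get_videoList (paged article list per account)
# -> get_video_url (resolve the video address from the article page with headless Chrome)
# -> download_publish (download, validate against download_rule, upload, record to MySQL and Feishu).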
class GongzhonghaoAuthor1:
    # Pagination offset
    begin = 0
    platform = "公众号"

    # Basic threshold rules
    @staticmethod
    def download_rule(log_type, crawler, video_dict, rule_dict):
        """
        Basic rules a video must satisfy before it gets downloaded.
        :param log_type: log type
        :param crawler: which crawler
        :param video_dict: video info, as a dict
        :param rule_dict: rule info, as a dict
        :return: True if all rules are satisfied, otherwise False
        """
        rule_play_cnt_min = rule_dict.get('play_cnt', {}).get('min', 0)
        rule_play_cnt_max = rule_dict.get('play_cnt', {}).get('max', 100000000)
        if rule_play_cnt_max == 0:
            rule_play_cnt_max = 100000000

        rule_duration_min = rule_dict.get('duration', {}).get('min', 0)
        rule_duration_max = rule_dict.get('duration', {}).get('max', 100000000)
        if rule_duration_max == 0:
            rule_duration_max = 100000000

        rule_period_min = rule_dict.get('period', {}).get('min', 0)
        # rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
        # if rule_period_max == 0:
        #     rule_period_max = 100000000

        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
        if rule_fans_cnt_max == 0:
            rule_fans_cnt_max = 100000000

        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
        if rule_videos_cnt_max == 0:
            rule_videos_cnt_max = 100000000

        rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
        rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
        if rule_like_cnt_max == 0:
            rule_like_cnt_max = 100000000

        rule_width_min = rule_dict.get('width', {}).get('min', 0)
        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
        if rule_width_max == 0:
            rule_width_max = 100000000

        rule_height_min = rule_dict.get('height', {}).get('min', 0)
        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
        if rule_height_max == 0:
            rule_height_max = 100000000

        rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
        rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
        if rule_share_cnt_max == 0:
            rule_share_cnt_max = 100000000

        rule_comment_cnt_min = rule_dict.get('comment_cnt', {}).get('min', 0)
        rule_comment_cnt_max = rule_dict.get('comment_cnt', {}).get('max', 100000000)
        if rule_comment_cnt_max == 0:
            rule_comment_cnt_max = 100000000

        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
        if rule_publish_time_max == 0:
            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59

        Common.logger(log_type, crawler).info(
            f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
        Common.logger(log_type, crawler).info(
            f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')

        if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
                and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
                and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
                and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min) \
                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min):
            return True
        else:
            return False

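    # A minimal usage sketch for download_rule (the rule values below are illustrative, not real
    # scheduling config); a "max" of 0 means "no upper bound":
    #   rule_dict = {"duration": {"min": 20, "max": 0}, "play_cnt": {"min": 0, "max": 0},
    #                "period": {"min": 3, "max": 0}}
    #   GongzhonghaoAuthor1.download_rule("author", "gongzhonghao", video_dict, rule_dict)  # -> True / False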
    @classmethod
    def title_like(cls, log_type, crawler, title, env):
        select_sql = f""" select * from crawler_video where platform="公众号" """
        video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env, action="")
        if len(video_list) == 0:
            return None
        for video_dict in video_list:
            video_title = video_dict["video_title"]
            if difflib.SequenceMatcher(None, title, video_title).quick_ratio() >= 0.8:
                return True

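    # Note: title_like returns True on the first stored title whose quick_ratio against the new
    # title is >= 0.8, and otherwise falls through to None; callers only test "is True", so the
    # None/True distinction is sufficient here.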
    # Fetch the "公众号_1" token config
    @classmethod
    def get_token(cls, log_type, crawler, env):
        select_sql = f""" select * from crawler_config where source="{crawler}" and title LIKE "%公众号_1%";"""
        configs = MysqlHelper.get_values(log_type, crawler, select_sql, env, action="")
        if len(configs) == 0:
            # Common.logger(log_type, crawler).warning(f"公众号_1未配置token")
            Feishu.bot(log_type, crawler, "公众号_1:未配置token")
            time.sleep(60)
            return None
        token_dict = {
            "token_id": configs[0]["id"],
            "title": configs[0]["title"],
            "token": dict(eval(configs[0]["config"]))["token"],
            "cookie": dict(eval(configs[0]["config"]))["cookie"],
            "update_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(configs[0]["update_time"] / 1000))),
            "operator": configs[0]["operator"]
        }
        # for k, v in token_dict.items():
        #     print(f"{k}:{v}")
        return token_dict

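    # Assumption (inferred from the eval() above): crawler_config.config stores a Python-dict-style
    # string such as "{'token': '...', 'cookie': '...'}", and update_time is a millisecond timestamp.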
    # Look up an account's fakeid (and avatar) by its WeChat name
    @classmethod
    def get_fakeid(cls, log_type, crawler, wechat_name, env):
        while True:
            token_dict = cls.get_token(log_type, crawler, env)
            if token_dict is None:
                # get_token already alerted via Feishu and slept; retry
                continue
            url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
            headers = {
                "accept": "*/*",
                "accept-encoding": "gzip, deflate, br",
                "accept-language": "zh-CN,zh;q=0.9",
                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
                           "&type=77&createType=5&token=1011071554&lang=zh_CN",
                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": '"Windows"',
                "sec-fetch-dest": "empty",
                "sec-fetch-mode": "cors",
                "sec-fetch-site": "same-origin",
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
                "x-requested-with": "XMLHttpRequest",
                'cookie': token_dict['cookie'],
            }
            params = {
                "action": "search_biz",
                "begin": "0",
                "count": "5",
                "query": str(wechat_name),
                "token": token_dict['token'],
                "lang": "zh_CN",
                "f": "json",
                "ajax": "1",
            }
            urllib3.disable_warnings()
            r = requests.get(url=url, headers=headers, params=params, verify=False)
            r.close()
            if r.json()["base_resp"]["err_msg"] == "invalid session":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                # Common.logger(log_type, crawler).warning(f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} 过期啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if r.json()["base_resp"]["err_msg"] == "freq control":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                # Common.logger(log_type, crawler).warning(f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} 频控啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if "list" not in r.json() or len(r.json()["list"]) == 0:
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                # Common.logger(log_type, crawler).warning(f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} 频控啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue

            fakeid = r.json()["list"][0]["fakeid"]
            head_url = r.json()["list"][0]["round_head_img"]
            fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
            return fakeid_dict

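    # Note: searchbiz returns up to 5 candidate accounts for the query; the code above takes the
    # first hit as the target account, so wechat_name should match the intended account closely.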
    # Resolve a Tencent Video download URL from a vid
    @classmethod
    def get_tencent_video_url(cls, video_id):
        # try:
        url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
        response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
        response = json.loads(response)
        url = response['vl']['vi'][0]['ul']['ui'][0]['url']
        fvkey = response['vl']['vi'][0]['fvkey']
        video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
        return video_url
        # except Exception as e:
        #     Common.logger(log_type, crawler).error(f"get_tencent_video_url异常:{e}\n")

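    # The getinfo response wraps its JSON in "QZOutputJson=...;", hence the string stripping above;
    # the final URL assembled here has the shape <cdn_prefix><vid>.mp4?vkey=<fvkey>.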
    @classmethod
    def get_video_url(cls, article_url, env):
        # try:
        # Browser capabilities: collect performance logs
        ca = DesiredCapabilities.CHROME
        ca["goog:loggingPrefs"] = {"performance": "ALL"}

        # Run headless (no visible browser window)
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("headless")
        chrome_options.add_argument(
            f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
        chrome_options.add_argument("--no-sandbox")

        # Initialize the driver
        if env == "prod":
            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
        else:
            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
                '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))

        driver.implicitly_wait(10)
        # Common.logger(log_type, crawler).info('打开文章链接')
        driver.get(article_url)
        time.sleep(1)

        if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
            video_url = driver.find_element(
                By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
        elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
            iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
                'src')
            video_id = iframe.split('vid=')[-1].split('&')[0]
            video_url = cls.get_tencent_video_url(video_id)
        else:
            video_url = 0
        driver.quit()
        return video_url
        # except Exception as e:
        #     Common.logger(log_type, crawler).info(f'get_video_url异常:{e}\n')

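    # Two cases are handled above: a native video poster element (src read directly) and a Tencent
    # Video iframe (vid extracted from the iframe src, then resolved via get_tencent_video_url);
    # anything else yields 0, which the caller treats as an article without a downloadable video.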
    # Fetch the article list for one account
    @classmethod
    def get_videoList(cls, log_type, crawler, wechat_name, rule_dict, user_name, uid, oss_endpoint, env):
        # try:
        while True:
            token_dict = cls.get_token(log_type, crawler, env)
            if token_dict is None:
                # get_token already alerted via Feishu and slept; retry
                continue
            fakeid_dict = cls.get_fakeid(log_type=log_type,
                                         crawler=crawler,
                                         wechat_name=wechat_name,
                                         env=env)
            url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
            headers = {
                "accept": "*/*",
                "accept-encoding": "gzip, deflate, br",
                "accept-language": "zh-CN,zh;q=0.9",
                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
                           "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": '"Windows"',
                "sec-fetch-dest": "empty",
                "sec-fetch-mode": "cors",
                "sec-fetch-site": "same-origin",
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
                "x-requested-with": "XMLHttpRequest",
                'cookie': token_dict['cookie'],
            }
            params = {
                "action": "list_ex",
                "begin": str(cls.begin),
                "count": "5",
                "fakeid": fakeid_dict['fakeid'],
                "type": "9",
                "query": "",
                "token": str(token_dict['token']),
                "lang": "zh_CN",
                "f": "json",
                "ajax": "1",
            }
            urllib3.disable_warnings()
            r = requests.get(url=url, headers=headers, params=params, verify=False)
            r.close()
            if r.json()["base_resp"]["err_msg"] == "invalid session":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                # Common.logger(log_type, crawler).warning(f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} 过期啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if r.json()["base_resp"]["err_msg"] == "freq control":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                # Common.logger(log_type, crawler).warning(f"{token_dict['title']}, 操作人:{token_dict['operator']}, 更换日期:{token_dict['update_time']} 频控啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if 'app_msg_list' not in r.json():
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                # Common.logger(log_type, crawler).warning(f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} 频控啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if len(r.json()['app_msg_list']) == 0:
                Common.logger(log_type, crawler).info('没有更多视频了\n')
                return
            else:
                cls.begin += 5
                app_msg_list = r.json()['app_msg_list']
                for article in app_msg_list:
                    # title
                    video_title = article.get("title", "").replace('/', '').replace('\n', '') \
                        .replace('.', '').replace('“', '').replace('”', '').replace(' ', '') \
                        .replace('"', '').replace("'", "")
                    # aid
                    aid = article.get('aid', '')
                    # create_time
                    create_time = article.get('create_time', 0)
                    publish_time_stamp = int(create_time)
                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
                    avatar_url = fakeid_dict['head_url']
                    # cover_url
                    cover_url = article.get('cover', '')
                    # article_url
                    article_url = article.get('link', '')
                    video_url = cls.get_video_url(article_url, env)

                    video_dict = {
                        'video_id': aid,
                        'video_title': video_title,
                        'publish_time_stamp': publish_time_stamp,
                        'publish_time_str': publish_time_str,
                        'user_name': user_name,
                        'play_cnt': 0,
                        'comment_cnt': 0,
                        'like_cnt': 0,
                        'share_cnt': 0,
                        'user_id': fakeid_dict['fakeid'],
                        'avatar_url': avatar_url,
                        'cover_url': cover_url,
                        'article_url': article_url,
                        'video_url': video_url,
                        'session': f'gongzhonghao-author1-{int(time.time())}'
                    }
                    for k, v in video_dict.items():
                        Common.logger(log_type, crawler).info(f"{k}:{v}")

                    if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('min', 1000)):
                        Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('min', 1000))}天\n")
                        cls.begin = 0
                        return

                    if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
                        Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
                    # Filter out titles that contain a blocked word
                    elif any(str(word) if str(word) in video_dict['video_title'] else False
                             for word in get_config_from_mysql(log_type=log_type,
                                                               source=crawler,
                                                               env=env,
                                                               text="filter",
                                                               action="")) is True:
                        Common.logger(log_type, crawler).info("标题已中过滤词\n")
                    # Skip videos that were already downloaded
                    elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                        Common.logger(log_type, crawler).info("视频已下载\n")
                    # Skip titles that are too similar to ones already stored
                    elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
                        Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
                    else:
                        cls.download_publish(log_type=log_type,
                                             crawler=crawler,
                                             video_dict=video_dict,
                                             rule_dict=rule_dict,
                                             uid=uid,
                                             oss_endpoint=oss_endpoint,
                                             env=env)

                Common.logger(log_type, crawler).info('休眠 60 秒\n')
                time.sleep(60)

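    # Pagination note: cls.begin is the appmsg list offset, stepped by 5 per page; it is reset to 0
    # when an article older than the configured period is reached, and again in get_all_videos
    # before moving on to the next account.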
    @classmethod
    def repeat_video(cls, log_type, crawler, video_id, env):
        sql = f""" select * from crawler_video where platform="公众号" and out_video_id="{video_id}"; """
        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
        return len(repeat_video)

    # Download / upload
    @classmethod
    def download_publish(cls, log_type, crawler, video_dict, rule_dict, uid, oss_endpoint, env):
        # Download the video
        Common.download_method(log_type=log_type, crawler=crawler, text="video",
                               title=video_dict["video_title"], url=video_dict["video_url"])
        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
        # Probe the downloaded file with ffmpeg (duration, resolution, size)
        ffmpeg_dict = Common.ffmpeg(log_type, crawler,
                                    f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
        if ffmpeg_dict is None:
            # Remove the video folder
            shutil.rmtree(f"./{crawler}/videos/{md_title}")
            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
            return
        video_dict["video_width"] = ffmpeg_dict["width"]
        video_dict["video_height"] = ffmpeg_dict["height"]
        video_dict["duration"] = ffmpeg_dict["duration"]
        video_size = ffmpeg_dict["size"]
        Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
        Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
        Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
        Common.logger(log_type, crawler).info(f'video_size:{video_size}')
        # Delete the download if the file is empty
        if int(video_size) == 0:
            shutil.rmtree(f"./{crawler}/videos/{md_title}")
            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
            return
        # Delete the download if it fails the crawl rules
        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False:
            shutil.rmtree(f"./{crawler}/videos/{md_title}")
            Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
            return
        # Download the cover image
        Common.download_method(log_type=log_type, crawler=crawler, text="cover",
                               title=video_dict["video_title"], url=video_dict["cover_url"])
        # Save video info to "./videos/{video_title}/info.txt"
        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)

        # Upload the video
        Common.logger(log_type, crawler).info("开始上传视频...")
        strategy = "定向榜爬虫策略"
        our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                  crawler=crawler,
                                                  strategy=strategy,
                                                  our_uid=uid,
                                                  oss_endpoint=oss_endpoint,
                                                  env=env)
        if env == 'prod':
            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
        else:
            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
        Common.logger(log_type, crawler).info("视频上传完成")

        if our_video_id is None:
            # Remove the video folder
            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
            return

        insert_sql = f""" insert into crawler_video(video_id,
                                                    out_user_id,
                                                    platform,
                                                    strategy,
                                                    out_video_id,
                                                    video_title,
                                                    cover_url,
                                                    video_url,
                                                    duration,
                                                    publish_time,
                                                    play_cnt,
                                                    crawler_rule,
                                                    width,
                                                    height)
                                                    values({our_video_id},
                                                    "{video_dict['user_id']}",
                                                    "{cls.platform}",
                                                    "定向爬虫策略",
                                                    "{video_dict['video_id']}",
                                                    "{video_dict['video_title']}",
                                                    "{video_dict['cover_url']}",
                                                    "{video_dict['video_url']}",
                                                    {int(video_dict['duration'])},
                                                    "{video_dict['publish_time_str']}",
                                                    {int(video_dict['play_cnt'])},
                                                    '{json.dumps(rule_dict)}',
                                                    {int(video_dict['video_width'])},
                                                    {int(video_dict['video_height'])}) """
        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
        Common.logger(log_type, crawler).info('视频信息插入数据库成功!')

        # Write the video record to Feishu
        Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
        # Write one row at the top of the video-ID sheet
        upload_time = int(time.time())
        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
                   "用户主页",
                   video_dict['video_title'],
                   video_dict['video_id'],
                   our_video_link,
                   int(video_dict['duration']),
                   f"{video_dict['video_width']}*{video_dict['video_height']}",
                   video_dict['publish_time_str'],
                   video_dict['user_name'],
                   video_dict['user_id'],
                   video_dict['avatar_url'],
                   video_dict['cover_url'],
                   video_dict['article_url'],
                   video_dict['video_url']]]
        time.sleep(0.5)
        Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
        Common.logger(log_type, crawler).info('视频下载/上传成功\n')

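    # Pipeline summary for download_publish: download video -> probe with ffmpeg -> drop empty or
    # rule-breaking files -> download cover and save info.txt -> upload via Publish -> record the
    # result in MySQL (crawler_video) and in the Feishu sheet "47e39d".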
    @classmethod
    def get_all_videos(cls, log_type, crawler, user_list, rule_dict, oss_endpoint, env):
        if len(user_list) == 0:
            Common.logger(log_type, crawler).warning(f"抓取用户列表为空\n")
            return
        for user in user_list:
            # try:
            user_name = user['nick_name']
            wechat_name = user['link']
            uid = user['uid']
            Common.logger(log_type, crawler).info(f'获取 {user_name} 公众号视频\n')
            cls.get_videoList(log_type=log_type,
                              crawler=crawler,
                              wechat_name=wechat_name,
                              rule_dict=rule_dict,
                              user_name=user_name,
                              uid=uid,
                              oss_endpoint=oss_endpoint,
                              env=env)
            cls.begin = 0
            Common.logger(log_type, crawler).info('休眠 60 秒\n')
            time.sleep(60)
            # except Exception as e:
            #     Common.logger(log_type, crawler).info(f'get_all_videos异常:{e}\n')


if __name__ == "__main__":
    GongzhonghaoAuthor1.get_token("author", "gongzhonghao", "dev")
    # print(get_config_from_mysql("author", "gongzhonghao", "dev", "filter", action=""))
    pass
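    # A hedged end-to-end sketch (the argument shapes below are inferred from the code above, not
    # taken from the project's scheduling config):
    #   user_list = [{"nick_name": "示例账号", "link": "<公众号名称>", "uid": 123456}]
    #   rule_dict = {"duration": {"min": 20, "max": 0}, "period": {"min": 3, "max": 0}}
    #   GongzhonghaoAuthor1.get_all_videos("author", "gongzhonghao", user_list, rule_dict, "<oss_endpoint>", "dev")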