gongzhonghao4_author.py

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/28
import datetime
import difflib
import json
import os
import shutil
import sys
import time
from hashlib import md5
import requests
import urllib3
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium import webdriver
sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
from common.publish import Publish
from common.scheduling_db import MysqlHelper
from common.public import get_config_from_mysql


class GongzhonghaoAuthor4:
    platform = "公众号"

    # Baseline threshold rules
    @staticmethod
    def download_rule(log_type, crawler, video_dict, rule_dict):
        """
        Basic rules for downloading a video.
        :param log_type: log type
        :param crawler: which crawler
        :param video_dict: video info, as a dict
        :param rule_dict: rule info, as a dict
        :return: True if the rules are satisfied, otherwise False
        """
        # For every metric, max == 0 means "no upper bound".
        rule_play_cnt_min = rule_dict.get('play_cnt', {}).get('min', 0)
        rule_play_cnt_max = rule_dict.get('play_cnt', {}).get('max', 100000000)
        if rule_play_cnt_max == 0:
            rule_play_cnt_max = 100000000

        rule_duration_min = rule_dict.get('duration', {}).get('min', 0)
        rule_duration_max = rule_dict.get('duration', {}).get('max', 100000000)
        if rule_duration_max == 0:
            rule_duration_max = 100000000

        rule_period_min = rule_dict.get('period', {}).get('min', 0)
        # rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
        # if rule_period_max == 0:
        #     rule_period_max = 100000000

        rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
        rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
        if rule_fans_cnt_max == 0:
            rule_fans_cnt_max = 100000000

        rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
        rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
        if rule_videos_cnt_max == 0:
            rule_videos_cnt_max = 100000000

        rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
        rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
        if rule_like_cnt_max == 0:
            rule_like_cnt_max = 100000000

        rule_width_min = rule_dict.get('width', {}).get('min', 0)
        rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
        if rule_width_max == 0:
            rule_width_max = 100000000

        rule_height_min = rule_dict.get('height', {}).get('min', 0)
        rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
        if rule_height_max == 0:
            rule_height_max = 100000000

        rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
        rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
        if rule_share_cnt_max == 0:
            rule_share_cnt_max = 100000000

        rule_comment_cnt_min = rule_dict.get('comment_cnt', {}).get('min', 0)
        rule_comment_cnt_max = rule_dict.get('comment_cnt', {}).get('max', 100000000)
        if rule_comment_cnt_max == 0:
            rule_comment_cnt_max = 100000000

        rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
        rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 0)
        if rule_publish_time_max == 0:
            rule_publish_time_max = 4102415999000  # 2099-12-31 23:59:59

        Common.logger(log_type, crawler).info(
            f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
        Common.logger(log_type, crawler).info(
            f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
        Common.logger(log_type, crawler).info(
            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')

        if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
                and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
                and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
                and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
                and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
                and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
                and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min) \
                and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min):
            return True
        else:
            return False
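
    # A minimal sketch of the rule_dict shape download_rule expects (key names are
    # taken from the .get() reads above; the numeric values are made-up placeholders):
    # rule_dict = {
    #     "play_cnt": {"min": 0, "max": 0},       # max=0 -> no upper bound
    #     "duration": {"min": 20, "max": 0},      # seconds
    #     "period": {"min": 3},                   # only min (days since publish) is used
    #     "publish_time": {"min": 0, "max": 0},   # millisecond timestamps
    # }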

    @classmethod
    def title_like(cls, log_type, crawler, title, env):
        """Return True if `title` is >= 80% similar to any already-crawled title."""
        select_sql = f""" select * from crawler_video where platform="公众号" """
        video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env, action="")
        if len(video_list) == 0:
            return None
        for video_dict in video_list:
            video_title = video_dict["video_title"]
            if difflib.SequenceMatcher(None, title, video_title).quick_ratio() >= 0.8:
                return True

    # Fetch the token
    @classmethod
    def get_token(cls, log_type, crawler, env):
        select_sql = f""" select * from crawler_config where source="{crawler}" and title LIKE "%公众号_4%";"""
        configs = MysqlHelper.get_values(log_type, crawler, select_sql, env, action="")
        if len(configs) == 0:
            Feishu.bot(log_type, crawler, "公众号_4:未配置token")
            time.sleep(60)
            return None
        token_dict = {
            "token_id": configs[0]["id"],
            "title": configs[0]["title"],
            "token": dict(eval(configs[0]["config"]))["token"],
            "cookie": dict(eval(configs[0]["config"]))["cookie"],
            "update_time": time.strftime("%Y-%m-%d %H:%M:%S",
                                         time.localtime(int(configs[0]["update_time"] / 1000))),
            "operator": configs[0]["operator"]
        }
        # for k, v in token_dict.items():
        #     print(f"{k}:{v}")
        return token_dict
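
    # Sketch of the `config` column this method evaluates (the field names come from
    # the ["token"] / ["cookie"] reads above; the values are made-up placeholders):
    # {"token": "1011071554", "cookie": "appmsglist_action=...; slave_sid=..."}
    # Note the column is passed through eval(), so it must hold a dict literal.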

    # Look up the account's fakeid by its WeChat name
    @classmethod
    def get_fakeid(cls, log_type, crawler, wechat_name, env):
        while True:
            token_dict = cls.get_token(log_type, crawler, env)
            if token_dict is None:
                # get_token already slept and alerted; retry
                continue
            url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
            headers = {
                "accept": "*/*",
                "accept-encoding": "gzip, deflate, br",
                "accept-language": "zh-CN,zh;q=0.9",
                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
                           "&type=77&createType=5&token=1011071554&lang=zh_CN",
                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": '"Windows"',
                "sec-fetch-dest": "empty",
                "sec-fetch-mode": "cors",
                "sec-fetch-site": "same-origin",
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
                "x-requested-with": "XMLHttpRequest",
                'cookie': token_dict['cookie'],
            }
            params = {
                "action": "search_biz",
                "begin": "0",
                "count": "5",
                "query": str(wechat_name),
                "token": token_dict['token'],
                "lang": "zh_CN",
                "f": "json",
                "ajax": "1",
            }
            urllib3.disable_warnings()
            r = requests.get(url=url, headers=headers, params=params, verify=False)
            r.close()
            if r.json()["base_resp"]["err_msg"] == "invalid session":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if r.json()["base_resp"]["err_msg"] == "freq control":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if "list" not in r.json() or len(r.json()["list"]) == 0:
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            fakeid = r.json()["list"][0]["fakeid"]
            head_url = r.json()["list"][0]["round_head_img"]
            fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
            return fakeid_dict

    # Build a Tencent Video download URL from a vid
    @classmethod
    def get_tencent_video_url(cls, video_id):
        url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
        # The endpoint returns JSONP-style "QZOutputJson={...};", so strip the wrapper before parsing
        response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
        response = json.loads(response)
        url = response['vl']['vi'][0]['ul']['ui'][0]['url']
        fvkey = response['vl']['vi'][0]['fvkey']
        video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
        return video_url
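
    # Usage sketch (the vid is a made-up placeholder; real vids are parsed from the
    # article's embedded player iframe in get_video_url below):
    # video_url = GongzhonghaoAuthor4.get_tencent_video_url("u3344abc123")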

    @classmethod
    def get_video_url(cls, article_url, env):
        # Capture network requests via performance logging
        ca = DesiredCapabilities.CHROME
        ca["goog:loggingPrefs"] = {"performance": "ALL"}
        # Run headless, without opening a browser window
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("headless")
        chrome_options.add_argument(
            f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
        chrome_options.add_argument("--no-sandbox")
        # Initialize the driver
        if env == "prod":
            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
        else:
            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
                '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
        driver.implicitly_wait(10)
        # Common.logger(log_type, crawler).info('打开文章链接')
        driver.get(article_url)
        time.sleep(1)
        # Case 1: a native video poster element carries the src directly
        if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
            video_url = driver.find_element(
                By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
        # Case 2: a Tencent Video iframe; extract the vid and resolve it
        elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
            iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
                'src')
            video_id = iframe.split('vid=')[-1].split('&')[0]
            video_url = cls.get_tencent_video_url(video_id)
        else:
            # No playable video found; 0 is the sentinel checked by the caller
            video_url = 0
        driver.quit()
        return video_url
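
    # Usage sketch (the article link is a made-up placeholder):
    # video_url = GongzhonghaoAuthor4.get_video_url("https://mp.weixin.qq.com/s/xxxx", env="dev")
    # -> a direct .mp4 URL, or 0 if the article has no playable video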

    # Fetch the article list for an account
    @classmethod
    def get_videoList(cls, log_type, crawler, wechat_name, rule_dict, user_name, uid, oss_endpoint, env):
        begin = 0
        while True:
            token_dict = cls.get_token(log_type, crawler, env)
            if token_dict is None:
                # get_token already slept and alerted; retry
                continue
            fakeid_dict = cls.get_fakeid(log_type=log_type,
                                         crawler=crawler,
                                         wechat_name=wechat_name,
                                         env=env)
            url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
            headers = {
                "accept": "*/*",
                "accept-encoding": "gzip, deflate, br",
                "accept-language": "zh-CN,zh;q=0.9",
                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
                           "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": '"Windows"',
                "sec-fetch-dest": "empty",
                "sec-fetch-mode": "cors",
                "sec-fetch-site": "same-origin",
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
                "x-requested-with": "XMLHttpRequest",
                'cookie': token_dict['cookie'],
            }
            params = {
                "action": "list_ex",
                "begin": str(begin),
                "count": "5",
                "fakeid": fakeid_dict['fakeid'],
                "type": "9",
                "query": "",
                "token": str(token_dict['token']),
                "lang": "zh_CN",
                "f": "json",
                "ajax": "1",
            }
            urllib3.disable_warnings()
            r = requests.get(url=url, headers=headers, params=params, verify=False)
            r.close()
            if r.json()["base_resp"]["err_msg"] == "invalid session":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if r.json()["base_resp"]["err_msg"] == "freq control":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if 'app_msg_list' not in r.json():
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if len(r.json()['app_msg_list']) == 0:
                Common.logger(log_type, crawler).info('没有更多视频了\n')
                return
            begin += 5
            app_msg_list = r.json()['app_msg_list']
            for article in app_msg_list:
                # title (strip characters that break file paths and SQL literals)
                video_title = article.get("title", "").replace('/', '').replace('\n', '') \
                    .replace('.', '').replace('“', '').replace('”', '').replace(' ', '') \
                    .replace('"', '').replace("'", "")
                # aid
                aid = article.get('aid', '')
                # create_time
                create_time = article.get('create_time', 0)
                publish_time_stamp = int(create_time)
                publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
                avatar_url = fakeid_dict['head_url']
                # cover_url
                cover_url = article.get('cover', '')
                # article_url
                article_url = article.get('link', '')
                video_url = cls.get_video_url(article_url, env)
                video_dict = {
                    'video_id': aid,
                    'video_title': video_title,
                    'publish_time_stamp': publish_time_stamp,
                    'publish_time_str': publish_time_str,
                    'user_name': user_name,
                    'play_cnt': 0,
                    'comment_cnt': 0,
                    'like_cnt': 0,
                    'share_cnt': 0,
                    'user_id': fakeid_dict['fakeid'],
                    'avatar_url': avatar_url,
                    'cover_url': cover_url,
                    'article_url': article_url,
                    'video_url': video_url,
                    'session': f'gongzhonghao-author4-{int(time.time())}'
                }
                for k, v in video_dict.items():
                    Common.logger(log_type, crawler).info(f"{k}:{v}")
                if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('min', 1000)):
                    Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('min', 1000))}天\n")
                    return
                if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
                    Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
                # Title filter-word check
                elif any(str(word) in video_dict['video_title']
                         for word in get_config_from_mysql(log_type=log_type,
                                                           source=crawler,
                                                           env=env,
                                                           text="filter",
                                                           action="")):
                    Common.logger(log_type, crawler).info("标题已中过滤词\n")
                # Already-downloaded check
                elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                    Common.logger(log_type, crawler).info("视频已下载\n")
                # Title similarity check
                elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
                    Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
                else:
                    cls.download_publish(log_type=log_type,
                                         crawler=crawler,
                                         video_dict=video_dict,
                                         rule_dict=rule_dict,
                                         uid=uid,
                                         oss_endpoint=oss_endpoint,
                                         env=env)
                Common.logger(log_type, crawler).info('休眠 60 秒\n')
                time.sleep(60)

    @classmethod
    def repeat_video(cls, log_type, crawler, video_id, env):
        sql = f""" select * from crawler_video where platform="公众号" and out_video_id="{video_id}"; """
        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
        return len(repeat_video)

    # Download / upload
    @classmethod
    def download_publish(cls, log_type, crawler, video_dict, rule_dict, uid, oss_endpoint, env):
        # Download the video
        Common.download_method(log_type=log_type, crawler=crawler, text="video",
                               title=video_dict["video_title"], url=video_dict["video_url"])
        md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
        # Probe the video's dimensions and duration
        ffmpeg_dict = Common.ffmpeg(log_type, crawler,
                                    f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
        if ffmpeg_dict is None:
            # Remove the video folder
            shutil.rmtree(f"./{crawler}/videos/{md_title}")
            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
            return
        video_dict["video_width"] = ffmpeg_dict["width"]
        video_dict["video_height"] = ffmpeg_dict["height"]
        video_dict["duration"] = ffmpeg_dict["duration"]
        video_size = ffmpeg_dict["size"]
        Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
        Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
        Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
        Common.logger(log_type, crawler).info(f'video_size:{video_size}')
        # Delete immediately if the video size is 0
        if int(video_size) == 0:
            shutil.rmtree(f"./{crawler}/videos/{md_title}")
            Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
            return
        # Delete if the download rules are not satisfied
        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False:
            shutil.rmtree(f"./{crawler}/videos/{md_title}")
            Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
            return
        # Download the cover image
        Common.download_method(log_type=log_type, crawler=crawler, text="cover",
                               title=video_dict["video_title"], url=video_dict["cover_url"])
        # Save video info to "./videos/{video_title}/info.txt"
        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
        # Upload the video
        Common.logger(log_type, crawler).info("开始上传视频...")
        strategy = "定向榜爬虫策略"
        our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                  crawler=crawler,
                                                  strategy=strategy,
                                                  our_uid=uid,
                                                  oss_endpoint=oss_endpoint,
                                                  env=env)
        if env == 'prod':
            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
        else:
            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
        Common.logger(log_type, crawler).info("视频上传完成")
        if our_video_id is None:
            # Remove the video folder after a failed upload
            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
            return
        insert_sql = f""" insert into crawler_video(video_id,
                                                    out_user_id,
                                                    platform,
                                                    strategy,
                                                    out_video_id,
                                                    video_title,
                                                    cover_url,
                                                    video_url,
                                                    duration,
                                                    publish_time,
                                                    play_cnt,
                                                    crawler_rule,
                                                    width,
                                                    height)
                          values({our_video_id},
                                 "{video_dict['user_id']}",
                                 "{cls.platform}",
                                 "定向爬虫策略",
                                 "{video_dict['video_id']}",
                                 "{video_dict['video_title']}",
                                 "{video_dict['cover_url']}",
                                 "{video_dict['video_url']}",
                                 {int(video_dict['duration'])},
                                 "{video_dict['publish_time_str']}",
                                 {int(video_dict['play_cnt'])},
                                 '{json.dumps(rule_dict)}',
                                 {int(video_dict['video_width'])},
                                 {int(video_dict['video_height'])}) """
        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
        Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
        # Write the video to Feishu
        Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
        # Video-ID sheet: write the data into the first row
        upload_time = int(time.time())
        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
                   "用户主页",
                   video_dict['video_title'],
                   video_dict['video_id'],
                   our_video_link,
                   int(video_dict['duration']),
                   f"{video_dict['video_width']}*{video_dict['video_height']}",
                   video_dict['publish_time_str'],
                   video_dict['user_name'],
                   video_dict['user_id'],
                   video_dict['avatar_url'],
                   video_dict['cover_url'],
                   video_dict['article_url'],
                   video_dict['video_url']]]
        time.sleep(0.5)
        Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
        Common.logger(log_type, crawler).info('视频下载/上传成功\n')

    @classmethod
    def get_all_videos(cls, log_type, crawler, user_list, rule_dict, oss_endpoint, env):
        if len(user_list) == 0:
            Common.logger(log_type, crawler).warning(f"抓取用户列表为空\n")
            return
        for user in user_list:
            # try:
            user_name = user['nick_name']
            wechat_name = user['link']
            uid = user['uid']
            Common.logger(log_type, crawler).info(f'获取 {user_name} 公众号视频\n')
            cls.get_videoList(log_type=log_type,
                              crawler=crawler,
                              wechat_name=wechat_name,
                              rule_dict=rule_dict,
                              user_name=user_name,
                              uid=uid,
                              oss_endpoint=oss_endpoint,
                              env=env)
            Common.logger(log_type, crawler).info('休眠 60 秒\n')
            time.sleep(60)
            # except Exception as e:
            #     Common.logger(log_type, crawler).info(f'get_all_videos异常:{e}\n')


if __name__ == "__main__":
    GongzhonghaoAuthor4.get_token("author", "gongzhonghao", "dev")
    # print(get_config_from_mysql("author", "gongzhonghao", "dev", "filter", action=""))
    pass
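
    # A hedged end-to-end sketch (all values below are made-up placeholders; the
    # user_list field names match the reads in get_all_videos: nick_name / link / uid):
    # GongzhonghaoAuthor4.get_all_videos(log_type="author",
    #                                    crawler="gongzhonghao",
    #                                    user_list=[{"nick_name": "示例号", "link": "公众号名称", "uid": 123456}],
    #                                    rule_dict={"period": {"min": 3, "max": 0}},
    #                                    oss_endpoint="out",
    #                                    env="dev")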