Server
/
piaoquan_crawler


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162
							# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/3
"""
YouTube 定向榜
    1. 发布时间<=1个月
    2. 10分钟>=时长>=1分钟
"""
import os
import re
import shutil
import sys
import time
import json
# import emoji
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
sys.path.append(os.getcwd())
from common.common import Common
from common.db import MysqlHelper
from common.feishu import Feishu
from common.users import Users
from common.publish import Publish
from common.translate import Translate


class Follow:
    # 翻页参数
    continuation = ''
    # 抓取平台
    platform = 'youtube'

    @classmethod
    def get_browse_id(cls, log_type, crawler, out_user_id, machine):
        """
        Look up the browse_id (YouTube channel id) of one channel.

        Opens https://www.youtube.com/{out_user_id}/videos in headless Chrome,
        clicks the "accept all" consent button when one is present, and reads the
        ``content`` attribute of the page's ``<meta itemprop="channelId">`` tag.

        :param log_type: log name passed to Common.logger
        :param crawler: crawler name passed to Common.logger
        :param out_user_id: off-site user UID, used verbatim as the channel path segment
        :param machine: deployment machine. "aliyun" / "aliyun_hk" start Chrome without an
            explicit chromedriver path; "macpro" and "macair" use their own chromedriver
            paths; any other value uses the local development path
        :return: browse_id, or None when the lookup fails (the error is logged)
        """
        # chromedriver binaries of the offline machines
        chromedriver_paths = {
            'macpro': '/Users/lieyunye/Downloads/chromedriver_v86/chromedriver',
            'macair': '/Users/piaoquan/Downloads/chromedriver',
        }
        local_chromedriver = '/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'
        # Consent button captions: Chinese first, then English
        accept_xpaths = ('//span[text()="全部接受"]', '//span[text()="Accept all"]')

        driver = None
        try:
            # Enable Chrome performance logging. Work on a copy so the shared
            # DesiredCapabilities.CHROME dict is not modified for other users of it.
            ca = DesiredCapabilities.CHROME.copy()
            ca["goog:loggingPrefs"] = {"performance": "ALL"}

            # Run without opening a browser window
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("--headless")
            chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
            chrome_options.add_argument("--no-sandbox")

            # Driver initialisation
            driver_kwargs = {'desired_capabilities': ca, 'options': chrome_options}
            if machine not in ('aliyun', 'aliyun_hk'):
                driver_kwargs['service'] = Service(chromedriver_paths.get(machine, local_chromedriver))
            driver = webdriver.Chrome(**driver_kwargs)

            driver.implicitly_wait(10)
            driver.get(f'https://www.youtube.com/{out_user_id}/videos')
            time.sleep(3)

            # Click the first consent button that exists, then give the page time to reload
            for xpath in accept_xpaths:
                buttons = driver.find_elements(By.XPATH, xpath)
                if buttons:
                    buttons[0].click()
                    time.sleep(2)
                    break

            return driver.find_element(By.XPATH, '//meta[@itemprop="channelId"]').get_attribute('content')
        except Exception as e:
            Common.logger(log_type, crawler).error(f'get_browse_id异常:{e}\n')
            return None
        finally:
            # Always shut the browser down, including on failure; otherwise every
            # failed lookup leaves a Chrome/chromedriver process behind.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as e:
                    Common.logger(log_type, crawler).warning(f'get_browse_id driver.quit failed:{e}\n')

    @classmethod
    def get_out_user_info(cls, log_type, crawler, browse_id, out_user_id):
        """
        Fetch the off-site (YouTube) profile of one channel.

        Posts a browse request for the channel's about page and reads the
        profile fields from the ``c4TabbedHeaderRenderer`` header and from the
        ``channelAboutFullMetadataRenderer`` of the tab titled "简介".

        :param log_type: log name passed to Common.logger
        :param crawler: crawler name passed to Common.logger
        :param browse_id: browse_id of the channel
        :param out_user_id: off-site user UID, used in the request URLs
        :return: out_user_dict = {'out_user_name': off-site nickname ('' if absent),
                                'out_avatar_url': off-site avatar URL ('' if absent),
                                'out_fans': subscriber count as int (0 if absent or unreadable),
                                'out_play_cnt': total view count as int (0 if absent),
                                'out_create_time': channel creation date text ('' if absent)}
                 or None when the request fails, the response lacks the expected
                 structure, or an exception occurs (a warning/error is logged)
        """
        try:
            url = "https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false"
            payload = json.dumps({
                "context": {
                    "client": {
                        "hl": "zh-CN",
                        "gl": "US",
                        "remoteHost": "38.93.247.21",
                        "deviceMake": "Apple",
                        "deviceModel": "",
                        "visitorData": "CgtraDZfVnB4NXdIWSjL1IKfBg%3D%3D",
                        "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36,gzip(gfe)",
                        "clientName": "WEB",
                        "clientVersion": "2.20230201.01.00",
                        "osName": "Macintosh",
                        "osVersion": "10_15_7",
                        "originalUrl": f"https://www.youtube.com/{out_user_id}/about",
                        "screenPixelDensity": 1,
                        "platform": "DESKTOP",
                        "clientFormFactor": "UNKNOWN_FORM_FACTOR",
                        "configInfo": {
                            "appInstallData": "CMvUgp8GEKLsrgUQzN-uBRC41K4FENfkrgUQsvWuBRDkoP4SELiLrgUQo_muBRDn964FENnprgUQlPiuBRC2nP4SEPuj_hIQ4tSuBRCJ6K4FEILdrgUQh92uBRD-7q4FEMz1rgUQ76P-EhDJya4FEJan_hIQkfj8Eg%3D%3D"
                        },
                        "screenDensityFloat": 1,
                        "timeZone": "Asia/Shanghai",
                        "browserName": "Chrome",
                        "browserVersion": "109.0.0.0",
                        "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                        "deviceExperimentId": "ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EMvUgp8GGOmU7Z4G",
                        "screenWidthPoints": 805,
                        "screenHeightPoints": 969,
                        "utcOffsetMinutes": 480,
                        "userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
                        "memoryTotalKbytes": "8000000",
                        "mainAppWebInfo": {
                            "graftUrl": f"/{out_user_id}/about",
                            "pwaInstallabilityStatus": "PWA_INSTALLABILITY_STATUS_CAN_BE_INSTALLED",
                            "webDisplayMode": "WEB_DISPLAY_MODE_FULLSCREEN",
                            "isWebNativeShareAvailable": True
                        }
                    },
                    "user": {
                        "lockedSafetyMode": False
                    },
                    "request": {
                        "useSsl": True,
                        "internalExperimentFlags": [],
                        "consistencyTokenJars": []
                    },
                    "clickTracking": {
                        "clickTrackingParams": "CBMQ8JMBGAoiEwjY34r0rYD9AhURSEwIHfHZAak="
                    },
                    "adSignalsInfo": {
                        "params": [
                            {
                                "key": "dt",
                                "value": "1675668045032"
                            },
                            {
                                "key": "flash",
                                "value": "0"
                            },
                            {
                                "key": "frm",
                                "value": "0"
                            },
                            {
                                "key": "u_tz",
                                "value": "480"
                            },
                            {
                                "key": "u_his",
                                "value": "1"
                            },
                            {
                                "key": "u_h",
                                "value": "1080"
                            },
                            {
                                "key": "u_w",
                                "value": "1920"
                            },
                            {
                                "key": "u_ah",
                                "value": "1080"
                            },
                            {
                                "key": "u_aw",
                                "value": "1920"
                            },
                            {
                                "key": "u_cd",
                                "value": "24"
                            },
                            {
                                "key": "bc",
                                "value": "31"
                            },
                            {
                                "key": "bih",
                                "value": "969"
                            },
                            {
                                "key": "biw",
                                "value": "805"
                            },
                            {
                                "key": "brdim",
                                "value": "-269,-1080,-269,-1080,1920,-1080,1920,1080,805,969"
                            },
                            {
                                "key": "vis",
                                "value": "1"
                            },
                            {
                                "key": "wgl",
                                "value": "true"
                            },
                            {
                                "key": "ca_type",
                                "value": "image"
                            }
                        ],
                        "bid": "ANyPxKqvCBKtjNeHQ6uTC7sKj2ZwIvEkk3oRlmdU7H_soRJWLc4IQCkqMVP68RR-Xae0h3nMdOKYOtVh_Yb2OYr4znd60I5j7A"
                    }
                },
                "browseId": browse_id,
                "params": "EgVhYm91dPIGBAoCEgA%3D"
            })
            headers = {
                'authority': 'www.youtube.com',
                'accept': '*/*',
                'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
                'cache-control': 'no-cache',
                'content-type': 'application/json',
                'cookie': 'VISITOR_INFO1_LIVE=kh6_Vpx5wHY; YSC=UupqFrWvAR0; DEVICE_INFO=ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EOmU7Z4GGOmU7Z4G; GPS=1; PREF=tz=Asia.Shanghai; ST-h076le=itct=CBMQ8JMBGAoiEwjY34r0rYD9AhURSEwIHfHZAak%3D&csn=MC45NDM2MjgyNzM1ODE5NDAz&endpoint=%7B%22clickTrackingParams%22%3A%22CBMQ8JMBGAoiEwjY34r0rYD9AhURSEwIHfHZAak%3D%22%2C%22commandMetadata%22%3A%7B%22webCommandMetadata%22%3A%7B%22url%22%3A%22%2F%40weitravel%2Fabout%22%2C%22webPageType%22%3A%22WEB_PAGE_TYPE_CHANNEL%22%2C%22rootVe%22%3A3611%2C%22apiUrl%22%3A%22%2Fyoutubei%2Fv1%2Fbrowse%22%7D%7D%2C%22browseEndpoint%22%3A%7B%22browseId%22%3A%22UC08jgxf119fzynp2uHCvZIg%22%2C%22params%22%3A%22EgVhYm91dPIGBAoCEgA%253D%22%2C%22canonicalBaseUrl%22%3A%22%2F%40weitravel%22%7D%7D',
                'origin': 'https://www.youtube.com',
                'pragma': 'no-cache',
                'referer': f'https://www.youtube.com/{out_user_id}/videos',
                'sec-ch-ua': '"Not_A Brand";v="99", "Chromium";v="109", "Google Chrome";v="109.0.5414.87"',
                'sec-ch-ua-arch': '"arm"',
                'sec-ch-ua-bitness': '"64"',
                'sec-ch-ua-full-version': '"109.0.1518.52"',
                'sec-ch-ua-full-version-list': '"Not_A Brand";v="99.0.0.0", "Microsoft Edge";v="109.0.1518.52", "Chromium";v="109.0.5414.87"',
                'sec-ch-ua-mobile': '?0',
                'sec-ch-ua-model': '',
                'sec-ch-ua-platform': '"macOS"',
                'sec-ch-ua-platform-version': '"12.4.0"',
                'sec-ch-ua-wow64': '?0',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'same-origin',
                'sec-fetch-site': 'same-origin',
                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
                'x-goog-visitor-id': 'CgtraDZfVnB4NXdIWSjL1IKfBg%3D%3D',
                'x-youtube-bootstrap-logged-in': 'false',
                'x-youtube-client-name': '1',
                'x-youtube-client-version': '2.20230201.01.00'
            }
            # A timeout keeps one stalled connection from blocking the crawler
            # forever; a timeout error is handled by the except clause below.
            response = requests.post(url=url, headers=headers, data=payload, timeout=30)
            if response.status_code != 200:
                Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.text}\n')
                return None
            if 'contents' not in response.text or 'header' not in response.text:
                Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.text}\n')
                return None

            # Decode the body once instead of re-parsing it for every lookup
            body = response.json()
            if 'c4TabbedHeaderRenderer' not in body['header']:
                Common.logger(log_type, crawler).warning(f'get_out_user_info:{body["header"]}\n')
                return None
            if 'twoColumnBrowseResultsRenderer' not in body['contents']:
                Common.logger(log_type, crawler).warning(f'get_out_user_info:{body}\n')
                return None
            browse_results = body['contents']['twoColumnBrowseResultsRenderer']
            if 'tabs' not in browse_results:
                Common.logger(log_type, crawler).warning(f"get_out_user_info:{browse_results}\n")
                return None

            header = body['header']['c4TabbedHeaderRenderer']
            for tab in browse_results['tabs']:
                # Only the tab titled "简介" is read (presumably the localized
                # about-tab title, since the request asks for hl=zh-CN).
                if tab.get('tabRenderer', {}).get('title', '') != '简介':
                    continue
                about = cls._find_about_metadata(log_type, crawler, tab)
                if about is None:
                    continue
                return cls._build_out_user_dict(header, about)
            return None
        except Exception as e:
            Common.logger(log_type, crawler).error(f'get_out_user_info异常:{e}\n')
            return None

    @classmethod
    def _find_about_metadata(cls, log_type, crawler, tab):
        """Return the channelAboutFullMetadataRenderer dict nested in *tab*, or None after logging the deepest dict reached."""
        path = ('tabRenderer', 'content', 'sectionListRenderer', 'contents', 0,
                'itemSectionRenderer', 'contents', 0, 'channelAboutFullMetadataRenderer')
        node = tab
        holder = tab  # last dict visited; this is what gets logged when a step fails
        for step in path:
            if isinstance(step, str):
                holder = node
                missing = step not in node
            else:
                # Integer step: node is the list stored under the previous key
                missing = len(node) == 0
            if missing:
                Common.logger(log_type, crawler).warning(f"get_out_user_info:{holder}\n")
                return None
            node = node[step]
        return node

    @classmethod
    def _build_out_user_dict(cls, header, about):
        """Build out_user_dict from the channel header dict and the about-tab metadata dict."""
        # Off-site nickname: header first, about-tab metadata as fallback
        if 'title' in header:
            out_user_name = header['title']
        else:
            out_user_name = about.get('title', {}).get('simpleText', '')

        # Off-site avatar: header first, about-tab metadata as fallback. Each
        # source is probed independently so a missing 'avatar' in one of them
        # no longer raises KeyError.
        out_avatar_url = cls._last_thumbnail_url(header) or cls._last_thumbnail_url(about)

        # Off-site subscriber count
        label = (header.get('subscriberCountText', {})
                 .get('accessibility', {})
                 .get('accessibilityData', {})
                 .get('label'))
        out_fans = 0 if label is None else cls._parse_subscriber_label(label)

        # Off-site total view count: the text before "次", thousands separators removed
        view_text = about.get('viewCountText', {}).get('simpleText')
        if view_text is None:
            out_play_cnt = 0
        else:
            out_play_cnt = int(view_text.split('次')[0].replace(',', ''))

        # Off-site registration time: "年"/"月" become "-" and "日" is dropped
        runs = about.get('joinedDateText', {}).get('runs') or []
        if runs and 'text' in runs[0]:
            out_create_time = runs[0]['text'].replace('年', '-').replace('月', '-').replace('日', '')
        else:
            out_create_time = ''

        return {
            'out_user_name': out_user_name,
            'out_avatar_url': out_avatar_url,
            'out_fans': out_fans,
            'out_play_cnt': out_play_cnt,
            'out_create_time': out_create_time,
        }

    @staticmethod
    def _last_thumbnail_url(owner):
        """Return the 'url' of the last entry in owner['avatar']['thumbnails'], or '' when any part is missing."""
        thumbnails = owner.get('avatar', {}).get('thumbnails') or []
        if not thumbnails:
            return ''
        return thumbnails[-1].get('url', '')

    @staticmethod
    def _parse_subscriber_label(label):
        """
        Convert a subscriber label to an int.

        Reads the leading number (thousands separators allowed) and scales it
        by an optional "万" (x10,000) or "亿" (x100,000,000) unit. Returns 0
        when the label does not start with a number, so the result is always
        numeric.
        """
        match = re.match(r'\s*(\d[\d,]*(?:\.\d+)?)\s*(亿|万)?', str(label))
        if match is None:
            return 0
        multiplier = {'亿': 100000000, '万': 10000, None: 1}[match.group(2)]
        # round() instead of int(): 4.35 * 10000 is 43499.999... in floating point
        return round(float(match.group(1).replace(',', '')) * multiplier)

    @classmethod
    def get_user_from_feishu(cls, log_type, crawler, sheetid, env, machine):
        """
        Complete the Feishu user sheet and return the users it lists.

        For every row after the first one:
          * when the browse_id cell (column F) is empty, resolve it with
            get_browse_id and write it back to the sheet;
          * when the on-site UID cell (column G) is empty, look the user up in
            crawler_user by (platform, out_user_id). If no row exists, create
            an on-site account, write UID and profile link to columns G:H and
            insert a crawler_user row; otherwise write the stored UID and link
            to columns G:H.

        :param log_type: log name passed to Common.logger
        :param crawler: crawler name passed to Common.logger
        :param sheetid: Feishu sheet id
        :param env: production: prod, test: dev
        :param machine: deployment machine: aliyun / aliyun_hk on Alibaba Cloud; macpro, macair, local offline
        :return: user_list, a list of dicts with keys 'out_user_id', 'out_user_name',
                 'out_browse_id', 'our_user_id'; None when an exception occurs (it is logged)
        """
        try:
            user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
            user_list = []
            # Index 0 is skipped (presumably the header row). row_no is the 1-based
            # sheet row used in the cell ranges written back below.
            for row_no, row in enumerate(user_sheet[1:], start=2):
                out_uid = row[2]
                user_name = row[3]
                browse_id = row[5]
                our_uid = row[6]
                Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")

                # Resolve the off-site browse_id and write it to Feishu
                if browse_id is None:
                    browse_id = cls.get_browse_id(log_type, crawler, out_uid, machine)
                    if browse_id is None:
                        Common.logger(log_type, crawler).warning('browse_id is None !')
                    else:
                        Feishu.update_values(log_type, crawler, sheetid, f'F{row_no}:F{row_no}', [[browse_id]])
                        Common.logger(log_type, crawler).info(f'browse_id写入成功:{browse_id}')

                # On-site UID is empty: create or recover the on-site account
                if our_uid is None:
                    # NOTE(review): SQL below is built by string interpolation from sheet and
                    # YouTube values; a value containing '"' breaks the statement. Switch to
                    # parameterised queries if MysqlHelper supports them.
                    sql = f""" select * from crawler_user where platform="{cls.platform}" and out_user_id="{out_uid}" """
                    our_user_info = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
                    # No (youtube + out_user_id) row in the database: create the on-site account,
                    # write it to the Feishu sheet, and insert it with the off-site profile
                    if our_user_info is None or len(our_user_info) == 0:
                        # Fetch the off-site account info to store in the database
                        out_user_dict = cls.get_out_user_info(log_type, crawler, browse_id, out_uid)
                        if out_user_dict is None:
                            # get_out_user_info returns None on failure; skip only this user
                            # so one failed lookup does not abort the whole sheet.
                            Common.logger(log_type, crawler).warning(f'get_out_user_info returned None, skip user:{user_name}\n')
                            continue
                        out_avatar_url = out_user_dict['out_avatar_url']
                        out_create_time = out_user_dict['out_create_time']
                        out_play_cnt = out_user_dict['out_play_cnt']
                        out_fans = out_user_dict['out_fans']
                        tag = 'youtube爬虫,定向爬虫策略'

                        # Create the on-site account
                        create_user_dict = {
                            'nickName': user_name,
                            'avatarUrl': out_avatar_url,
                            'tagName': tag,
                        }
                        our_uid = Users.create_user(log_type, crawler, create_user_dict, env)
                        Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
                        if env == 'dev':
                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
                        else:
                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
                        Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
                        Feishu.update_values(log_type, crawler, sheetid, f'G{row_no}:H{row_no}', [[our_uid, our_user_link]])
                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功！')

                        sql = f""" insert into crawler_user(user_id, 
                                            out_user_id, 
                                            out_user_name, 
                                            out_avatar_url, 
                                            out_create_time, 
                                            out_play_cnt, 
                                            out_fans, 
                                            platform, 
                                            tag)
                                            values({our_uid}, 
                                            "{out_uid}", 
                                            "{user_name}", 
                                            "{out_avatar_url}", 
                                            "{out_create_time}", 
                                            {out_play_cnt}, 
                                            {out_fans}, 
                                            "{cls.platform}",
                                            "{tag}") """
                        Common.logger(log_type, crawler).info(f'sql:{sql}')
                        MysqlHelper.update_values(log_type, crawler, sql, env, machine)
                        Common.logger(log_type, crawler).info('用户信息插入数据库成功！\n')
                    # A (youtube + out_user_id) row exists: write its on-site UID to Feishu
                    else:
                        our_uid = our_user_info[0][1]
                        # Compare the env argument, not the literal 'env'. The literal
                        # comparison was always False and gave prod the testadmin link.
                        if env == 'prod':
                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
                        else:
                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
                        Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
                        Feishu.update_values(log_type, crawler, sheetid, f'G{row_no}:H{row_no}', [[our_uid, our_user_link]])
                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功！\n')

                user_list.append({
                    'out_user_id': out_uid,
                    'out_user_name': user_name,
                    'out_browse_id': browse_id,
                    'our_user_id': our_uid,
                })
            return user_list
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_user_from_feishu异常:{e}\n")
            return None

    @classmethod
    def get_feeds(cls, log_type, crawler, browse_id, out_uid):
        """
        Fetch one page of the video list shown on a user's YouTube channel page.

        Posts to the ``youtubei/v1/browse`` endpoint. On an HTTP 200 reply the
        response's ``trackingParams`` value is stored in ``cls.continuation``
        (which is also what the next request sends as ``continuation``), and the
        feed items are taken from ``continuationContents`` or, failing that,
        from ``onResponseReceivedActions``.

        :param log_type: log type handed to ``Common.logger``
        :param crawler: crawler name handed to ``Common.logger``
        :param browse_id: value sent as ``browseId``; unique per user channel page
        :param out_uid: off-site (YouTube) user UID, used to build the page URLs
        :return: the feed items, or None when the request fails or the response
                 holds no usable feed list (the reason is logged)
        """
        url = "https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false"
        # Fixed ad-signal pairs; expanded below into {"key": ..., "value": ...} objects.
        ad_signal_pairs = (
            ("dt", "1675676731048"),
            ("flash", "0"),
            ("frm", "0"),
            ("u_tz", "480"),
            ("u_his", "4"),
            ("u_h", "1080"),
            ("u_w", "1920"),
            ("u_ah", "1080"),
            ("u_aw", "1920"),
            ("u_cd", "24"),
            ("bc", "31"),
            ("bih", "969"),
            ("biw", "944"),
            ("brdim", "-269,-1080,-269,-1080,1920,-1080,1920,1080,944,969"),
            ("vis", "1"),
            ("wgl", "true"),
            ("ca_type", "image"),
        )
        payload = json.dumps({
            "context": {
                "client": {
                    "hl": "zh-CN",
                    "gl": "US",
                    "remoteHost": "38.93.247.21",
                    "deviceMake": "Apple",
                    "deviceModel": "",
                    "visitorData": "CgtraDZfVnB4NXdIWSi6mIOfBg%3D%3D",
                    "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36,gzip(gfe)",
                    "clientName": "WEB",
                    "clientVersion": "2.20230201.01.00",
                    "osName": "Macintosh",
                    "osVersion": "10_15_7",
                    "originalUrl": f"https://www.youtube.com/{out_uid}/videos",
                    "platform": "DESKTOP",
                    "clientFormFactor": "UNKNOWN_FORM_FACTOR",
                    "configInfo": {
                        "appInstallData": "CLqYg58GEInorgUQuIuuBRCU-K4FENfkrgUQuNSuBRC2nP4SEPuj_hIQ5_euBRCy9a4FEKLsrgUQt-CuBRDi1K4FEILdrgUQh92uBRDM364FEP7urgUQzPWuBRDZ6a4FEOSg_hIQo_muBRDvo_4SEMnJrgUQlqf-EhCR-PwS"
                    },
                    "timeZone": "Asia/Shanghai",
                    "browserName": "Chrome",
                    "browserVersion": "109.0.0.0",
                    "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                    "deviceExperimentId": "ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09ELqYg58GGOmU7Z4G",
                    "screenWidthPoints": 944,
                    "screenHeightPoints": 969,
                    "screenPixelDensity": 1,
                    "screenDensityFloat": 1,
                    "utcOffsetMinutes": 480,
                    "userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
                    "memoryTotalKbytes": "8000000",
                    "mainAppWebInfo": {
                        "graftUrl": f"/{out_uid}/videos",
                        "pwaInstallabilityStatus": "PWA_INSTALLABILITY_STATUS_CAN_BE_INSTALLED",
                        "webDisplayMode": "WEB_DISPLAY_MODE_FULLSCREEN",
                        "isWebNativeShareAvailable": True
                    }
                },
                "user": {
                    "lockedSafetyMode": False
                },
                "request": {
                    "useSsl": True,
                    "internalExperimentFlags": [],
                    "consistencyTokenJars": []
                },
                "clickTracking": {
                    "clickTrackingParams": "CBcQ8JMBGAYiEwiNhIXX9IL9AhUFSUwIHWnnDks="
                },
                "adSignalsInfo": {
                    "params": [{"key": key, "value": value} for key, value in ad_signal_pairs],
                    "bid": "ANyPxKpfiaAf-DBzNeKLgkceMEA9UIeCWFRTRm4AQMCuejhI3PGwDB1jizQIX60YcEYtt_CX7tZWAbYerQ-rWLvV7y_KCLkBww"
                }
            },
            "browseId": browse_id,
            "params": "EgZ2aWRlb3PyBgQKAjoA",
            "continuation": cls.continuation
        })
        headers = {
            'authority': 'www.youtube.com',
            'accept': '*/*',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'cache-control': 'no-cache',
            'content-type': 'application/json',
            'cookie': 'VISITOR_INFO1_LIVE=kh6_Vpx5wHY; YSC=UupqFrWvAR0; DEVICE_INFO=ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EOmU7Z4GGOmU7Z4G; PREF=tz=Asia.Shanghai; ST-1kg1gfd=itct=CBcQ8JMBGAYiEwiNhIXX9IL9AhUFSUwIHWnnDks%3D&csn=MC4zNzI3MDcwMDA1Mjg4NzE5Ng..&endpoint=%7B%22clickTrackingParams%22%3A%22CBcQ8JMBGAYiEwiNhIXX9IL9AhUFSUwIHWnnDks%3D%22%2C%22commandMetadata%22%3A%7B%22webCommandMetadata%22%3A%7B%22url%22%3A%22%2F%40chinatravel5971%2Fvideos%22%2C%22webPageType%22%3A%22WEB_PAGE_TYPE_CHANNEL%22%2C%22rootVe%22%3A3611%2C%22apiUrl%22%3A%22%2Fyoutubei%2Fv1%2Fbrowse%22%7D%7D%2C%22browseEndpoint%22%3A%7B%22browseId%22%3A%22UCpLXnfBCNhj8KLnt54RQMKA%22%2C%22params%22%3A%22EgZ2aWRlb3PyBgQKAjoA%22%2C%22canonicalBaseUrl%22%3A%22%2F%40chinatravel5971%22%7D%7D',
            'origin': 'https://www.youtube.com',
            'pragma': 'no-cache',
            'referer': f'https://www.youtube.com/{out_uid}/featured',
            'sec-ch-ua': '"Not_A Brand";v="99", "Chromium";v="109", "Google Chrome";v="109.0.5414.87"',
            'sec-ch-ua-arch': '"arm"',
            'sec-ch-ua-bitness': '"64"',
            'sec-ch-ua-full-version': '"109.0.1518.52"',
            'sec-ch-ua-full-version-list': '"Not_A Brand";v="99.0.0.0", "Microsoft Edge";v="109.0.1518.52", "Chromium";v="109.0.5414.87"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-model': '',
            'sec-ch-ua-platform': '"macOS"',
            'sec-ch-ua-platform-version': '"12.4.0"',
            'sec-ch-ua-wow64': '?0',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'same-origin',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
            'x-goog-visitor-id': 'CgtraDZfVnB4NXdIWSi6mIOfBg%3D%3D',
            'x-youtube-bootstrap-logged-in': 'false',
            'x-youtube-client-name': '1',
            'x-youtube-client-version': '2.20230201.01.00'
        }
        try:
            # A timeout keeps a stalled connection from blocking the crawler forever;
            # the resulting exception is logged by the handler below.
            response = requests.post(url=url, headers=headers, data=payload, timeout=30)

            # Check the status before touching the body: an error reply must not be
            # parsed as JSON nor overwrite the stored continuation value.
            if response.status_code != 200:
                Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.text}\n')
                return None

            # Parse the body once and reuse it for every check below.
            data = response.json()
            # A reply without 'trackingParams' raises KeyError and is logged below.
            cls.continuation = data['trackingParams']

            # Cheap substring test on the raw body: neither container is mentioned anywhere.
            if 'continuationContents' not in response.text and 'onResponseReceivedActions' not in response.text:
                Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.text}\n')
                return None

            if 'continuationContents' in data:
                continuation_contents = data['continuationContents']
                if 'richGridContinuation' not in continuation_contents:
                    Common.logger(log_type, crawler).warning(f'get_feeds_response:{continuation_contents}\n')
                    return None
                rich_grid = continuation_contents['richGridContinuation']
                if 'contents' not in rich_grid:
                    Common.logger(log_type, crawler).warning(f'get_feeds_response:{rich_grid}\n')
                    return None
                return rich_grid['contents']

            if 'onResponseReceivedActions' in data:
                Common.logger(log_type, crawler).info("'onResponseReceivedActions' in response.json()\n")
                actions = data['onResponseReceivedActions']
                if not actions:
                    Common.logger(log_type, crawler).warning(f'get_feeds_response:{actions}\n')
                    return None
                first_action = actions[0]
                if 'appendContinuationItemsAction' not in first_action:
                    Common.logger(log_type, crawler).warning(f'get_feeds_response:{first_action}\n')
                    return None
                append_action = first_action['appendContinuationItemsAction']
                if 'continuationItems' not in append_action:
                    Common.logger(log_type, crawler).warning(f'get_feeds_response:{append_action}\n')
                    return None
                continuation_items = append_action['continuationItems']
                if not continuation_items:
                    Common.logger(log_type, crawler).warning(f'get_feeds_response:{continuation_items}\n')
                    return None
                return continuation_items

            # The names appeared in the raw text but not as top-level keys.
            Common.logger(log_type, crawler).info('feeds is None\n')
            return None

        except Exception as e:
            Common.logger(log_type, crawler).error(f'get_feeds异常:{e}\n')
            return None

    @classmethod
    def get_videos(cls, log_type, crawler, strategy, oss_endpoint, env, browse_id, out_uid, our_uid, machine):
        """
        Walk a user's feed pages and download/publish every recent video.

        Pages are requested through ``cls.get_feeds`` until one of these happens:
        no feed items come back, an item does not have the expected
        ``richItemRenderer -> content -> videoRenderer -> videoId`` shape, or a
        video older than 180 days is reached.

        :param log_type: log type handed to ``Common.logger``
        :param crawler: crawler name handed to ``Common.logger``
        :param strategy: forwarded to ``cls.download_publish``
        :param oss_endpoint: forwarded to ``cls.download_publish``
        :param env: forwarded to ``cls.download_publish``
        :param browse_id: forwarded to ``cls.get_feeds``
        :param out_uid: off-site (YouTube) user UID
        :param our_uid: in-site user UID, forwarded to ``cls.download_publish``
        :param machine: forwarded to ``cls.get_video_info`` and ``cls.download_publish``
        :return: None
        """
        max_age_seconds = 3600 * 24 * 180
        try:
            while True:
                feeds = cls.get_feeds(log_type, crawler, browse_id, out_uid)
                if not feeds:
                    # None means get_feeds could not produce a feed list; an empty
                    # page means there is nothing left. Either way stop here instead
                    # of raising on len(None) or re-requesting forever.
                    return

                for feed in feeds:
                    # Any item that is not a plain video entry ends the crawl for this user.
                    if 'richItemRenderer' not in feed:
                        Common.logger(log_type, crawler).warning(f'feeds:{feed}\n')
                        return
                    rich_item = feed['richItemRenderer']
                    if 'content' not in rich_item:
                        Common.logger(log_type, crawler).warning(f'feeds:{rich_item}\n')
                        return
                    content = rich_item['content']
                    if 'videoRenderer' not in content:
                        Common.logger(log_type, crawler).warning(f'feeds:{content}\n')
                        return
                    video_renderer = content['videoRenderer']
                    if 'videoId' not in video_renderer:
                        Common.logger(log_type, crawler).warning(f'feeds:{video_renderer}\n')
                        return

                    video_id = video_renderer['videoId']
                    video_dict = cls.get_video_info(log_type, crawler, out_uid, video_id, machine)
                    if video_dict is None:
                        # get_video_info already logged the cause; skip this video only.
                        Common.logger(log_type, crawler).warning(f'get_video_info returned nothing for video_id:{video_id}, skipped\n')
                        continue

                    # Only videos published within the last 180 days are downloaded.
                    # Use the timestamp get_video_info already parsed rather than
                    # re-parsing the date string with a single hard-coded format.
                    if int(time.time()) - video_dict['publish_time_stamp'] <= max_age_seconds:
                        cls.download_publish(log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint, machine)
                    else:
                        Common.logger(log_type, crawler).info('发布时间超过180天\n')
                        return
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_videos异常:{e}\n")

    @classmethod
    def filter_emoji(cls, title):
        """
        Remove emoji from ``title``.

        Every code point in the range U+10000..U+10FFFF is deleted; all other
        characters are returned unchanged.

        :param title: text to clean
        :return: ``title`` without the matched characters
        """
        astral_range = u'[\U00010000-\U0010ffff]'
        surrogate_pairs = u'[\uD800-\uDBFF][\uDC00-\uDFFF]'
        try:
            emoji_pattern = re.compile(astral_range)
        except re.error:
            # Used only if the interpreter rejects the range above:
            # match a high surrogate followed by a low surrogate instead.
            emoji_pattern = re.compile(surrogate_pairs)
        return emoji_pattern.sub("", title)

    @classmethod
    def get_video_info(cls, log_type, crawler, out_uid, video_id, machine):
        """
        Fetch the metadata of one YouTube video through ``youtubei/v1/player``.

        :param log_type: log type handed to ``Common.logger``
        :param crawler: crawler name handed to ``Common.logger``
        :param out_uid: off-site (YouTube) user UID, copied into the result
        :param video_id: YouTube video id
        :param machine: forwarded to ``Translate.google_translate``
        :return: dict with the keys ``video_title``, ``video_id``, ``duration``,
                 ``play_cnt``, ``publish_time``, ``publish_time_stamp``,
                 ``user_name``, ``out_uid``, ``cover_url`` and ``video_url``;
                 None when the request fails, the response lacks a required
                 section, or any exception occurs (the reason is logged)
        """
        try:
            url = "https://www.youtube.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false"
            # Fixed ad-signal pairs; expanded below into {"key": ..., "value": ...} objects.
            ad_signal_pairs = (
                ("dt", "1675749222611"),
                ("flash", "0"),
                ("frm", "0"),
                ("u_tz", "480"),
                ("u_his", "3"),
                ("u_h", "1080"),
                ("u_w", "1920"),
                ("u_ah", "1080"),
                ("u_aw", "1920"),
                ("u_cd", "24"),
                ("bc", "31"),
                ("bih", "969"),
                ("biw", "1037"),
                ("brdim", "-269,-1080,-269,-1080,1920,-1080,1920,1080,1037,969"),
                ("vis", "1"),
                ("wgl", "true"),
                ("ca_type", "image"),
            )
            payload = json.dumps({
                "context": {
                    "client": {
                        "hl": "zh-CN",
                        "gl": "US",
                        "remoteHost": "38.93.247.21",
                        "deviceMake": "Apple",
                        "deviceModel": "",
                        "visitorData": "CgtraDZfVnB4NXdIWSjkzoefBg%3D%3D",
                        "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36,gzip(gfe)",
                        "clientName": "WEB",
                        "clientVersion": "2.20230201.01.00",
                        "osName": "Macintosh",
                        "osVersion": "10_15_7",
                        "originalUrl": f"https://www.youtube.com/watch?v={video_id}",
                        "platform": "DESKTOP",
                        "clientFormFactor": "UNKNOWN_FORM_FACTOR",
                        "configInfo": {
                            "appInstallData": "COTOh58GEPuj_hIQ1-SuBRC4i64FEMzfrgUQgt2uBRCi7K4FEOLUrgUQzPWuBRCKgK8FEOSg_hIQtpz-EhDa6a4FEP7urgUQieiuBRDn964FELjUrgUQlPiuBRCH3a4FELfgrgUQ76P-EhDJya4FEJan_hIQkfj8Eg%3D%3D"
                        },
                        "timeZone": "Asia/Shanghai",
                        "browserName": "Chrome",
                        "browserVersion": "109.0.0.0",
                        "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                        "deviceExperimentId": "ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EOTOh58GGOmU7Z4G",
                        "screenWidthPoints": 1037,
                        "screenHeightPoints": 969,
                        "screenPixelDensity": 1,
                        "screenDensityFloat": 1,
                        "utcOffsetMinutes": 480,
                        "userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
                        "memoryTotalKbytes": "8000000",
                        "clientScreen": "WATCH",
                        "mainAppWebInfo": {
                            "graftUrl": f"/watch?v={video_id}",
                            "pwaInstallabilityStatus": "PWA_INSTALLABILITY_STATUS_CAN_BE_INSTALLED",
                            "webDisplayMode": "WEB_DISPLAY_MODE_FULLSCREEN",
                            "isWebNativeShareAvailable": True
                        }
                    },
                    "user": {
                        "lockedSafetyMode": False
                    },
                    "request": {
                        "useSsl": True,
                        "internalExperimentFlags": [],
                        "consistencyTokenJars": []
                    },
                    "clickTracking": {
                        "clickTrackingParams": "CIwBEKQwGAYiEwipncqx3IL9AhXs4cQKHbKZDO4yB3JlbGF0ZWRInsS1qbGFtIlUmgEFCAEQ-B0="
                    },
                    "adSignalsInfo": {
                        "params": [{"key": key, "value": value} for key, value in ad_signal_pairs],
                        "bid": "ANyPxKop8SijebwUCq4ZfKbJwlSjVQa_RTdS6c6a6WPYpCKnxpWCJ33B1SzRuSXjSfH9O2MhURebAs0CngRg6B4nOjBpeJDKgA"
                    }
                },
                "videoId": str(video_id),
                "playbackContext": {
                    "contentPlaybackContext": {
                        "currentUrl": f"/watch?v={video_id}",
                        "vis": 0,
                        "splay": False,
                        "autoCaptionsDefaultOn": False,
                        "autonavState": "STATE_NONE",
                        "html5Preference": "HTML5_PREF_WANTS",
                        "signatureTimestamp": 19394,
                        "referer": f"https://www.youtube.com/watch?v={video_id}",
                        "lactMilliseconds": "-1",
                        "watchAmbientModeContext": {
                            "watchAmbientModeEnabled": True
                        }
                    }
                },
                "racyCheckOk": False,
                "contentCheckOk": False
            })
            headers = {
                'authority': 'www.youtube.com',
                'accept': '*/*',
                'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
                'cache-control': 'no-cache',
                'content-type': 'application/json',
                'cookie': f'VISITOR_INFO1_LIVE=kh6_Vpx5wHY; YSC=UupqFrWvAR0; DEVICE_INFO=ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EOmU7Z4GGOmU7Z4G; PREF=tz=Asia.Shanghai; ST-180dxzo=itct=CIwBEKQwGAYiEwipncqx3IL9AhXs4cQKHbKZDO4yB3JlbGF0ZWRInsS1qbGFtIlUmgEFCAEQ-B0%3D&csn=MC41MTQ1NTQzMTE3NTA4MjY0&endpoint=%7B%22clickTrackingParams%22%3A%22CIwBEKQwGAYiEwipncqx3IL9AhXs4cQKHbKZDO4yB3JlbGF0ZWRInsS1qbGFtIlUmgEFCAEQ-B0%3D%22%2C%22commandMetadata%22%3A%7B%22webCommandMetadata%22%3A%7B%22url%22%3A%22%2Fwatch%3Fv%3D{video_id}%22%2C%22webPageType%22%3A%22WEB_PAGE_TYPE_WATCH%22%2C%22rootVe%22%3A3832%7D%7D%2C%22watchEndpoint%22%3A%7B%22videoId%22%3A%22{video_id}%22%2C%22nofollow%22%3Atrue%2C%22watchEndpointSupportedOnesieConfig%22%3A%7B%22html5PlaybackOnesieConfig%22%3A%7B%22commonConfig%22%3A%7B%22url%22%3A%22https%3A%2F%2Frr5---sn-nx5s7n76.googlevideo.com%2Finitplayback%3Fsource%3Dyoutube%26oeis%3D1%26c%3DWEB%26oad%3D3200%26ovd%3D3200%26oaad%3D11000%26oavd%3D11000%26ocs%3D700%26oewis%3D1%26oputc%3D1%26ofpcc%3D1%26msp%3D1%26odepv%3D1%26id%3D38654ad085c12212%26ip%3D38.93.247.21%26initcwndbps%3D11346250%26mt%3D1675748964%26oweuc%3D%26pxtags%3DCg4KAnR4EggyNDQ1MTI4OA%26rxtags%3DCg4KAnR4EggyNDQ1MTI4Ng%252CCg4KAnR4EggyNDQ1MTI4Nw%252CCg4KAnR4EggyNDQ1MTI4OA%252CCg4KAnR4EggyNDQ1MTI4OQ%22%7D%7D%7D%7D%7D',
                'origin': 'https://www.youtube.com',
                'pragma': 'no-cache',
                'referer': f'https://www.youtube.com/watch?v={video_id}',
                'sec-ch-ua': '"Not_A Brand";v="99", "Chromium";v="109", "Google Chrome";v="109.0.5414.87"',
                'sec-ch-ua-arch': '"arm"',
                'sec-ch-ua-bitness': '"64"',
                'sec-ch-ua-full-version': '"109.0.1518.52"',
                'sec-ch-ua-full-version-list': '"Not_A Brand";v="99.0.0.0", "Microsoft Edge";v="109.0.1518.52", "Chromium";v="109.0.5414.87"',
                'sec-ch-ua-mobile': '?0',
                'sec-ch-ua-model': '',
                'sec-ch-ua-platform': '"macOS"',
                'sec-ch-ua-platform-version': '"12.4.0"',
                'sec-ch-ua-wow64': '?0',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'same-origin',
                'sec-fetch-site': 'same-origin',
                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
                'x-goog-visitor-id': 'CgtraDZfVnB4NXdIWSjkzoefBg%3D%3D',
                'x-youtube-bootstrap-logged-in': 'false',
                'x-youtube-client-name': '1',
                'x-youtube-client-version': '2.20230201.01.00'
            }
            # A timeout keeps a stalled connection from blocking the crawler forever;
            # the resulting exception is logged by the handler below.
            response = requests.post(url=url, headers=headers, data=payload, timeout=30)
            if response.status_code != 200:
                Common.logger(log_type, crawler).warning(f"get_video_info_response:{response.text}\n")
                return None

            # Parse the body once and reuse it for every lookup below.
            data = response.json()
            if any(section not in data for section in ('streamingData', 'videoDetails', 'microformat')):
                Common.logger(log_type, crawler).warning(f"get_video_info_response:{data}\n")
                return None

            playerMicroformatRenderer = data['microformat']['playerMicroformatRenderer']
            videoDetails = data['videoDetails']

            # video_title: drop '&' and trim first, then remove newlines, '/', '#'
            # and backslashes and turn '.' into the full-width '。' in a single pass.
            # (The former trailing replace of "&NBSP" could never match because
            # every '&' is already gone by then, so it was removed.)
            title_cleanup = str.maketrans({"\n": None, "/": None, "\r": None, "#": None, ".": "。", "\\": None})
            video_title = videoDetails.get('title', '').replace("&", "").strip().translate(title_cleanup)
            video_title = cls.filter_emoji(video_title)
            video_title = Translate.google_translate(video_title, machine)  # translate the title into Chinese

            duration = int(videoDetails.get('lengthSeconds', 0))
            play_cnt = int(videoDetails.get('viewCount', 0))

            # publish_time is kept as delivered; publish_time_stamp is its epoch value.
            # NOTE(review): only 'YYYY-MM-DD' and 'YYYY-MM-DD HH:MM:SS' are accepted;
            # any other publishDate format raises ValueError, which the handler
            # below logs before None is returned.
            publish_time = playerMicroformatRenderer.get('publishDate', '')
            if publish_time == '':
                publish_time_stamp = 0
            elif ':' in publish_time:
                publish_time_stamp = int(time.mktime(time.strptime(publish_time, "%Y-%m-%d %H:%M:%S")))
            else:
                publish_time_stamp = int(time.mktime(time.strptime(publish_time, "%Y-%m-%d")))

            user_name = videoDetails.get('author', '')

            # cover_url: URL of the last entry in the thumbnail list, if there is one.
            thumbnails = videoDetails.get('thumbnail', {}).get('thumbnails', [])
            cover_url = thumbnails[-1].get('url', '') if thumbnails else ''

            # video_url is the watch-page URL, not a direct stream URL.
            video_url = f"https://www.youtube.com/watch?v={video_id}"

            video_dict = {
                'video_title': video_title,
                'video_id': video_id,
                'duration': duration,
                'play_cnt': play_cnt,
                'publish_time': publish_time,
                'publish_time_stamp': publish_time_stamp,
                'user_name': user_name,
                'out_uid': out_uid,
                'cover_url': cover_url,
                'video_url': video_url,
            }
            for field in ('video_title', 'video_id', 'play_cnt', 'publish_time', 'user_name', 'cover_url', 'video_url'):
                Common.logger(log_type, crawler).info(f'{field}:{video_dict[field]}')
            return video_dict
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_video_info异常:{e}\n")
            return None

    @classmethod
    def repeat_video(cls, log_type, crawler, video_id, env, machine):
        """
        Count the ``crawler_video`` rows stored for this platform and video id.

        :param log_type: log type forwarded to ``MysqlHelper.get_values``
        :param crawler: crawler name forwarded to ``MysqlHelper.get_values``
        :param video_id: off-site video id matched against ``out_video_id``
        :param env: forwarded to ``MysqlHelper.get_values``
        :param machine: forwarded to ``MysqlHelper.get_values``
        :return: ``len()`` of whatever ``MysqlHelper.get_values`` returns for the query
        """
        # NOTE(review): video_id is interpolated straight into the SQL text;
        # switch to a parameterized query if MysqlHelper offers one.
        lookup_sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
        matching_rows = MysqlHelper.get_values(log_type, crawler, lookup_sql, env, machine)
        return len(matching_rows)

    @classmethod
    def download_publish(cls, log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint, machine):
        try:
            # sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_dict['video_id']}" """
            # repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
            if video_dict['video_title'] == '' or  video_dict['video_url'] == '':
                Common.logger(log_type, crawler).info('无效视频\n')
            elif video_dict['duration'] > 1200 or video_dict['duration'] < 60:
                Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足规则\n")
            # elif repeat_video is not None and len(repeat_video) != 0:
            elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
                Common.logger(log_type, crawler).info('视频已下载\n')
            elif video_dict['video_id'] in [x for y in Feishu.get_values_batch(log_type, crawler, 'GVxlYk') for x in y]:
                Common.logger(log_type, crawler).info('视频已下载\n')
            else:
                # 下载视频
                Common.logger(log_type, crawler).info('开始下载视频...')
                # Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
                Common.download_method(log_type, crawler, 'youtube_video', video_dict['video_title'], video_dict['video_url'])
                # ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
                # video_width = int(ffmpeg_dict['width'])
                video_width = 1280
                # video_height = int(ffmpeg_dict['height'])
                video_height = 720
                duration = int(video_dict['duration'])
                # video_size = int(ffmpeg_dict['size'])

                Common.logger(log_type, crawler).info(f'video_width:{video_width}')
                Common.logger(log_type, crawler).info(f'video_height:{video_height}')
                Common.logger(log_type, crawler).info(f'duration:{duration}')
                # Common.logger(log_type, crawler).info(f'video_size:{video_size}\n')

                video_dict['video_width'] = video_width
                video_dict['video_height'] = video_height
                video_dict['duration'] = duration
                video_dict['comment_cnt'] = 0
                video_dict['like_cnt'] = 0
                video_dict['share_cnt'] = 0
                video_dict['avatar_url'] = video_dict['cover_url']
                video_dict['session'] = f'youtube{int(time.time())}'
                rule='1,2'
                # if duration < 60 or duration > 600:
                #     # 删除视频文件夹
                #     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
                #     Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则，删除成功\n")
                #     return
                if duration == 0  or duration is None:
                    # 删除视频文件夹
                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
                    Common.logger(log_type, crawler).info(f"视频下载出错，删除成功\n")
                    return
                else:
                    # 下载封面
                    Common.download_method(log_type, crawler, 'cover', video_dict['video_title'], video_dict['cover_url'])
                    # 保存视频文本信息
                    Common.save_video_info(log_type, crawler, video_dict)

                    # 上传视频
                    Common.logger(log_type, crawler).info(f"开始上传视频")
                    if env == 'dev':
                        our_video_id = Publish.upload_and_publish(log_type, crawler, strategy, our_uid, env, oss_endpoint)
                        our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
                    else:
                        our_video_id = Publish.upload_and_publish(log_type, crawler, strategy, our_uid, env, oss_endpoint)
                        our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
                    Common.logger(log_type, crawler).info("视频上传完成")

                    # 视频信息保存至飞书
                    Feishu.insert_columns(log_type, crawler, "GVxlYk", "ROWS", 1, 2)
                    # 视频ID工作表，首行写入数据
                    upload_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
                    values = [[upload_time,
                               "定向榜",
                               video_dict['video_id'],
                               video_dict['video_title'],
                               our_video_link,
                               video_dict['play_cnt'],
                               video_dict['duration'],
                               f'{video_width}*{video_height}',
                               video_dict['publish_time'],
                               video_dict['user_name'],
                               video_dict['cover_url'],
                               video_dict['video_url']
                               ]]
                    time.sleep(1)
                    Feishu.update_values(log_type, crawler, "GVxlYk", "F2:Z2", values)
                    Common.logger(log_type, crawler).info('视频信息写入定向_已下载表成功\n')

                    # 视频信息保存数据库
                    sql = f""" insert into crawler_video(video_id, 
                    user_id, 
                    out_user_id, 
                    platform, 
                    strategy, 
                    out_video_id, 
                    video_title, 
                    cover_url, 
                    video_url, 
                    duration, 
                    publish_time, 
                    play_cnt, 
                    crawler_rule, 
                    width, 
                    height) 
                    values({our_video_id}, 
                    "{our_uid}", 
                    "{video_dict['out_uid']}", 
                    "{cls.platform}", 
                    "定向爬虫策略", 
                    "{video_dict['video_id']}", 
                    "{video_dict['video_title']}", 
                    "{video_dict['cover_url']}",
                    "{video_dict['video_url']}",
                    {int(duration)},
                    "{video_dict['publish_time']}",
                    {int(video_dict['play_cnt'])},
                    "{rule}",
                    {int(video_width)},
                    {int(video_height)}) """
                    MysqlHelper.update_values(log_type, crawler, sql, env, machine)
                    Common.logger(log_type, crawler).info('视频信息插入数据库成功！\n')
        except Exception as e:
            Common.logger(log_type, crawler).info(f"download_publish异常:{e}\n")

    @classmethod
    def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
        """Crawl the home-page videos of every followed user from the Feishu sheet.

        The user list is loaded with ``cls.get_user_from_feishu`` (sheet id
        ``c467d7``). For each user, ``cls.get_videos`` is called and the loop
        then sleeps 10 seconds before moving on to the next user.

        Any ``Exception`` raised inside is logged at error level instead of
        being propagated to the caller.

        :param log_type: forwarded to ``Common.logger`` and the helper methods
        :param crawler: forwarded to ``Common.logger`` and the helper methods
        :param strategy: forwarded to ``cls.get_videos``
        :param oss_endpoint: forwarded to ``cls.get_videos``
        :param env: forwarded to ``cls.get_user_from_feishu`` and ``cls.get_videos``
        :param machine: forwarded to ``cls.get_user_from_feishu`` and ``cls.get_videos``
        :return: None
        """
        try:
            user_list = cls.get_user_from_feishu(log_type, crawler, 'c467d7', env, machine)
            # A falsy check also covers a None result, which len() would
            # reject with a TypeError and report as a generic failure.
            if not user_list:
                Common.logger(log_type, crawler).warning('用户列表为空\n')
                return

            for user_dict in user_list:
                out_uid = user_dict['out_user_id']
                user_name = user_dict['out_user_name']
                browse_id = user_dict['out_browse_id']
                our_uid = user_dict['our_user_id']
                Common.logger(log_type, crawler).info(f'获取 {user_name} 主页视频\n')
                try:
                    cls.get_videos(log_type, crawler, strategy, oss_endpoint, env, browse_id, out_uid, our_uid, machine)
                    Common.logger(log_type, crawler).info('休眠 10 秒')
                    time.sleep(10)
                finally:
                    # Always clear the class-level continuation value, even when
                    # this user's crawl failed, so it cannot leak into a later run.
                    # NOTE(review): presumably the pagination token consumed by
                    # get_videos - confirm against that method.
                    cls.continuation = ''
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_follow_videos异常:{e}\n")


if __name__ == "__main__":
    # Manual debugging entry points: uncomment a single call below to exercise
    # one Follow method when this file is run directly. With every call
    # commented out, running the file as a script does nothing.
    # print(Follow.get_browse_id('follow', 'youtube', '@chinatravel5971', "local"))
    # print(Follow.get_user_from_feishu('follow', 'youtube', 'c467d7', 'dev', 'local'))
    # Follow.get_out_user_info('follow', 'youtube', 'UC08jgxf119fzynp2uHCvZIg', '@weitravel')
    # Follow.get_video_info('follow', 'youtube', 'OGVK0IXBIhI')
    # Follow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'out', 'dev', 'local')
    # print(Follow.filter_emoji("姐妹倆一唱一和，完美配合，終於把大慶降服了😅😅#萌娃搞笑日常"))
    # Follow.repeat_video('follow', 'youtube', 4, "dev", "local")
    pass