wangkun 2 years ago
commit
a654a13503

BIN
.DS_Store


+ 59 - 0
.gitignore

@@ -0,0 +1,59 @@
+# ---> Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+.idea/
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/

+ 24 - 0
README.MD

@@ -0,0 +1,24 @@
+# Crawler Platform
+
+### Startup
+1. cd ./piaoquan_crawler
+2. sh ./main/main.sh ${crawler_dir} ${log_type} ${crawler} ${strategy} ${oss_endpoint} ${env} ${machine} ${nohup_dir} 
+
+```
+Parameter description
+${crawler_dir}: crawler entry script, e.g. ./youtube/youtube_main/run_youtube_follow.py
+${log_type}: log name prefix, e.g. follow, which generates 2023-02-08-follow.log under youtube/logs/
+${crawler}: which crawler, e.g. youtube / kanyikan / weixinzhishu
+${strategy}: crawler strategy, e.g. 定向爬虫策略 (targeted) / 小时榜爬虫策略 (hourly ranking) / 热榜爬虫策略 (trending)
+${oss_endpoint}: OSS endpoint, intranet: inner / public network: out
+${env}: execution environment, production: prod / test: dev
+${machine}: machine the crawler runs on, Alibaba Cloud server: aliyun / macpro / macair / local
+${nohup_dir}: nohup log path, e.g. ./youtube/nohup.log
+```
+
+### Example commands for crawlers already in production
+```
+youtube targeted ranking: 
+sh ./main/main.sh ./youtube/youtube_main/run_youtube_follow.py --log_type="follow" --crawler="youtube" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" ./youtube/nohup.log
+
+```

+ 3 - 0
common/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

+ 232 - 0
common/common.py

@@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31
+"""
+公共方法,包含:生成log / 删除log / 下载方法 / 删除 chlsfiles / 过滤词库 / 保存视频信息至本地 txt / 翻译 / ffmpeg
+"""
+from datetime import date, timedelta
+from loguru import logger
+import datetime
+import os
+import time
+import requests
+import json
+import ffmpeg
+from urllib import parse, request
+import urllib3
+proxies = {"http": None, "https": None}
+
+
+class Common:
+    # Current time, fetched once for the class <class 'datetime.datetime'>  2022-04-14 20:13:51.244472
+    now = datetime.datetime.now()
+    # Yesterday <class 'str'>  2022/04/13
+    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y/%m/%d")
+    # Today <class 'datetime.date'>  2022-04-14
+    today = date.today()
+    # Tomorrow <class 'str'>  2022/04/15
+    tomorrow = (date.today() + timedelta(days=1)).strftime("%Y/%m/%d")
+
+    # Build a logger with the loguru module
+    @staticmethod
+    def logger(log_type, crawler):
+        """
+        Build a logger with the loguru module
+        """
+        # Log directory
+        log_dir = f"./{crawler}/logs/"
+        log_path = os.getcwd() + os.sep + log_dir
+        if not os.path.isdir(log_path):
+            os.makedirs(log_path)
+
+        # Log file name
+        log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + f'-{crawler}-{log_type}.log'
+
+        # Do not print logs to the console
+        logger.remove(handler_id=None)
+
+        # rotation="500 MB": start a new file every 500 MB
+        # rotation="12:00": start a new file every day at 12:00
+        # rotation="1 week": start a new file every week
+        # retention="10 days": delete logs older than 10 days
+        # Initialize the log sink
+        logger.add(log_dir + log_name, level="INFO", rotation='00:00')
+
+        return logger
+
+    # Clean up logs, keeping only the 10 most recent files
+    @classmethod
+    def del_logs(cls, log_type, crawler):
+        """
+        Remove redundant log files
+        :return: keeps the 10 most recent logs
+        """
+        log_dir = f"./{crawler}/logs/"
+        all_files = sorted(os.listdir(log_dir))
+        all_logs = []
+        for log in all_files:
+            name = os.path.splitext(log)[-1]
+            if name == ".log":
+                all_logs.append(log)
+
+        if len(all_logs) <= 10:
+            pass
+        else:
+            for file in all_logs[:len(all_logs) - 10]:
+                os.remove(log_dir + file)
+        cls.logger(log_type, crawler).info("清除日志成功")
+
+    # Delete Charles capture files, keeping only the three most recent
+    @classmethod
+    def del_charles_files(cls, log_type, crawler):
+        # All files in the target directory
+        all_file = sorted(os.listdir(f"./{crawler}/chlsfiles/"))
+        for file in all_file[0:-3]:
+            os.remove(f"./{crawler}/chlsfiles/{file}")
+        cls.logger(log_type, crawler).info("删除 charles 缓存文件成功")
+
+    # Save video info to "./{crawler}/videos/{video_dict['video_title']}/info.txt"
+    @classmethod
+    def save_video_info(cls, log_type, crawler, video_dict):
+        with open(f"./{crawler}/videos/{video_dict['video_title']}/info.txt",
+                  "a", encoding="UTF-8") as f_a:
+            f_a.write(str(video_dict['video_id']) + "\n" +
+                      str(video_dict['video_title']) + "\n" +
+                      str(video_dict['duration']) + "\n" +
+                      str(video_dict['play_cnt']) + "\n" +
+                      str(video_dict['comment_cnt']) + "\n" +
+                      str(video_dict['like_cnt']) + "\n" +
+                      str(video_dict['share_cnt']) + "\n" +
+                      f"{video_dict['video_width']}*{video_dict['video_height']}" + "\n" +
+                      str(video_dict['publish_time']) + "\n" +
+                      str(video_dict['user_name']) + "\n" +
+                      str(video_dict['avatar_url']) + "\n" +
+                      str(video_dict['video_url']) + "\n" +
+                      str(video_dict['cover_url']) + "\n" +
+                      str(video_dict['session']))
+        Common.logger(log_type, crawler).info("==========视频信息已保存至info.txt==========")
+
+    # Download a video, audio track or cover image
+    @classmethod
+    def download_method(cls, log_type, crawler, text, title, url):
+        """
+        Download the cover: text == "cover"; the video: text == "video"; the audio: text == "audio"
+        title: title of the video being downloaded
+        url: playback URL or cover URL
+        Files are saved under "./{crawler}/videos/{title}/"
+        """
+        videos_dir = f"./{crawler}/videos/"
+        if not os.path.exists(videos_dir):
+            os.mkdir(videos_dir)
+        # First create a folder that holds everything related to this video
+        video_dir = f"./{crawler}/videos/{title}/"
+        if not os.path.exists(video_dir):
+            os.mkdir(video_dir)
+
+        # Download the video
+        if text == "video":
+            # URL of the video to download
+            video_url = str(url).replace('http://', 'https://')
+            # File name of the video
+            video_name = "video.mp4"
+
+            # Download the video; the request runs inside the try so network errors are logged too
+            urllib3.disable_warnings()
+            try:
+                response = requests.get(video_url, stream=True, proxies=proxies, verify=False)
+                with open(video_dir + video_name, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=10240):
+                        f.write(chunk)
+                cls.logger(log_type, crawler).info("==========视频下载完成==========")
+            except Exception as e:
+                cls.logger(log_type, crawler).error(f"视频下载失败:{e}\n")
+
+        # Download the audio track
+        elif text == "audio":
+            # URL of the audio to download
+            audio_url = str(url).replace('http://', 'https://')
+            # File name of the audio
+            audio_name = "audio.mp4"
+
+            # Download the audio
+            urllib3.disable_warnings()
+            try:
+                response = requests.get(audio_url, stream=True, proxies=proxies, verify=False)
+                with open(video_dir + audio_name, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=10240):
+                        f.write(chunk)
+                cls.logger(log_type, crawler).info("==========音频下载完成==========")
+            except Exception as e:
+                cls.logger(log_type, crawler).error(f"音频下载失败:{e}\n")
+
+        # Download the cover image
+        elif text == "cover":
+            # URL of the cover to download
+            cover_url = str(url)
+            # File name of the cover
+            cover_name = "image.jpg"
+
+            # Download the cover
+            urllib3.disable_warnings()
+            try:
+                response = requests.get(cover_url, proxies=proxies, verify=False)
+                with open(video_dir + cover_name, "wb") as f:
+                    f.write(response.content)
+                cls.logger(log_type, crawler).info("==========封面下载完成==========")
+            except Exception as e:
+                cls.logger(log_type, crawler).error(f"封面下载失败:{e}\n")
+
+    # Youdao translation: English → Chinese
+    @classmethod
+    def fanyi(cls, query):
+        req_url = 'http://fanyi.youdao.com/translate'  # API endpoint
+        # Form data to submit
+        Form_Date = {'i': query,
+                     'doctype': 'json',
+                     'form': 'AUTO',
+                     'to': 'AUTO',
+                     # 'to': 'Chinese',
+                     'smartresult': 'dict',
+                     'client': 'fanyideskweb',
+                     'salt': '1526995097962',
+                     'sign': '8e4c4765b52229e1f3ad2e633af89c76',
+                     'version': '2.1',
+                     'keyform': 'fanyi.web',
+                     'action': 'FY_BY_REALTIME',
+                     'typoResult': 'false'}
+
+        data = parse.urlencode(Form_Date).encode('utf-8')  # Encode the form data
+        response = request.urlopen(req_url, data)  # Submit the request
+        html = response.read().decode('utf-8')  # Read the raw response
+        # The response body is JSON
+        translate_results = json.loads(html)  # Parse the JSON
+        translate_results = translate_results['translateResult'][0][0]['tgt']  # Pull out the translated text
+        return translate_results
+
+    @classmethod
+    def ffmpeg(cls, log_type, crawler, video_path):
+        probe = ffmpeg.probe(video_path)
+        video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
+        if video_stream is None:
+            Common.logger(log_type, crawler).info('No video Stream found!')
+            return
+        format1 = probe['format']
+        size = int(format1['size']) / 1024 / 1024
+        width = int(video_stream['width'])
+        height = int(video_stream['height'])
+        duration = int(float(video_stream['duration']))
+        ffmpeg_dict = {
+            'width': width,
+            'height': height,
+            'duration': duration,
+            'size': size
+        }
+        return ffmpeg_dict
+
+
+if __name__ == "__main__":
+    res = Common.fanyi("10 MOST UNIQUE Dance Groups EVER On Britain's Got Talent!")
+    print(res)
+
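A minimal usage sketch of the helpers above, assuming the ./{crawler}/videos/ layout that download_method creates; the crawler name, video fields and URLs below are placeholders, not values from the commit:

```
# Sketch only: placeholder crawler name, video fields and URLs.
from common.common import Common

log_type, crawler = "follow", "youtube"
logger = Common.logger(log_type, crawler)  # writes ./youtube/logs/<date>-youtube-follow.log

video_dict = {
    "video_id": "abc123", "video_title": "demo_title", "duration": 60,
    "play_cnt": 0, "comment_cnt": 0, "like_cnt": 0, "share_cnt": 0,
    "video_width": 1280, "video_height": 720, "publish_time": "2023-02-08",
    "user_name": "demo_user", "avatar_url": "https://example.com/avatar.jpg",
    "video_url": "https://example.com/video.mp4", "cover_url": "https://example.com/cover.jpg",
    "session": "session-123",
}
# download_method creates ./youtube/videos/demo_title/ before writing into it, so it runs first;
# save_video_info then appends info.txt into the same folder.
Common.download_method(log_type, crawler, "video", video_dict["video_title"], video_dict["video_url"])
Common.download_method(log_type, crawler, "cover", video_dict["video_title"], video_dict["cover_url"])
Common.save_video_info(log_type, crawler, video_dict)
logger.info("one video downloaded and recorded")
```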

+ 89 - 0
common/db.py

@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/2
+"""
+数据库连接及操作
+"""
+import pymysql
+from common.common import Common
+
+class MysqlHelper:
+    @classmethod
+    def connect_mysql(cls, env):
+        if env == 'prod':
+            # Create a Connection object representing one database connection
+            connection = pymysql.connect(
+                # host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database host, intranet address
+                host="rm-bp1159bu17li9hi94ro.mysql.rds.aliyuncs.com",  # database host, public address
+                port=3306,                      # port
+                user="crawler",                 # mysql user name
+                passwd="crawler123456@",        # mysql password
+                db="piaoquan-crawler",          # database name
+                # charset must match the encoding used by the database (utf8 here)
+                charset="utf8")
+        else:
+            # Create a Connection object representing one database connection
+            connection = pymysql.connect(
+                # host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database host, intranet address
+                host="rm-bp1k5853td1r25g3ndo.mysql.rds.aliyuncs.com",  # database host, public address
+                port=3306,  # port
+                user="crawler",  # mysql user name
+                passwd="crawler123456@",  # mysql password
+                db="piaoquan-crawler",  # database name
+                # charset must match the encoding used by the database (utf8 here)
+                charset="utf8")
+
+        return connection
+
+    @classmethod
+    def get_values(cls, log_type, crawler, sql, env):
+        # try:
+        # Connect to the database
+        connect = cls.connect_mysql(env)
+        # Get a Cursor object
+        mysql = connect.cursor()
+
+        # Execute the sql statement
+        mysql.execute(sql)
+
+        # fetchall returns a tuple of tuples, one per row
+        data = mysql.fetchall()
+
+        # Close the database connection
+        connect.close()
+
+        # Return the query result (a tuple)
+        return data
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_values异常:{e}\n")
+
+    @classmethod
+    def update_values(cls, log_type, crawler, sql, env):
+        # Connect to the database
+        connect = cls.connect_mysql(env)
+        # Get a Cursor object
+        mysql = connect.cursor()
+
+        try:
+            # Execute the sql statement
+            res = mysql.execute(sql)
+            # Must commit, otherwise inserts/updates are not persisted
+            connect.commit()
+            return res
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"update_values异常,进行回滚操作:{e}\n")
+            # Roll back on error
+            connect.rollback()
+        finally:
+            # Close the database connection
+            connect.close()
+
+if __name__ == "__main__":
+    # sql_statement = f"INSERT INTO crawler_user ( user_id, out_user_id, out_user_name, out_avatar_url, platform, tag) " \
+    #       f"VALUES ('6282398', 'out_uid_003', 'out_user_name', '', 'xiaoniangao', 'xiaoniangao_play')"
+    # edit_data = MysqlHelper.edit_data(sql=sql_statement)
+    # print(edit_data)
+
+    get_data = MysqlHelper.get_values("demo", "youtube", "select * from crawler_user", "dev")
+    print(get_data)
+
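A small sketch of the write path that the commented-out lines above hint at; the column list mirrors that commented INSERT and the values are placeholders:

```
# Sketch only: placeholder values; columns mirror the commented-out INSERT above.
from common.db import MysqlHelper

insert_sql = "INSERT INTO crawler_user (user_id, out_user_id, out_user_name, out_avatar_url, platform, tag) " \
             "VALUES ('6282398', 'out_uid_003', 'out_user_name', '', 'youtube', 'youtube_follow')"
rows = MysqlHelper.update_values("demo", "youtube", insert_sql, "dev")
print(rows)  # number of affected rows, or None if the statement was rolled back
```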

+ 488 - 0
common/feishu.py

@@ -0,0 +1,488 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31
+"""
+飞书表配置: token 鉴权 / 增删改查 / 机器人报警
+"""
+import json
+import os
+import sys
+import requests
+import urllib3
+sys.path.append(os.getcwd())
+from common.common import Common
+proxies = {"http": None, "https": None}
+
+
+class Feishu:
+    """
+    编辑飞书云文档
+    """
+    # 看一看爬虫数据表
+    kanyikan_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?"
+    # 快手爬虫数据表
+    kuaishou_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?"
+    # 微视爬虫数据表
+    weishi_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?"
+    # 小年糕爬虫数据表
+    xiaoniangao_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?"
+    # 音乐相册
+    music_album = "https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g?"
+    # 本山祝福数据表
+    crawler_benshanzhufu = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?"
+    # 公众号爬虫表
+    gzh_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA?"
+    # 数据监控表
+    crawler_monitor = "https://w42nne6hzg.feishu.cn/sheets/shtcnlZWYazInhf7Z60jkbLRJyd?"
+    # 微群视频爬虫表
+    crawler_weiqun_video = "https://w42nne6hzg.feishu.cn/sheets/shtcnoKThNquYRweaylMFVyo9Hc?"
+    # 视频号爬虫表
+    crawler_shipinhao = 'https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?'
+    # 西瓜视频
+    crawler_xigua = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?'
+    # 知乎 PC 端
+    crawler_zhihu = 'https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?'
+    # 吉祥幸福
+    crawler_jixiangxingfu = 'https://w42nne6hzg.feishu.cn/sheets/shtcnSx4nafMbLTq7xl7RHBwHBf?'
+    # 众妙音信
+    crawler_zmyx = 'https://w42nne6hzg.feishu.cn/sheets/shtcnbZIxstPeM0xshW07b26sve?'
+    # 岁岁年年迎福气
+    crawler_ssnnyfq = 'https://w42nne6hzg.feishu.cn/sheets/shtcnyJmJSJynHDLLbLTkySfvZe?'
+    # 祝福猫视频
+    crawler_zhufumao = 'https://w42nne6hzg.feishu.cn/sheets/shtcnXfIJthvkjhI5zlEJq84i6g?'
+    # 宗教公众号
+    crawler_zongjiao = 'https://w42nne6hzg.feishu.cn/sheets/shtcn73NW0CyoOeF21HWO15KBsb?'
+    # 好看视频
+    crawler_haokan = 'https://w42nne6hzg.feishu.cn/sheets/shtcnaYz8Nhv8q6DbWtlL6rMEBd'
+    # 看到就是福气
+    crawler_kandaojiushifuqi = 'https://w42nne6hzg.feishu.cn/sheets/shtcnEokBkIjOUPAk8vbbPKnXgb'
+    # 胜胜影音
+    crawler_shengshengyingyin = 'https://w42nne6hzg.feishu.cn/sheets/shtcnz1ymxHL1u8WHblfqfys7qe'
+    # 刚刚都传
+    crawler_ganggangdouchuan = 'https://w42nne6hzg.feishu.cn/sheets/shtcnTuJgeZU2bc7VaesAqk3QJx'
+    # 公众号_信欣
+    crawler_gongzhonghao = 'https://w42nne6hzg.feishu.cn/sheets/shtcna98M2mX7TbivTj9Sb7WKBN?'
+    # YouTube
+    crawler_youtube = 'https://w42nne6hzg.feishu.cn/sheets/shtcnrLyr1zbYbhhZyqpN7Xrd5f?'
+
+    # Phone numbers
+    wangkun = "13426262515"
+    gaonannan = "18501180073"
+    xinxin = "15546206651"
+    huxinxue = "18832292015"
+
+    # Feishu spreadsheet tokens
+    @classmethod
+    def spreadsheettoken(cls, crawler):
+        """
+        :param crawler: 哪个爬虫
+        """
+        if crawler == "kanyikan":
+            return "shtcngRPoDYAi24x52j2nDuHMih"
+        elif crawler == "kuaishou":
+            return "shtcnICEfaw9llDNQkKgdymM1xf"
+        elif crawler == "weishi":
+            return "shtcn5YSWg91JfVGzj0SFZIRRPh"
+        elif crawler == "xiaoniangao":
+            return "shtcnYxiyQ1wLklo1W5Kdqc9cGh"
+        elif crawler == "monitor":
+            return "shtcnlZWYazInhf7Z60jkbLRJyd"
+        elif crawler == "music_album":
+            return "shtcnT6zvmfsYe1g0iv4pt7855g"
+        elif crawler == "bszf":
+            return "shtcnGh2rrsPYM4iVNEBO7OqWrb"
+        elif crawler == "gzh":
+            return "shtcnexNXnpDLHhARw0QdiwbYuA"
+        elif crawler == "weiqun":
+            return "shtcnoKThNquYRweaylMFVyo9Hc"
+        elif crawler == 'shipinhao':
+            return 'shtcn9rOdZRAGFbRkWpn7hqEHGc'
+        elif crawler == 'xigua':
+            return 'shtcnvOpx2P8vBXiV91Ot1MKIw8'
+        elif crawler == 'zhihu':
+            return 'shtcnkGPBmGsjaqapgzouuj8MXe'
+        elif crawler == 'jxxf':
+            return 'shtcnSx4nafMbLTq7xl7RHBwHBf'
+        elif crawler == 'zmyx':
+            return 'shtcnbZIxstPeM0xshW07b26sve'
+        elif crawler == 'ssnnyfq':
+            return 'shtcnyJmJSJynHDLLbLTkySfvZe'
+        elif crawler == 'zhufumao':
+            return 'shtcnXfIJthvkjhI5zlEJq84i6g'
+        elif crawler == 'zongjiao':
+            return 'shtcn73NW0CyoOeF21HWO15KBsb'
+        elif crawler == 'haokan':
+            return 'shtcnaYz8Nhv8q6DbWtlL6rMEBd'
+        elif crawler == 'kdjsfq':
+            return 'shtcnEokBkIjOUPAk8vbbPKnXgb'
+        elif crawler == 'ssyy':
+            return 'shtcnz1ymxHL1u8WHblfqfys7qe'
+        elif crawler == 'ggdc':
+            return 'shtcnTuJgeZU2bc7VaesAqk3QJx'
+        elif crawler == 'gongzhonghao_xinxin':
+            return 'shtcna98M2mX7TbivTj9Sb7WKBN'
+        elif crawler == 'youtube':
+            return 'shtcnrLyr1zbYbhhZyqpN7Xrd5f'
+
+    # 获取飞书api token
+    @classmethod
+    def get_token(cls, log_type, crawler):
+        """
+        获取飞书api token
+        :return:
+        """
+        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
+        post_data = {"app_id": "cli_a13ad2afa438d00b",  # 这里账号密码是发布应用的后台账号及密码
+                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
+
+        try:
+            urllib3.disable_warnings()
+            response = requests.post(url=url, data=post_data, proxies=proxies, verify=False)
+            tenant_access_token = response.json()["tenant_access_token"]
+            return tenant_access_token
+        except Exception as e:
+            Common.logger(log_type, crawler).error("获取飞书 api token 异常:{}", e)
+
+    # 获取表格元数据
+    @classmethod
+    def get_metainfo(cls, log_type, crawler):
+        """
+        获取表格元数据
+        :return:
+        """
+        get_metainfo_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                           + cls.spreadsheettoken(crawler) + "/metainfo"
+
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        params = {
+            "extFields": "protectedRange",  # 额外返回的字段,extFields=protectedRange时返回保护行列信息
+            "user_id_type": "open_id"  # 返回的用户id类型,可选open_id,union_id
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.get(url=get_metainfo_url, headers=headers, params=params, proxies=proxies, verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            return response
+        except Exception as e:
+            Common.logger(log_type, crawler).error("获取表格元数据异常:{}", e)
+
+    # 读取工作表中所有数据
+    @classmethod
+    def get_values_batch(cls, log_type, crawler, sheetid):
+        """
+        读取工作表中所有数据
+        :param log_type: 启用哪个 log
+        :param crawler: 哪个爬虫
+        :param sheetid: 哪张表
+        :return: 所有数据
+        """
+        get_values_batch_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                               + cls.spreadsheettoken(crawler) + "/values_batch_get"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        params = {
+            # 多个查询范围 如 url?ranges=range1,range2 ,其中 range 包含 sheetId 与单元格范围两部分
+            "ranges": sheetid,
+
+            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
+            # valueRenderOption=FormattedValue 计算并格式化单元格;
+            # valueRenderOption=Formula单元格中含有公式时返回公式本身;
+            # valueRenderOption=UnformattedValue计算但不对单元格进行格式化
+            "valueRenderOption": "ToString",
+
+            # dateTimeRenderOption=FormattedString 计算并将时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
+            "dateTimeRenderOption": "",
+
+            # 返回的用户id类型,可选open_id,union_id
+            "user_id_type": "open_id"
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.get(url=get_values_batch_url, headers=headers, params=params, proxies=proxies, verify=False)
+            # print(r.text)
+            response = json.loads(r.content.decode("utf8"))
+            values = response["data"]["valueRanges"][0]["values"]
+            return values
+        except Exception as e:
+            Common.logger(log_type, crawler).error("读取工作表所有数据异常:{}", e)
+
+    # 工作表,插入行或列
+    @classmethod
+    def insert_columns(cls, log_type, crawler, sheetid, majordimension, startindex, endindex):
+        """
+        工作表插入行或列
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫的云文档
+        :param sheetid:哪张工作表
+        :param majordimension:行或者列, ROWS、COLUMNS
+        :param startindex:开始位置
+        :param endindex:结束位置
+        """
+        insert_columns_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                             + cls.spreadsheettoken(crawler) + "/insert_dimension_range"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        body = {
+            "dimension": {
+                "sheetId": sheetid,
+                "majorDimension": majordimension,  # 默认 ROWS ,可选 ROWS、COLUMNS
+                "startIndex": startindex,  # 开始的位置
+                "endIndex": endindex  # 结束的位置
+            },
+            "inheritStyle": "AFTER"  # BEFORE 或 AFTER,不填为不继承 style
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=insert_columns_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type, crawler).info("插入行或列:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger(log_type, crawler).error("插入行或列异常:{}", e)
+
+    # 写入数据
+    @classmethod
+    def update_values(cls, log_type, crawler, sheetid, ranges, values):
+        """
+        写入数据
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫的云文档
+        :param sheetid:哪张工作表
+        :param ranges:单元格范围
+        :param values:写入的具体数据,list
+        """
+        update_values_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                            + cls.spreadsheettoken(crawler) + "/values_batch_update"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        body = {
+            "valueRanges": [
+                {
+                    "range": sheetid + "!" + ranges,
+                    "values": values
+                },
+            ],
+        }
+
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=update_values_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type, crawler).info("写入数据:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger(log_type, crawler).error("写入数据异常:{}", e)
+
+    # 合并单元格
+    @classmethod
+    def merge_cells(cls, log_type, crawler, sheetid, ranges):
+        """
+        合并单元格
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid:哪张工作表
+        :param ranges:需要合并的单元格范围
+        """
+        merge_cells_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                          + cls.spreadsheettoken(crawler) + "/merge_cells"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+
+        body = {
+            "range": sheetid + "!" + ranges,
+            "mergeType": "MERGE_ROWS"
+        }
+
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=merge_cells_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type, crawler).info("合并单元格:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger(log_type, crawler).error("合并单元格异常:{}", e)
+
+    # 读取单元格数据
+    @classmethod
+    def get_range_value(cls, log_type, crawler, sheetid, cell):
+        """
+        读取单元格内容
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid: 哪张工作表
+        :param cell: 哪个单元格
+        :return: 单元格内容
+        """
+        get_range_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/values/" + sheetid + "!" + cell
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        params = {
+            # valueRenderOption=ToString 可返回纯文本的值(数值类型除外);
+            # valueRenderOption=FormattedValue 计算并格式化单元格;
+            # valueRenderOption=Formula 单元格中含有公式时返回公式本身;
+            # valueRenderOption=UnformattedValue 计算但不对单元格进行格式化。
+            "valueRenderOption": "FormattedValue",
+
+            # dateTimeRenderOption=FormattedString 计算并对时间日期按照其格式进行格式化,但不会对数字进行格式化,返回格式化后的字符串。
+            "dateTimeRenderOption": "",
+
+            # 返回的用户id类型,可选open_id,union_id
+            "user_id_type": "open_id"
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.get(url=get_range_value_url, headers=headers, params=params, proxies=proxies, verify=False)
+            # print(r.text)
+            return r.json()["data"]["valueRange"]["values"][0]
+        except Exception as e:
+            Common.logger(log_type, crawler).error("读取单元格数据异常:{}", e)
+
+    # 获取表内容
+    @classmethod
+    def get_sheet_content(cls, log_type, crawler, sheet_id):
+        try:
+            sheet = Feishu.get_values_batch(log_type, crawler, sheet_id)
+            content_list = []
+            for x in sheet:
+                for y in x:
+                    if y is None:
+                        pass
+                    else:
+                        content_list.append(y)
+            return content_list
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f'get_sheet_content:{e}\n')
+
+    # 删除行或列,可选 ROWS、COLUMNS
+    @classmethod
+    def dimension_range(cls, log_type, crawler, sheetid, major_dimension, startindex, endindex):
+        """
+        删除行或列
+        :param log_type: 日志路径
+        :param crawler: 哪个爬虫
+        :param sheetid:工作表
+        :param major_dimension:默认 ROWS ,可选 ROWS、COLUMNS
+        :param startindex:开始的位置
+        :param endindex:结束的位置
+        :return:
+        """
+        dimension_range_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/" \
+                              + cls.spreadsheettoken(crawler) + "/dimension_range"
+        headers = {
+            "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+            "Content-Type": "application/json; charset=utf-8"
+        }
+        body = {
+            "dimension": {
+                "sheetId": sheetid,
+                "majorDimension": major_dimension,
+                "startIndex": startindex,
+                "endIndex": endindex
+            }
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.delete(url=dimension_range_url, headers=headers, json=body, proxies=proxies, verify=False)
+            Common.logger(log_type, crawler).info("删除视频数据:{}", r.json()["msg"])
+        except Exception as e:
+            Common.logger(log_type, crawler).error("删除视频数据异常:{}", e)
+
+    # 获取用户 ID
+    @classmethod
+    def get_userid(cls, log_type, crawler, username):
+        try:
+            url = "https://open.feishu.cn/open-apis/user/v1/batch_get_id?"
+            headers = {
+                "Authorization": "Bearer " + cls.get_token(log_type, crawler),
+                "Content-Type": "application/json; charset=utf-8"
+            }
+            if username == "wangkun":
+                username = cls.wangkun
+            elif username == "gaonannan":
+                username = cls.gaonannan
+            elif username == "xinxin":
+                username = cls.xinxin
+            elif username == "huxinxue":
+                username = cls.huxinxue
+            data = {"mobiles": [username]}
+            urllib3.disable_warnings()
+            r = requests.get(url=url, headers=headers, params=data, verify=False, proxies=proxies)
+            open_id = r.json()["data"]["mobile_users"][username][0]["open_id"]
+            Common.logger(log_type, crawler).info("{}:{}", username, open_id)
+            # print(f"{username}:{open_id}")
+            return open_id
+        except Exception as e:
+            Common.logger(log_type, crawler).error("get_userid异常:{}", e)
+
+    # 飞书机器人
+    @classmethod
+    def bot(cls, log_type, crawler, content):
+        try:
+            url = "https://open.feishu.cn/open-apis/bot/v2/hook/96989577-50e7-4653-9ec2-308fe3f2c5fe"
+            headers = {
+                'Content-Type': 'application/json'
+            }
+            data = json.dumps({
+                "msg_type": "interactive",
+                "card": {
+                    "config": {
+                        "wide_screen_mode": True,
+                        "enable_forward": True
+                    },
+                    "elements": [{
+                        "tag": "div",
+                        "text": {
+                            "content": "\n<at id=" + str(cls.get_userid(log_type, crawler, "wangkun")) + "></at>\n" + content,
+                            "tag": "lark_md"
+                        }
+                    }, {
+                        "actions": [{
+                            "tag": "button",
+                            "text": {
+                                "content": "快手爬虫表",
+                                "tag": "lark_md"
+                            },
+                            "url": "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf",
+                            "type": "default",
+                            "value": {}
+                        },
+                            {
+                                "tag": "button",
+                                "text": {
+                                    "content": "快手Jenkins",
+                                    "tag": "lark_md"
+                                },
+                                "url": "https://jenkins-on.yishihui.com/view/%E7%88%AC%E8%99%AB-Spider/job/%E5%BF%"
+                                       "AB%E6%89%8B%E5%B0%8F%E7%A8%8B%E5%BA%8F-%E8%A7%86%E9%A2%91%E7%88%AC%E5%8F%96/",
+                                "type": "default",
+                                "value": {}
+                            }
+
+                        ],
+                        "tag": "action"
+                    }],
+                    "header": {
+                        "title": {
+                            "content": "📣有新的报警,请注意查处",
+                            "tag": "plain_text"
+                        }
+                    }
+                }
+            })
+            urllib3.disable_warnings()
+            r = requests.post(url, headers=headers, data=data, verify=False, proxies=proxies)
+            Common.logger(log_type, crawler).info("触发机器人消息:{}, {}", r, r.json()["StatusMessage"])
+        except Exception as e:
+            Common.logger(log_type, crawler).error("bot异常:{}", e)
+
+
+if __name__ == "__main__":
+    pass
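A minimal sketch of how the sheet helpers above are typically combined to append one crawled record; the worksheet id and cell values are placeholders, not values from the commit:

```
# Sketch only: "aBc123" is a placeholder worksheet id, values are dummy data.
from common.feishu import Feishu

log_type, crawler, sheetid = "follow", "youtube", "aBc123"
# Insert a blank row below the header row, then write the record into it.
Feishu.insert_columns(log_type, crawler, sheetid, "ROWS", 1, 2)
Feishu.update_values(log_type, crawler, sheetid, "A2:D2",
                     [["2023-02-08", "video_id_001", "demo title", "https://example.com/video.mp4"]])
```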

+ 349 - 0
common/publish.py

@@ -0,0 +1,349 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31
+"""
+站内UID配置 / 环境配置 / 视频上传
+"""
+import json
+import os
+import random
+import shutil
+import sys
+import time
+import oss2
+import requests
+import urllib3
+sys.path.append(os.getcwd())
+from common.common import Common
+proxies = {"http": None, "https": None}
+
+
+class Publish:
+    @classmethod
+    def publish_video_dev(cls, log_type, crawler, request_data):
+        """
+        loginUid  站内uid (随机)
+        appType  默认:888888
+        crawlerSrcId   站外视频ID
+        crawlerSrcCode   渠道(自定义 KYK)
+        crawlerSrcPublishTimestamp  视频原发布时间
+        crawlerTaskTimestamp   爬虫创建时间(可以是当前时间)
+        videoPath  视频oss地址
+        coverImgPath  视频封面oss地址
+        title  标题
+        totalTime  视频时长
+        viewStatus  视频的有效状态 默认1
+        versionCode  版本 默认1
+        :return:
+        """
+        Common.logger(log_type, crawler).info('publish request data: {}'.format(request_data))
+        result = cls.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', request_data)
+        Common.logger(log_type, crawler).info('publish result: {}'.format(result))
+        video_id = result["data"]["id"]
+        Common.logger(log_type, crawler).info('video_id: {}'.format(video_id))
+        if result['code'] != 0:
+            Common.logger(log_type, crawler).error('pushlish failure msg = {}'.format(result['msg']))
+        else:
+            Common.logger(log_type, crawler).info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
+        return video_id
+
+    @classmethod
+    def publish_video_prod(cls, log_type, crawler, request_data):
+        """
+        loginUid  站内uid (随机)
+        appType  默认:888888
+        crawlerSrcId   站外视频ID
+        crawlerSrcCode   渠道(自定义 KYK)
+        crawlerSrcPublishTimestamp  视频原发布时间
+        crawlerTaskTimestamp   爬虫创建时间(可以是当前时间)
+        videoPath  视频oss地址
+        coverImgPath  视频封面oss地址
+        title  标题
+        totalTime  视频时长
+        viewStatus  视频的有效状态 默认1
+        versionCode  版本 默认1
+        :return:
+        """
+        # Common.logger(log_type, crawler).info(f'publish request data: {request_data}')
+        result = cls.request_post('https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send', request_data)
+        # Common.logger(log_type, crawler).info(f'publish result: {result}')
+        video_id = result["data"]["id"]
+        # Common.logger(log_type, crawler).info(f'video_id: {video_id}')
+        if result['code'] != 0:
+            Common.logger(log_type, crawler).error('pushlish failure msg = {}'.format(result['msg']))
+        else:
+            Common.logger(log_type, crawler).info('publish success video_id = : {}'.format(request_data['crawlerSrcId']))
+        return video_id
+
+    @classmethod
+    def request_post(cls, request_url, request_data):
+        """
+        post 请求 HTTP接口
+        :param request_url: 接口URL
+        :param request_data: 请求参数
+        :return: res_data json格式
+        """
+        urllib3.disable_warnings()
+        response = requests.post(url=request_url, data=request_data, proxies=proxies, verify=False)
+        if response.status_code == 200:
+            res_data = json.loads(response.text)
+            return res_data
+
+    @classmethod
+    def bucket(cls, oss_endpoint):
+        """
+        创建 bucket
+        :param oss_endpoint: inner:内网;out:外网
+        :return: bucket
+        """
+        # 以下代码展示了基本的文件上传、下载、罗列、删除用法。
+
+        # 首先初始化AccessKeyId、AccessKeySecret、Endpoint等信息。
+        # 通过环境变量获取,或者把诸如“<你的AccessKeyId>”替换成真实的AccessKeyId等。
+        #
+        # 以杭州区域为例,Endpoint可以是:
+        #   http://oss-cn-hangzhou.aliyuncs.com
+        #   https://oss-cn-hangzhou.aliyuncs.com
+        # 分别以HTTP、HTTPS协议访问。
+        access_key_id = os.getenv('OSS_TEST_ACCESS_KEY_ID', 'LTAIP6x1l3DXfSxm')
+        access_key_secret = os.getenv('OSS_TEST_ACCESS_KEY_SECRET', 'KbTaM9ars4OX3PMS6Xm7rtxGr1FLon')
+        bucket_name = os.getenv('OSS_TEST_BUCKET', 'art-pubbucket')
+        # OSS 内网
+        if oss_endpoint == 'inner':
+            endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou-internal.aliyuncs.com')
+        # OSS 外网
+        elif oss_endpoint == 'out':
+            endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou.aliyuncs.com')
+        # 默认走外网
+        else:
+            endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou.aliyuncs.com')
+
+        # 确认上面的参数都填写正确了
+        for param in (access_key_id, access_key_secret, bucket_name, endpoint):
+            assert '<' not in param, '请设置参数:' + param
+
+        # 创建Bucket对象,所有Object相关的接口都可以通过Bucket对象来进行
+        bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name)
+        return bucket
+
+    """
+    处理流程:
+    1. 定时(每天凌晨1点执行一次)循环files文件下的内容 结构:files -> 视频文件夹 -> 视频文件 + 封面图 + 基本信息
+    2. 视频文件和封面上传到oss
+    - 视频文件oss目录  longvideo/crawler_local/video/prod/文件名
+    - 视频封面oss目录  longvideo/crawler_local/image/prod/文件名
+    3. 发布视频
+    - 读取 基本信息 调用发布接口
+    """
+    # env 日期20220225 文件名
+    oss_file_path_video = 'longvideo/crawler_local/video/{}/{}/{}'
+    oss_file_path_image = 'longvideo/crawler_local/image/{}/{}/{}'
+
+    @classmethod
+    def put_file(cls, log_type, crawler, oss_endpoint, oss_file, local_file):
+        # cls.bucket.put_object_from_file(oss_file, local_file)
+        cls.bucket(oss_endpoint).put_object_from_file(oss_file, local_file)
+        Common.logger(log_type, crawler).info("put oss file = {}, local file = {} success".format(oss_file, local_file))
+
+    # Remove a local file
+    @classmethod
+    def remove_local_file(cls, log_type, crawler, local_file):
+        os.remove(local_file)
+        Common.logger(log_type, crawler).info("remove local file = {} success".format(local_file))
+
+    # Remove a local directory
+    @classmethod
+    def remove_local_file_dir(cls, log_type, crawler, local_file):
+        os.rmdir(local_file)
+        Common.logger(log_type, crawler).info("remove local file dir = {} success".format(local_file))
+
+    # On-platform UID
+    @classmethod
+    def uids(cls, crawler, strategy, our_uid, env):
+        """
+        On-platform UID selection
+        :param crawler: which crawler
+        :param env: which environment
+        :param strategy: ranking strategy, or a specific on-platform UID
+        :param our_uid: upload under this specific on-platform UID
+        :return: uid
+        """
+        if env == 'dev':
+            uids_dev = [6267140, 6267141]
+            return random.choice(uids_dev)
+        elif crawler == 'kanyikan' and env == 'prod' and strategy == 'kanyikan_moment':
+            uids_prod_kanyikan_moment = [20631208, 20631209, 20631210, 20631211, 20631212,
+                                          20631213, 20631214, 20631215, 20631216, 20631217,
+                                          20631223, 20631224, 20631225, 20631226, 20631227]
+            return random.choice(uids_prod_kanyikan_moment)
+        elif crawler == 'ggdc' and env == 'prod' and strategy == 'kanyikan_recommend':
+            uids_ggdc_prod_recommend = [26117661, 26117662, 26117663]
+            return random.choice(uids_ggdc_prod_recommend)
+        elif crawler == 'ggdc' and env == 'prod' and strategy == 'follow':
+            uids_ggdc_prod_follow = [26117661, 26117662, 26117663]
+            return random.choice(uids_ggdc_prod_follow)
+        else:
+            return our_uid
+
+    # Crawler channel code
+    @classmethod
+    def crawlersrccode(cls, crawler):
+        if crawler == 'youtube':
+            return 'YOUTUBE'
+        elif crawler == 'kanyikan':
+            return 'KANYIKAN'
+        elif crawler == "kuaishou":
+            return "KUAISHOU_XCX"
+        elif crawler == "weishi":
+            return "WEISHI"
+        elif crawler == "xiaoniangao":
+            return "XIAONIANGAO_XCX"
+        elif crawler == "benshanzhufu":
+            return "BENSHANZHUFU"
+        elif crawler == "gongzhonghao_xinxin":
+            return "GONGZHONGHAO_XINXIN"
+        elif crawler == 'shipinhao':
+            return 'SHIPINHAO_XCX'
+        elif crawler == 'xigua':
+            return 'XIGUA'
+        elif crawler == 'zhihu':
+            return 'ZHIHU'
+        elif crawler == 'jixiangxingfu':
+            return 'JIXIANGXINGFU'
+        elif crawler == 'zhongmiaoyinxin':
+            return 'ZHONGMIAOYINXIN'
+        elif crawler == 'suisuiniannianyingfuqi':
+            return 'SUISUINIANNIANYINGFUQI'
+        elif crawler == 'zhufumao':
+            return 'ZHUFUMAO'
+        elif crawler == 'zongjiao':
+            return 'ZONGJIAO'
+        elif crawler == 'haokan':
+            return 'HAOKAN'
+        elif crawler == 'kandaojiushifuqi':
+            return 'KANDAOJIUSHIFUQI'
+        elif crawler == 'shengshengyingyin':
+            return 'SHENGSHENGYINGYIN'
+        elif crawler == 'ganggangdouchuan':
+            return 'GANGGANGDOUCHUAN'
+        elif crawler == 'gongzhonghao_xinxin':
+            return 'GONGZHONGHAO_XINXIN'
+
+    @classmethod
+    def local_file_path(cls, crawler):
+        local_file_path = f'./{crawler}/videos'
+        video_file = 'video'
+        image_file = 'image'
+        info_file = 'info'
+
+        local_file_dict = {
+            'local_file_path': local_file_path,
+            'video_file': video_file,
+            'image_file': image_file,
+            'info_file': info_file}
+        return local_file_dict
+
+    @classmethod
+    def upload_and_publish(cls, log_type, crawler, strategy, our_uid, env, oss_endpoint):
+        """
+        上传视频到 oss
+        :param log_type: 选择的 log
+        :param crawler: 哪款爬虫
+        :param env: 测试环境:dev,正式环境:prod
+        :param our_uid: 站内 UID
+        :param strategy: 榜单类型
+        :param oss_endpoint: 内网:inner;外网:out
+        """
+        Common.logger(log_type, crawler).info("upload_and_publish starting...")
+        today = time.strftime("%Y%m%d", time.localtime())
+        # All video folders under the videos directory
+        files = os.listdir(cls.local_file_path(crawler)["local_file_path"])
+        for fv in files:
+            try:
+                # 单个视频文件夹
+                fi_d = os.path.join(cls.local_file_path(crawler)["local_file_path"], fv)
+                # 确认为视频文件夹
+                if os.path.isdir(fi_d):
+                    Common.logger(log_type, crawler).info('dir = {}'.format(fi_d))
+                    # 列出所有视频文件夹
+                    dir_files = os.listdir(fi_d)
+                    data = {'appType': '888888',
+                            'crawlerSrcCode': cls.crawlersrccode(crawler),
+                            'viewStatus': '1',
+                            'versionCode': '1'}
+                    now_timestamp = int(round(time.time() * 1000))
+                    data['crawlerTaskTimestamp'] = str(now_timestamp)
+                    data['loginUid'] = cls.uids(crawler, strategy, our_uid, env)
+                    # 单个视频文件夹下的所有视频文件
+                    for fi in dir_files:
+                        # 视频文件夹下的所有文件路径
+                        fi_path = fi_d + '/' + fi
+                        Common.logger(log_type, crawler).info('dir fi_path = {}'.format(fi_path))
+                        # 读取 info.txt,赋值给 data
+                        if cls.local_file_path(crawler)["info_file"] in fi:
+                            f = open(fi_path, "r", encoding="UTF-8")
+                            # 读取数据 数据准确性写入的时候保证 读取暂不处理
+                            for i in range(14):
+                                line = f.readline()
+                                line = line.replace('\n', '')
+                                if line is not None and len(line) != 0 and not line.isspace():
+                                    # Common.logger(log_type, crawler).info("line = {}".format(line))
+                                    if i == 0:
+                                        data['crawlerSrcId'] = line
+                                    elif i == 1:
+                                        data['title'] = line
+                                    elif i == 2:
+                                        data['totalTime'] = line
+                                    elif i == 8:
+                                        data['crawlerSrcPublishTimestamp'] = line
+                                else:
+                                    Common.logger(log_type, crawler).warning("{} line is None".format(fi_path))
+                            f.close()
+                            # remove info.txt
+                            cls.remove_local_file(log_type, crawler, fi_path)
+                    # 刷新数据
+                    dir_files = os.listdir(fi_d)
+                    for fi in dir_files:
+                        fi_path = fi_d + '/' + fi
+                        # Common.logger(log_type, crawler).info('dir fi_path = {}'.format(fi_path))
+                        # 上传oss
+                        if cls.local_file_path(crawler)["video_file"] in fi:
+                            global oss_video_file
+                            if env == "dev":
+                                oss_video_file = cls.oss_file_path_video.format("dev", today, data['crawlerSrcId'])
+                            elif env == "prod":
+                                oss_video_file = cls.oss_file_path_video.format("prod", today, data['crawlerSrcId'])
+                            Common.logger(log_type, crawler).info("oss_video_file = {}".format(oss_video_file))
+                            cls.put_file(log_type, crawler, oss_endpoint, oss_video_file, fi_path)
+                            data['videoPath'] = oss_video_file
+                            Common.logger(log_type, crawler).info("videoPath = {}".format(oss_video_file))
+                        elif cls.local_file_path(crawler)["image_file"] in fi:
+                            global oss_image_file
+                            if env == "dev":
+                                oss_image_file = cls.oss_file_path_image.format("env", today, data['crawlerSrcId'])
+                            elif env == "prod":
+                                oss_image_file = cls.oss_file_path_image.format("prod", today, data['crawlerSrcId'])
+                            Common.logger(log_type, crawler).info("oss_image_file = {}".format(oss_image_file))
+                            cls.put_file(log_type, crawler, oss_endpoint, oss_image_file, fi_path)
+                            data['coverImgPath'] = oss_image_file
+                            Common.logger(log_type, crawler).info("coverImgPath = {}".format(oss_image_file))
+                        # 全部remove
+                        cls.remove_local_file(log_type, crawler, fi_path)
+
+                    # 发布
+                    if env == "dev":
+                        video_id = cls.publish_video_dev(log_type, crawler, data)
+                    elif env == "prod":
+                        video_id = cls.publish_video_prod(log_type, crawler, data)
+                    else:
+                        video_id = cls.publish_video_dev(log_type, crawler, data)
+                    cls.remove_local_file_dir(log_type, crawler, fi_d)
+                    Common.logger(log_type, crawler).info('video_id:{}', video_id)
+                    return video_id
+
+                else:
+                    Common.logger(log_type, crawler).error('file not a dir = {}'.format(fi_d))
+            except Exception as e:
+                # Remove the video folder
+                shutil.rmtree(f"./{crawler}/videos/{fv}/")
+                Common.logger(log_type, crawler).exception(f'upload_and_publish error: {e}')
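For orientation, a sketch of driving the flow described in the class docstring end to end; the UID and strategy values are placeholders under the assumptions noted in the comments:

```
# Sketch only: assumes ./youtube/videos/<title>/ already holds video.mp4, image.jpg and info.txt
# produced by Common.download_method / Common.save_video_info.
from common.publish import Publish

video_id = Publish.upload_and_publish(
    log_type="follow",
    crawler="youtube",
    strategy="定向爬虫策略",   # ranking strategy, see Publish.uids
    our_uid=6267140,          # fallback on-platform UID when no strategy rule matches
    env="dev",                # dev -> publish_video_dev, prod -> publish_video_prod
    oss_endpoint="out",       # inner = OSS intranet endpoint, out = public endpoint
)
print(video_id)
```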

+ 156 - 0
common/translate.py

@@ -0,0 +1,156 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/8
+import time
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
+
+class Translate:
+    @classmethod
+    def google_translate(cls, strs, machine):
+        # Enable performance logging for the requests
+        ca = DesiredCapabilities.CHROME
+        ca["goog:loggingPrefs"] = {"performance": "ALL"}
+
+        # Run headless, without opening a browser window
+        chrome_options = webdriver.ChromeOptions()
+        chrome_options.add_argument("--headless")
+        chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+        chrome_options.add_argument("--no-sandbox")
+
+        # Initialize the webdriver
+        if machine == 'aliyun':
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+        elif machine == 'macpro':
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
+        elif machine == 'macair':
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/piaoquan/Downloads/chromedriver'))
+        else:
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
+
+        driver.implicitly_wait(10)
+        # url = 'https://fanyi.baidu.com/?aldtype=16047#auto/zh'  # Baidu Translate
+        url = 'https://translate.google.de/?hl=zh-CN'  # Google Translate
+        driver.get(url)
+        time.sleep(3)
+        # driver.save_screenshot('./1.png')
+        accept_btns = driver.find_elements(By.XPATH, '//span[text()="全部接受"]')
+        accept_btns_eng = driver.find_elements(By.XPATH, '//span[text()="Accept all"]')
+        if len(accept_btns) != 0:
+            accept_btns[0].click()
+            time.sleep(2)
+        elif len(accept_btns_eng) != 0:
+            accept_btns_eng[0].click()
+            time.sleep(2)
+
+        textarea = driver.find_element(By.XPATH, '//textarea[@class="er8xn"]')
+        textarea.send_keys(strs)
+        time.sleep(5)
+        translate_list = driver.find_elements(By.XPATH, '//span[@class="ryNqvb"]')
+        translate_word_list = []
+        for text in translate_list:
+            word = text.get_attribute("innerHTML")
+            translate_word_list.append(word)
+        translate_word = "".join(translate_word_list)
+        time.sleep(1)
+        driver.quit()
+        return translate_word
+
+    @classmethod
+    def baidu_translate(cls, strs, machine):
+        # Enable performance logging for the requests
+        ca = DesiredCapabilities.CHROME
+        ca["goog:loggingPrefs"] = {"performance": "ALL"}
+
+        # Run headless, without opening a browser window
+        chrome_options = webdriver.ChromeOptions()
+        chrome_options.add_argument("--headless")
+        chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+        chrome_options.add_argument("--no-sandbox")
+
+        # Initialize the webdriver
+        if machine == 'aliyun':
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+        elif machine == 'macpro':
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
+        elif machine == 'macair':
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/piaoquan/Downloads/chromedriver'))
+        else:
+            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
+
+        # driver.implicitly_wait(10)
+        url = 'https://fanyi.baidu.com/?aldtype=16047#auto/zh'  # Baidu Translate
+        # url = 'https://translate.google.de/?hl=zh-CN'  # Google Translate
+        driver.get(url)
+        time.sleep(1)
+        # driver.save_screenshot('./1.png')
+        close_btns = driver.find_elements(By.XPATH, '//span[@class="app-guide-close"]')
+        accept_btns = driver.find_elements(By.XPATH, '//span[text()="全部接受"]')
+        accept_btns_eng = driver.find_elements(By.XPATH, '//span[text()="Accept all"]')
+        if len(close_btns) != 0:
+            close_btns[0].click()
+            time.sleep(2)
+        elif len(accept_btns) != 0:
+            accept_btns[0].click()
+            time.sleep(2)
+        elif len(accept_btns_eng) != 0:
+            accept_btns_eng[0].click()
+            time.sleep(2)
+
+        textarea = driver.find_element(By.XPATH, '//textarea[@id="baidu_translate_input"]')
+        textarea.send_keys(strs)
+        time.sleep(5)
+        translate_list = driver.find_elements(By.XPATH, '//p[@class="ordinary-output target-output clearfix"]')
+        translate_word_list = []
+        for text in translate_list:
+            word = text.find_elements(By.TAG_NAME, 'span')
+            for word_text in word:
+                word = word_text.text
+                translate_word_list.append(word)
+                # print(word)
+        translate_word = "".join(translate_word_list)
+        # print(translate_word)
+        time.sleep(1)
+        driver.quit()
+        return translate_word
+
+    @classmethod
+    def is_contains_chinese(cls, strs):
+        """
+        检查语句中是否包含中文字符
+        :param strs: 需要查询的句子
+        :return: 包含:True,不包含:False
+        """
+        for _char in strs:
+            if '\u4e00' <= _char <= '\u9fa5':
+                return True
+        return False
+
+    @classmethod
+    def is_all_chinese(cls, strs):
+        """
+        检查语句中是否全是中文字符,有标点符号也会返回 False
+        :param strs:需要查询的句子
+        :return:包含:True,不包含:False
+        """
+        for _char in strs:
+            if not '\u4e00' <= _char <= '\u9fa5':
+                return False
+        return True
+
+
+if __name__ == "__main__":
+    # is_contains_chinese = Translate.is_contains_chinese('SENSATIONAL Singer Wins the GROUP GOLDEN BUZZER with a STUNNING Audition!  | Got Talent Global')
+    # print(is_contains_chinese)
+    # # is_all_chinese = Translate.is_all_chinese('SENSATIONAL Singer Wins the GROUP GOLDEN BUZZER with a STUNNING Audition!')
+    # is_all_chinese = Translate.is_all_chinese('啊啊啊')
+    # print(is_all_chinese)
+
+    strs1 = 'SENSATIONAL Singer Wins the GROUP GOLDEN BUZZER with a STUNNING Audition!  | Got Talent Global'
+    # Translate.google_translate(strs1, 'local')
+    Translate.baidu_translate(strs1, 'local')
+
+    pass

+ 60 - 0
common/users.py

@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/2
+"""
+创建虚拟站内 UID
+https://w42nne6hzg.feishu.cn/docx/PhbhdXScYo9CxpxTT3gcOle4nIs
+"""
+import os
+import sys
+import requests
+sys.path.append(os.getcwd())
+from common.common import Common
+
+
+class Users:
+    @classmethod
+    def create_user(cls, log_type, crawler, user_dict, env):
+        """
+        创建站内虚拟 UID
+        :param log_type: 日志
+        :param crawler: 哪款爬虫
+        :param user_dict: 字典{'nickName': 用户名, 'avatarUrl': 头像, 'tagName': 站内用户标签}
+        :param env: 环境
+        :return: 站内 UID
+        """
+        try:
+            if env == 'dev':
+                # public network
+                url = 'http://videotest.yishihui.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
+                # intranet
+                # url = 'http://videotest-internal.yishihui.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
+            elif env == 'prod':
+                # public network
+                url = 'http://longvideoapi.piaoquantv.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
+                # intranet
+                # url = 'http://longvideoapi-internal.piaoquantv.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
+            else:
+                # public network
+                url = 'http://longvideoapi.piaoquantv.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
+                # intranet
+                # url = 'http://longvideoapi-internal.piaoquantv.com/longvideoapi/user/virtual/crawler/registerVirtualUser'
+            params = {
+                # 'count': 1,     # (required) number of accounts: pass 1
+                # 'accountType': 4,   # (required) account type: pass 4, app virtual account
+                'pwd': '',  # password, default 12346
+                'nickName': user_dict['nickName'],  # nickname, default vuser......
+                'avatarUrl': user_dict['avatarUrl'],  # avatar URL, default http://weapppiccdn.yishihui.com/resources/images/pic_normal.png
+                'tagName': user_dict['tagName'],  # multiple tags separated by commas
+            }
+            response = requests.post(url=url, params=params)
+            # print(response.text)
+            user_id = response.json()['data']
+            return user_id
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"create_user异常:{e}\n")
+
+
+if __name__ == "__main__":
+    # create_user 的第三个参数应为 user_dict;以下为示例数据,头像使用上方注释中的默认头像
+    test_user_dict = {
+        'nickName': 'vuser_test',
+        'avatarUrl': 'http://weapppiccdn.yishihui.com/resources/images/pic_normal.png',
+        'tagName': 'youtube爬虫,定向爬虫策略',
+    }
+    uid = Users.create_user('log', 'kanyikan', test_user_dict, 'dev')
+    print(uid)

BIN
kanyikan/.DS_Store


+ 3 - 0
kanyikan/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

+ 3 - 0
kanyikan/kanyikan_main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

+ 53 - 0
kanyikan/kanyikan_main/run_kanyikan_moment.py

@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31
+"""
+看一看+小程序: 朋友圈榜单执行入口
+"""
+import datetime
+import sys
+import os
+import time
+import argparse
+sys.path.append(os.getcwd())
+from common.common import Common
+from common.feishu import Feishu
+from kanyikan.kanyikan_moment.kanyikan_moment import Moment
+
+
+def main(log_type, crawler, strategy, our_uid, env, oss_endpoint):
+    """
+    主函数入口
+    :param log_type: 日志命名: moment
+    :param crawler: 哪款爬虫: kanyikan
+    :param strategy: 爬虫策略: kanyikan_moment
+    :param our_uid: 站内 UID: kanyikan_moment
+    :param env: 正式环境: prod;测试环境: dev
+    :param oss_endpoint: 阿里云102服务器: inner ;其它: out
+    :return: None
+    """
+    while True:
+        # 00:00 ~ 02:00 期间不抓取;休眠 60 秒后重新判断,避免空转占用 CPU
+        if 0 <= datetime.datetime.now().hour <= 1:
+            time.sleep(60)
+        else:
+            moment_video_list = Feishu.get_sheet_content(log_type, crawler, 'iK58HX')
+            for moment_video_id in moment_video_list:
+                Common.logger(log_type, crawler).info(f"开始抓取{moment_video_id}朋友圈推荐视频\n")
+                Moment.get_videos(log_type, crawler, strategy, our_uid, env, oss_endpoint, moment_video_id)
+
+            Common.del_logs(log_type, crawler)
+            Common.logger(log_type, crawler).info("抓取完一轮,休眠 10 秒\n")
+            time.sleep(10)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  # 新建参数解释器对象
+    parser.add_argument('--log_type', type=str)  # 添加参数,注明参数类型
+    parser.add_argument('--crawler')  # 添加参数
+    parser.add_argument('--strategy')  # 添加参数
+    parser.add_argument('--our_uid')  # 添加参数
+    parser.add_argument('--oss_endpoint')  # 添加参数
+    parser.add_argument('--env')  # 添加参数
+    args = parser.parse_args()  # 参数赋值,也可以通过终端赋值
+    # print(args)
+    main(args.log_type, args.crawler, args.strategy, args.our_uid, args.env, args.oss_endpoint)

+ 3 - 0
kanyikan/kanyikan_main/run_kanyikan_recommend.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

+ 3 - 0
kanyikan/kanyikan_moment/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

+ 285 - 0
kanyikan/kanyikan_moment/kanyikan_moment.py

@@ -0,0 +1,285 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31
+"""
+看一看+小程序: 朋友圈榜单
+"""
+import time
+import requests
+import urllib3
+from common.common import Common
+from common.feishu import Feishu
+from common.publish import Publish
+proxies = {"http": None, "https": None}
+
+
+class Moment:
+    # 抓取基础规则
+    @staticmethod
+    def download_rule(video_dict):
+        """
+        抓取基础规则
+        """
+        if int(float(video_dict['duration'])) >= 60:
+            if int(video_dict['video_width']) >= 0 or int(video_dict['video_height']) >= 0:
+                if int(video_dict['play_cnt']) >= 100000:
+                    if int(video_dict['like_cnt']) >= 0:
+                        if int(video_dict['share_cnt']) >= 0:
+                            return True
+                        else:
+                            return False
+                    else:
+                        return False
+                else:
+                    return False
+            return False
+        return False
+
+    # 获取推荐视频列表
+    @classmethod
+    def get_videos(cls, log_type, crawler, strategy, our_uid, env, oss_endpoint, moment_video_id):
+        url = "https://search.weixin.qq.com/cgi-bin/recwxa/snsgetvideoinfo?"
+        headers = {
+            "content-type": "application/json",
+            "Accept-Encoding": "gzip,compress,br,deflate",
+            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
+                          " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
+                          " MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN",
+            "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/236/page-frame.html"
+        }
+        # videoid = random.choice(Feishu.get_sheet_content(log_type, crawler, 'iK58HX'))
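+        # vid 为本次抓取的目标视频 ID;openid / model / sharesearchid / shareOpenid 为抓包得到的固定参数(假定长期有效)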
+        params = {
+            "vid": moment_video_id,
+            "openid": "1924336296754305",
+            "model": "iPhone 11<iPhone12,1>14.7.1",
+            "sharesearchid": "8406805193800900989",
+            "shareOpenid": "oh_m45YffSEGxvDH--6s6g9ZkPxg",
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
+            if r.status_code != 200:
+                Common.logger(log_type, crawler).warning(f"response.status_code:{r.status_code}")
+                Common.logger(log_type, crawler).warning(f"response.text:{r.text}\n")
+            elif r.json()["errcode"] != 0:
+                Common.logger(log_type, crawler).warning(f"msg:{r.json()['msg']}\n")
+            elif "rec_video_list" not in r.json()["data"]:
+                Common.logger(log_type, crawler).warning(f"该视频没有推荐列表\n")
+            else:
+                feeds = r.json()["data"]["rec_video_list"]
+                for i in range(len(feeds)):
+                    # video_id
+                    if "vid" in feeds[i]:
+                        video_id = feeds[i]["vid"]
+                    else:
+                        video_id = 0
+
+                    # video_title
+                    if "title" in feeds[i]:
+                        video_title = feeds[i]["title"].strip().replace("\n", "") \
+                                .replace("/", "").replace("\\", "").replace("\r", "") \
+                                .replace(":", "").replace("*", "").replace("?", "") \
+                                .replace("?", "").replace('"', "").replace("<", "") \
+                                .replace(">", "").replace("|", "").replace(" ", "") \
+                                .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
+                                .replace("小年糕", "").replace("#", "").replace("Merge", "")
+                    else:
+                        video_title = 0
+
+                    # video_play_cnt
+                    if "played_cnt" in feeds[i]:
+                        video_play_cnt = feeds[i]["played_cnt"]
+                    else:
+                        video_play_cnt = 0
+
+                    # video_comment_cnt
+                    if "comment_cnt" in feeds[i]:
+                        video_comment_cnt = feeds[i]["comment_cnt"]
+                    else:
+                        video_comment_cnt = 0
+
+                    # video_liked_cnt
+                    if "liked_cnt" in feeds[i]:
+                        video_liked_cnt = feeds[i]["liked_cnt"]
+                    else:
+                        video_liked_cnt = 0
+
+                    # video_share_cnt
+                    if "shared_cnt" in feeds[i]:
+                        video_share_cnt = feeds[i]["shared_cnt"]
+                    else:
+                        video_share_cnt = 0
+
+                    # video_duration
+                    if "duration" in feeds[i]:
+                        video_duration = feeds[i]["duration"]
+                    else:
+                        video_duration = 0
+
+                    # video_width / video_height
+                    if "width" in feeds[i] or "height" in feeds[i]:
+                        video_width = feeds[i]["width"]
+                        video_height = feeds[i]["height"]
+                    else:
+                        video_width = 0
+                        video_height = 0
+
+                    # video_send_time
+                    if "upload_time" in feeds[i]:
+                        publish_time = feeds[i]["upload_time"]
+                    else:
+                        publish_time = 0
+
+                    # user_name
+                    if "user_info" not in feeds[i]:
+                        user_name = 0
+                    elif "nickname" not in feeds[i]["user_info"]:
+                        user_name = 0
+                    else:
+                        user_name = feeds[i]["user_info"]["nickname"].strip().replace("\n", "")
+
+                    # user_id
+                    if "user_info" not in feeds[i]:
+                        user_id = 0
+                    elif "openid" not in feeds[i]["user_info"]:
+                        user_id = 0
+                    else:
+                        user_id = feeds[i]["user_info"]["openid"]
+
+                    # head_url
+                    if "user_info" not in feeds[i]:
+                        avatar_url = 0
+                    elif "headimg_url" not in feeds[i]["user_info"]:
+                        avatar_url = 0
+                    else:
+                        avatar_url = feeds[i]["user_info"]["headimg_url"]
+
+                    # cover_url
+                    if "cover_url" not in feeds[i]:
+                        cover_url = 0
+                    else:
+                        cover_url = feeds[i]["cover_url"]
+
+                    # video_url
+                    if "play_info" not in feeds[i]:
+                        video_url = 0
+                    elif "items" not in feeds[i]["play_info"]:
+                        video_url = 0
+                    else:
+                        video_url = feeds[i]["play_info"]["items"][-1]["play_url"]
+
+                    video_dict = {
+                        'video_id': video_id,
+                        'video_title': video_title,
+                        'duration': video_duration,
+                        'play_cnt': video_play_cnt,
+                        'comment_cnt': video_comment_cnt,
+                        'like_cnt': video_liked_cnt,
+                        'share_cnt': video_share_cnt,
+                        'video_width': video_width,
+                        'video_height': video_height,
+                        'publish_time': publish_time,
+                        'user_name': user_name,
+                        'user_id': user_id,
+                        'avatar_url': avatar_url,
+                        'video_url': video_url,
+                        'cover_url': cover_url,
+                        'session': f'kanyikan_moment_{int(time.time())}',
+                    }
+                    Common.logger(log_type, crawler).info("video_title:{}", video_title)
+                    Common.logger(log_type, crawler).info("video_play_cnt:{}", video_play_cnt)
+                    Common.logger(log_type, crawler).info("video_duration:{}", video_duration)
+                    Common.logger(log_type, crawler).info("video_url:{}", video_url)
+
+                    # 过滤无效视频
+                    if video_id == 0 or video_title == 0 or video_duration == 0 or publish_time == 0 or user_id == 0\
+                            or avatar_url == 0 or cover_url == 0 or video_url == 0:
+                        Common.logger(log_type, crawler).warning("无效视频\n")
+                    # 抓取基础规则
+                    elif cls.download_rule(video_dict) is False:
+                        Common.logger(log_type, crawler).info("不满足基础规则\n")
+                    elif int(publish_time) < 1659283200:
+                        Common.logger(log_type, crawler).info(f'发布时间{time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(publish_time))} < 2022-08-01\n')
+                    # 过滤词库
+                    elif any(word in video_title for word in Feishu.get_sheet_content(log_type, crawler, 'rofdM5') if word):
+                        Common.logger(log_type, crawler).info("视频已中过滤词\n")
+                    # 已下载视频表去重
+                    elif video_id in [j for m in Feishu.get_values_batch(log_type, crawler, "20ce0c") for j in m]:
+                        Common.logger(log_type, crawler).info("视频已下载\n")
+                    else:
+                        cls.download_publish(log_type, crawler, strategy, our_uid, env, oss_endpoint, video_dict)
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"get_videos异常:{e}\n")
+
+    # 下载/上传视频
+    @classmethod
+    def download_publish(cls, log_type, crawler, strategy, our_uid, env, oss_endpoint, video_dict):
+        try:
+            # 过滤空行及空标题视频
+            if video_dict['video_id'] == 0 \
+                    or video_dict['video_title'] == 0\
+                    or video_dict['video_url'] == 0:
+                Common.logger(log_type, crawler).info("无效视频\n")
+            # # 视频的抓取时间小于 2 天
+            # elif int(time.time()) - v_push_time > 172800:
+            #     Common.logger(log_type, crawler).info("抓取时间超过2天:{}", video_dict['video_title'])
+            #     # 删除行或列,可选 ROWS、COLUMNS
+            #     Feishu.dimension_range("tGqZMX", "ROWS", i + 1, i + 1)
+            #     return
+            # 视频发布时间不小于 2021-06-01 00:00:00
+            elif video_dict['publish_time'] < 1622476800:
+                Common.logger(log_type, crawler).info(f'发布时间{time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(video_dict["publish_time"]))} < 2021-06-01\n')
+            else:
+                # 下载封面
+                Common.download_method(log_type=log_type, crawler=crawler, text="cover",
+                                       title=video_dict['video_title'], url=video_dict['cover_url'])
+                # 下载视频
+                Common.download_method(log_type=log_type, crawler=crawler, text="video",
+                                       title=video_dict['video_title'], url=video_dict['video_url'])
+                # 保存视频信息至 "./{crawler}/videos/{video_dict['video_title']}/info.txt"
+                Common.save_video_info(log_type, crawler, video_dict)
+
+                # 上传视频
+                Common.logger(log_type, crawler).info(f"开始上传视频:{video_dict['video_title']}")
+                our_video_id = Publish.upload_and_publish(log_type, crawler, strategy, our_uid, env, oss_endpoint)
+                if env == 'dev':
+                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+                else:
+                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+                Common.logger(log_type, crawler).info("视频上传完成:{}", video_dict['video_title'])
+
+                # 保存视频 ID 到云文档
+                Common.logger(log_type, crawler).info(f"保存视频ID至云文档:{video_dict['video_title']}")
+                # 视频ID工作表,插入首行
+                Feishu.insert_columns(log_type, crawler, "20ce0c", "ROWS", 1, 2)
+                # 视频ID工作表,首行写入数据
+                upload_time = int(time.time())
+                values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
+                           "朋友圈",
+                           str(video_dict['video_id']),
+                           str(video_dict['video_title']),
+                           our_video_link,
+                           video_dict['play_cnt'],
+                           video_dict['comment_cnt'],
+                           video_dict['like_cnt'],
+                           video_dict['share_cnt'],
+                           video_dict['duration'],
+                           f"{video_dict['video_width']}*{video_dict['video_height']}",
+                           time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(video_dict["publish_time"])),
+                           video_dict['user_name'],
+                           video_dict['user_id'],
+                           video_dict['avatar_url'],
+                           video_dict['cover_url'],
+                           video_dict['video_url']
+                           ]]
+                time.sleep(1)
+                Feishu.update_values(log_type, crawler, "20ce0c", "F2:W2", values)
+                Common.logger(log_type, crawler).info('下载/上传成功\n')
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"download_publish异常:{e}\n")
+
+
+if __name__ == "__main__":
+    kanyikan_moment = Moment()
+
+    pass

+ 3 - 0
kanyikan/kanyikan_recommend/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

+ 3 - 0
kanyikan/kanyikan_recommend/kanyikan_recommend.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

BIN
kanyikan/logs/.DS_Store


BIN
kanyikan/videos/.DS_Store


+ 3 - 0
main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/1

+ 72 - 0
main/main.sh

@@ -0,0 +1,72 @@
+#!/bin/bash
+# 看一看+小程序 朋友圈榜单
+# sh ./main/main.sh ./kanyikan/kanyikan_main/run_kanyikan_moment.py --log_type="moment" --crawler="kanyikan" --strategy="kanyikan_moment" --our_uid="kanyikan_moment" --oss_endpoint="out" --env="dev" ./kanyikan/nohup.log local
+# youtube 定向策略
+# sh ./main/main.sh ./youtube/youtube_main/run_youtube_follow.py --log_type="follow" --crawler="youtube" --strategy="定向爬虫策略" --oss_endpoint="out" --env="dev" --machine="local" youtube/nohup.log
+# ps aux | grep run_youtube | grep Python | grep -v grep | awk '{print $2}' | xargs kill -9
+
+crawler_dir=$1  # 爬虫Python脚本执行入口路径
+log_type=$2     # 日志
+crawler=$3      # 哪款爬虫
+strategy=$4     # 爬虫策略
+oss_endpoint=$5 # OSS 网关,脚本部署在阿里云服务器时填写 inner,其它填写 out
+env=$6          # 爬虫上传环境。正式环境填写 prod,测试环境填写 dev
+machine=$7      # 部署机器,阿里云填写 aliyun,线下分别填写 macpro,macair,local
+nohup_dir=$8    # nohup 日志路径
+
+echo "开始"
+
+if [ ${machine} = "macpro" ];then
+  piaoquan_crawler_dir=/Users/lieyunye/Desktop/piaoquan_crawler/
+  profile_path=.bash_profile
+  node_path=/usr/local/bin/node
+elif [ ${machine} = "macair" ];then
+  piaoquan_crawler_dir=/Users/piaoquan/Desktop/piaoquan_crawler/
+  profile_path=./base_profile
+  node_path=/usr/local/bin/node
+elif [ ${machine} = "aliyun" ];then
+  piaoquan_crawler_dir=/data5/wangkun/piaoquan_crawler/
+  profile_path=/etc/profile
+elif [ ${machine} = "local" ];then
+  piaoquan_crawler_dir=/Users/wangkun/Desktop/crawler/piaoquan_crawler/
+  profile_path=/etc/profile
+  node_path=/opt/homebrew/bin/node
+else
+  piaoquan_crawler_dir=/Users/wangkun/Desktop/crawler/piaoquan_crawler/
+  profile_path=/etc/profile
+  node_path=/opt/homebrew/bin/node
+fi
+
+echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量..."
+cd ~ && source ${profile_path}
+echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成!"
+
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在杀进程..."
+grep_str=run_${crawler##*=}
+ps aux | grep ${grep_str} | grep Python | grep -v grep | awk '{print $2}' | xargs kill -9
+echo "$(date "+%Y-%m-%d %H:%M:%S") 进程已杀死!"
+
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在更新代码..."
+cd ${piaoquan_crawler_dir} && git pull origin master --force && rm -f ${piaoquan_crawler_dir}main/nohup.log && rm -f ${piaoquan_crawler_dir}${nohup_dir}
+#echo ${piaoquan_crawler_dir}
+echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成!"
+
+if [ ${machine} != "aliyun" ];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启Appium..."
+  ps aux | grep Appium.app | grep -v grep | awk '{print $2}' | xargs kill -9
+  nohup ${node_path} /Applications/Appium.app/Contents/Resources/app/node_modules/appium/build/lib/main.js >>./nohup.log 2>&1 &
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启Appium完毕!"
+
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启adb..."
+  adb kill-server
+  adb start-server
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启adb完毕!"
+else
+  echo "无需重启Appium及adb服务"
+fi
+
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启服务..."
+nohup python3 -u ${crawler_dir} ${log_type} ${crawler} ${strategy} ${oss_endpoint} ${env} ${machine} >>${nohup_dir} 2>&1 &
+echo "$(date "+%Y-%m-%d %H:%M:%S") 服务重启完毕!"
+
+exit 0

+ 7 - 0
requirements.txt

@@ -0,0 +1,7 @@
+ffmpeg==1.4
+loguru==0.6.0
+oss2==2.15.0
+PyMySQL==1.0.2
+requests==2.27.1
+selenium==4.8.0
+urllib3==1.26.9

+ 3 - 0
weixinzhishu/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

+ 3 - 0
weixinzhishu/main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

BIN
youtube/.DS_Store


+ 3 - 0
youtube/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

BIN
youtube/logs/.DS_Store


BIN
youtube/videos/.DS_Store


BIN
youtube/youtube_follow/.DS_Store


+ 3 - 0
youtube/youtube_follow/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/3

+ 1095 - 0
youtube/youtube_follow/youtube_follow.py

@@ -0,0 +1,1095 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/3
+"""
+YouTube 定向榜
+"""
+import os
+import shutil
+import sys
+import time
+import json
+import requests
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
+
+sys.path.append(os.getcwd())
+from common.translate import Translate
+from common.common import Common
+from common.db import MysqlHelper
+from common.feishu import Feishu
+from common.users import Users
+from common.publish import Publish
+
+
+class Follow:
+    # 翻页参数
+    continuation = ''
+    # 抓取平台
+    platform = 'youtube'
+
+    @classmethod
+    def get_browse_id(cls, log_type, crawler, out_user_id, machine):
+        """
+        获取每个用户的 browse_id
+        :param log_type: 日志
+        :param crawler: 哪款爬虫
+        :param out_user_id: 站外用户 UID
+        :param machine: 部署机器,阿里云填写 aliyun,线下分别填写 macpro,macair,local
+        :return: browse_id
+        """
+        try:
+            # 打印请求配置
+            ca = DesiredCapabilities.CHROME
+            ca["goog:loggingPrefs"] = {"performance": "ALL"}
+
+            # 不打开浏览器运行
+            chrome_options = webdriver.ChromeOptions()
+            chrome_options.add_argument("--headless")
+            chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+            chrome_options.add_argument("--no-sandbox")
+
+            # driver初始化
+            if machine == 'aliyun':
+                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+            elif machine == 'macpro':
+                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
+            elif machine == 'macair':
+                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/piaoquan/Downloads/chromedriver'))
+            else:
+                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
+
+            driver.implicitly_wait(10)
+            url = f'https://www.youtube.com/{out_user_id}/videos'
+            driver.get(url)
+            # driver.save_screenshot("./1.png")
+            # 向上滑动 1000 个像素
+            # driver.execute_script('window.scrollBy(0, 2000)')
+            # driver.save_screenshot("./2.png")
+            time.sleep(3)
+            accept_btns = driver.find_elements(By.XPATH, '//span[text()="全部接受"]')
+            accept_btns_eng = driver.find_elements(By.XPATH, '//span[text()="Accept all"]')
+            if len(accept_btns) != 0:
+                accept_btns[0].click()
+                time.sleep(2)
+            elif len(accept_btns_eng) != 0:
+                accept_btns_eng[0].click()
+                time.sleep(2)
+            browse_id = driver.find_element(By.XPATH, '//meta[@itemprop="channelId"]').get_attribute('content')
+            driver.quit()
+            return browse_id
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f'get_browse_id异常:{e}\n')
+
+    @classmethod
+    def get_out_user_info(cls, log_type, crawler, browse_id, out_user_id):
+        """
+        获取站外用户信息
+        :param log_type: 日志
+        :param crawler: 哪款爬虫
+        :param browse_id: browse_id
+        :param out_user_id: 站外用户 UID
+        :return: out_user_dict = {'out_user_name': 站外用户昵称,
+                                'out_avatar_url': 站外用户头像,
+                                'out_fans': 站外用户粉丝量,
+                                'out_play_cnt': 站外用户总播放量,
+                                'out_create_time': 站外用户创建时间}
+        """
+        try:
+            url = "https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false"
+            payload = json.dumps({
+                "context": {
+                    "client": {
+                        "hl": "zh-CN",
+                        "gl": "US",
+                        "remoteHost": "38.93.247.21",
+                        "deviceMake": "Apple",
+                        "deviceModel": "",
+                        "visitorData": "CgtraDZfVnB4NXdIWSjL1IKfBg%3D%3D",
+                        "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36,gzip(gfe)",
+                        "clientName": "WEB",
+                        "clientVersion": "2.20230201.01.00",
+                        "osName": "Macintosh",
+                        "osVersion": "10_15_7",
+                        "originalUrl": f"https://www.youtube.com/{out_user_id}/about",
+                        "screenPixelDensity": 1,
+                        "platform": "DESKTOP",
+                        "clientFormFactor": "UNKNOWN_FORM_FACTOR",
+                        "configInfo": {
+                            "appInstallData": "CMvUgp8GEKLsrgUQzN-uBRC41K4FENfkrgUQsvWuBRDkoP4SELiLrgUQo_muBRDn964FENnprgUQlPiuBRC2nP4SEPuj_hIQ4tSuBRCJ6K4FEILdrgUQh92uBRD-7q4FEMz1rgUQ76P-EhDJya4FEJan_hIQkfj8Eg%3D%3D"
+                        },
+                        "screenDensityFloat": 1,
+                        "timeZone": "Asia/Shanghai",
+                        "browserName": "Chrome",
+                        "browserVersion": "109.0.0.0",
+                        "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+                        "deviceExperimentId": "ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EMvUgp8GGOmU7Z4G",
+                        "screenWidthPoints": 805,
+                        "screenHeightPoints": 969,
+                        "utcOffsetMinutes": 480,
+                        "userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
+                        "memoryTotalKbytes": "8000000",
+                        "mainAppWebInfo": {
+                            "graftUrl": f"/{out_user_id}/about",
+                            "pwaInstallabilityStatus": "PWA_INSTALLABILITY_STATUS_CAN_BE_INSTALLED",
+                            "webDisplayMode": "WEB_DISPLAY_MODE_FULLSCREEN",
+                            "isWebNativeShareAvailable": True
+                        }
+                    },
+                    "user": {
+                        "lockedSafetyMode": False
+                    },
+                    "request": {
+                        "useSsl": True,
+                        "internalExperimentFlags": [],
+                        "consistencyTokenJars": []
+                    },
+                    "clickTracking": {
+                        "clickTrackingParams": "CBMQ8JMBGAoiEwjY34r0rYD9AhURSEwIHfHZAak="
+                    },
+                    "adSignalsInfo": {
+                        "params": [
+                            {
+                                "key": "dt",
+                                "value": "1675668045032"
+                            },
+                            {
+                                "key": "flash",
+                                "value": "0"
+                            },
+                            {
+                                "key": "frm",
+                                "value": "0"
+                            },
+                            {
+                                "key": "u_tz",
+                                "value": "480"
+                            },
+                            {
+                                "key": "u_his",
+                                "value": "1"
+                            },
+                            {
+                                "key": "u_h",
+                                "value": "1080"
+                            },
+                            {
+                                "key": "u_w",
+                                "value": "1920"
+                            },
+                            {
+                                "key": "u_ah",
+                                "value": "1080"
+                            },
+                            {
+                                "key": "u_aw",
+                                "value": "1920"
+                            },
+                            {
+                                "key": "u_cd",
+                                "value": "24"
+                            },
+                            {
+                                "key": "bc",
+                                "value": "31"
+                            },
+                            {
+                                "key": "bih",
+                                "value": "969"
+                            },
+                            {
+                                "key": "biw",
+                                "value": "805"
+                            },
+                            {
+                                "key": "brdim",
+                                "value": "-269,-1080,-269,-1080,1920,-1080,1920,1080,805,969"
+                            },
+                            {
+                                "key": "vis",
+                                "value": "1"
+                            },
+                            {
+                                "key": "wgl",
+                                "value": "true"
+                            },
+                            {
+                                "key": "ca_type",
+                                "value": "image"
+                            }
+                        ],
+                        "bid": "ANyPxKqvCBKtjNeHQ6uTC7sKj2ZwIvEkk3oRlmdU7H_soRJWLc4IQCkqMVP68RR-Xae0h3nMdOKYOtVh_Yb2OYr4znd60I5j7A"
+                    }
+                },
+                "browseId": browse_id,
+                "params": "EgVhYm91dPIGBAoCEgA%3D"
+            })
+            headers = {
+                'authority': 'www.youtube.com',
+                'accept': '*/*',
+                'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+                'cache-control': 'no-cache',
+                'content-type': 'application/json',
+                'cookie': 'VISITOR_INFO1_LIVE=kh6_Vpx5wHY; YSC=UupqFrWvAR0; DEVICE_INFO=ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EOmU7Z4GGOmU7Z4G; GPS=1; PREF=tz=Asia.Shanghai; ST-h076le=itct=CBMQ8JMBGAoiEwjY34r0rYD9AhURSEwIHfHZAak%3D&csn=MC45NDM2MjgyNzM1ODE5NDAz&endpoint=%7B%22clickTrackingParams%22%3A%22CBMQ8JMBGAoiEwjY34r0rYD9AhURSEwIHfHZAak%3D%22%2C%22commandMetadata%22%3A%7B%22webCommandMetadata%22%3A%7B%22url%22%3A%22%2F%40weitravel%2Fabout%22%2C%22webPageType%22%3A%22WEB_PAGE_TYPE_CHANNEL%22%2C%22rootVe%22%3A3611%2C%22apiUrl%22%3A%22%2Fyoutubei%2Fv1%2Fbrowse%22%7D%7D%2C%22browseEndpoint%22%3A%7B%22browseId%22%3A%22UC08jgxf119fzynp2uHCvZIg%22%2C%22params%22%3A%22EgVhYm91dPIGBAoCEgA%253D%22%2C%22canonicalBaseUrl%22%3A%22%2F%40weitravel%22%7D%7D',
+                'origin': 'https://www.youtube.com',
+                'pragma': 'no-cache',
+                'referer': f'https://www.youtube.com/{out_user_id}/videos',
+                'sec-ch-ua': '"Not_A Brand";v="99", "Chromium";v="109", "Google Chrome";v="109.0.5414.87"',
+                'sec-ch-ua-arch': '"arm"',
+                'sec-ch-ua-bitness': '"64"',
+                'sec-ch-ua-full-version': '"109.0.1518.52"',
+                'sec-ch-ua-full-version-list': '"Not_A Brand";v="99.0.0.0", "Microsoft Edge";v="109.0.1518.52", "Chromium";v="109.0.5414.87"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-model': '',
+                'sec-ch-ua-platform': '"macOS"',
+                'sec-ch-ua-platform-version': '"12.4.0"',
+                'sec-ch-ua-wow64': '?0',
+                'sec-fetch-dest': 'empty',
+                'sec-fetch-mode': 'same-origin',
+                'sec-fetch-site': 'same-origin',
+                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+                'x-goog-visitor-id': 'CgtraDZfVnB4NXdIWSjL1IKfBg%3D%3D',
+                'x-youtube-bootstrap-logged-in': 'false',
+                'x-youtube-client-name': '1',
+                'x-youtube-client-version': '2.20230201.01.00'
+            }
+            response = requests.post(url=url, headers=headers, data=payload)
+            if response.status_code != 200:
+                Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.text}\n')
+            elif 'contents' not in response.text or 'header' not in response.text:
+                Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.text}\n')
+            elif 'c4TabbedHeaderRenderer' not in response.json()['header']:
+                Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.json()["header"]}\n')
+            elif 'twoColumnBrowseResultsRenderer' not in response.json()['contents']:
+                Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.json()}\n')
+            elif 'tabs' not in response.json()['contents']['twoColumnBrowseResultsRenderer']:
+                Common.logger(log_type, crawler).warning(f"get_out_user_info:{response.json()['contents']['twoColumnBrowseResultsRenderer']}\n")
+            else:
+                header = response.json()['header']['c4TabbedHeaderRenderer']
+                tabs = response.json()['contents']['twoColumnBrowseResultsRenderer']['tabs']
+                for i in range(len(tabs)):
+                    if 'tabRenderer' not in tabs[i]:
+                        title = ''
+                    elif 'title' not in tabs[i]['tabRenderer']:
+                        title = ''
+                    else:
+                        title = tabs[i]['tabRenderer']['title']
+
+                    if title == '简介':
+                        if 'tabRenderer' not in tabs[i]:
+                            Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]}\n")
+                        elif 'content' not in tabs[i]['tabRenderer']:
+                            Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]['tabRenderer']}\n")
+                        elif 'sectionListRenderer' not in tabs[i]['tabRenderer']['content']:
+                            Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]['tabRenderer']['content']}\n")
+                        elif 'contents' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']:
+                            Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']}\n")
+                        elif len(tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents']) == 0:
+                            Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']}\n")
+                        elif 'itemSectionRenderer' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]:
+                            Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]}\n")
+                        elif 'contents' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']:
+                            Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']}\n")
+                        elif len(tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']) == 0:
+                            Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']}\n")
+                        elif 'channelAboutFullMetadataRenderer' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]:
+                            Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]}\n")
+                        else:
+                            # 站外用户昵称
+                            if 'title' not in header and 'title' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']:
+                                out_user_name = ''
+                            elif 'title' in header:
+                                out_user_name = header['title']
+                            elif 'simpleText' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['title']:
+                                out_user_name = ''
+                            else:
+                                out_user_name = tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['title']['simpleText']
+
+                            # 站外用户头像
+                            if 'avatar' not in header and 'avatar' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']:
+                                out_avatar_url = ''
+                            elif 'thumbnails' not in header['avatar'] and 'thumbnails' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['avatar']:
+                                out_avatar_url = ''
+                            elif len(header['avatar']['thumbnails']) == 0 and len(tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['avatar']['thumbnails']) == 0:
+                                out_avatar_url = ''
+                            elif 'url' not in header['avatar']['thumbnails'][-1] and 'url' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['avatar']['thumbnails'][-1]:
+                                out_avatar_url = ''
+                            elif 'url' in header['avatar']['thumbnails'][-1]:
+                                out_avatar_url = header['avatar']['thumbnails'][-1]['url']
+                            else:
+                                out_avatar_url = tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['avatar']['thumbnails'][-1]['url']
+
+                            # 站外用户粉丝
+                            if 'subscriberCountText' not in header:
+                                out_fans = 0
+                            elif 'accessibility' not in header['subscriberCountText']:
+                                out_fans = 0
+                            elif 'accessibilityData' not in header['subscriberCountText']['accessibility']:
+                                out_fans = 0
+                            elif 'label' not in header['subscriberCountText']['accessibility']['accessibilityData']:
+                                out_fans = 0
+                            else:
+                                out_fans = header['subscriberCountText']['accessibility']['accessibilityData']['label']
+                                if '万' in out_fans:
+                                    out_fans = int(float(out_fans.split('万')[0])*10000)
+                                else:
+                                    pass
+
+                            # 站外用户总播放量
+                            if 'viewCountText' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']:
+                                out_play_cnt = 0
+                            elif 'simpleText' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['viewCountText']:
+                                out_play_cnt = 0
+                            else:
+                                out_play_cnt = int(tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['viewCountText']['simpleText'].split('次')[0].replace(',', ''))
+
+                            # 站外用户注册时间
+                            if 'joinedDateText' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']:
+                                out_create_time = ''
+                            elif 'runs' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['joinedDateText']:
+                                out_create_time = ''
+                            elif len(tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['joinedDateText']['runs']) == 0:
+                                out_create_time = ''
+                            elif 'text' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['joinedDateText']['runs'][0]:
+                                out_create_time = ''
+                            else:
+                                out_create_time = tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['joinedDateText']['runs'][0]['text'].replace('年', '-').replace('月', '-').replace('日', '')
+                            out_user_dict = {
+                                'out_user_name': out_user_name,
+                                'out_avatar_url': out_avatar_url,
+                                'out_fans': out_fans,
+                                'out_play_cnt': out_play_cnt,
+                                'out_create_time': out_create_time,
+                            }
+                            # print(out_user_dict)
+                            return out_user_dict
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f'get_out_user_info异常:{e}\n')
+
+    @classmethod
+    def get_user_from_feishu(cls, log_type, crawler, sheetid, env, machine):
+        """
+        补全飞书用户表信息,并返回
+        :param log_type: 日志
+        :param crawler: 哪款爬虫
+        :param sheetid: 飞书表
+        :param env: 正式环境:prod,测试环境:dev
+        :param machine: 部署机器,阿里云填写 aliyun,线下分别填写 macpro,macair,local
+        :return: user_list
+        """
+        user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
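+        # 用户表列含义:C=站外 UID,D=昵称,F=browse_id,G=站内 UID(第 1 行为表头,数据从第 2 行开始)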
+        user_list = []
+        for i in range(1, len(user_sheet)):
+            out_uid = user_sheet[i][2]
+            user_name = user_sheet[i][3]
+            browse_id = user_sheet[i][5]
+            our_uid = user_sheet[i][6]
+            Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
+            # 获取站外browse_id,并写入飞书
+            if browse_id is None:
+                browse_id = cls.get_browse_id(log_type, crawler, out_uid, machine)
+                if browse_id is None:
+                    Common.logger(log_type, crawler).warning('browse_id is None !')
+                else:
+                    Feishu.update_values(log_type, crawler, sheetid, f'F{i+1}:F{i+1}', [[browse_id]])
+                    Common.logger(log_type, crawler).info(f'browse_id写入成功:{browse_id}')
+            # 站内 UID 为空,且数据库中(youtube+out_user_id)返回数量 == 0,则创建新的站内账号
+            if our_uid is None:
+                sql = f""" select * from crawler_user where platform="{cls.platform}" and out_user_id="{out_uid}" """
+                our_user_info = MysqlHelper.get_values(log_type, crawler, sql, env)
+                # 数据库中(youtube + out_user_id)返回数量 == 0,则创建站内账号UID,并写入定向账号飞书表。并结合站外用户信息,一并写入爬虫账号数据库
+                if our_user_info is None or len(our_user_info) == 0:
+                    # 获取站外账号信息,写入数据库
+                    out_user_dict = cls.get_out_user_info(log_type, crawler, browse_id, out_uid)
+                    out_avatar_url = out_user_dict['out_avatar_url']
+                    out_create_time = out_user_dict['out_create_time']
+                    out_play_cnt = out_user_dict['out_play_cnt']
+                    out_fans = out_user_dict['out_fans']
+                    tag = 'youtube爬虫,定向爬虫策略'
+
+                    # 创建站内账号
+                    create_user_dict = {
+                        'nickName': user_name,
+                        'avatarUrl': out_avatar_url,
+                        'tagName': tag,
+                    }
+                    our_uid = Users.create_user(log_type, crawler, create_user_dict, env)
+                    if env == 'prod':
+                        our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
+                    else:
+                        our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
+                    Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
+                    Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}', [[our_uid, our_user_link]])
+                    Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!')
+
+                    sql = f""" insert into crawler_user(user_id, 
+                                        out_user_id, 
+                                        out_user_name, 
+                                        out_avatar_url, 
+                                        out_create_time, 
+                                        out_play_cnt, 
+                                        out_fans, 
+                                        platform, 
+                                        tag)
+                                        values({our_uid}, 
+                                        "{out_uid}", 
+                                        "{user_name}", 
+                                        "{out_avatar_url}", 
+                                        "{out_create_time}", 
+                                        {out_play_cnt}, 
+                                        {out_fans}, 
+                                        "{cls.platform}",
+                                        "{tag}") """
+                    MysqlHelper.update_values(log_type, crawler, sql, env)
+                    Common.logger(log_type, crawler).info('用户信息插入数据库成功!\n')
+                # 数据库中(youtube + out_user_id)返回数量 != 0,则直接把数据库中的站内 UID 写入飞书
+                else:
+                    our_uid = our_user_info[0][1]
+                    if env == 'prod':
+                        our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
+                    else:
+                        our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
+                    Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
+                    Feishu.update_values(log_type, crawler, sheetid, f'G{i+1}:H{i+1}', [[our_uid, our_user_link]])
+                    Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
+
+            user_dict = {
+                'out_user_id': out_uid,
+                'out_user_name': user_name,
+                'out_browse_id': browse_id,
+                'our_user_id': our_uid,
+            }
+            user_list.append(user_dict)
+        return user_list
+
+    @classmethod
+    def get_feeds(cls, log_type, crawler, browse_id, out_uid):
+        """
+        获取个人主页视频列表
+        :param log_type: 日志
+        :param crawler: 哪款爬虫
+        :param browse_id: 每个用户主页的请求参数中唯一值
+        :param out_uid: 站外用户UID
+        :return: video_list
+        """
+        url = "https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false"
+        payload = json.dumps({
+            "context": {
+                "client": {
+                    "hl": "zh-CN",
+                    "gl": "US",
+                    "remoteHost": "38.93.247.21",
+                    "deviceMake": "Apple",
+                    "deviceModel": "",
+                    "visitorData": "CgtraDZfVnB4NXdIWSi6mIOfBg%3D%3D",
+                    "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36,gzip(gfe)",
+                    "clientName": "WEB",
+                    "clientVersion": "2.20230201.01.00",
+                    "osName": "Macintosh",
+                    "osVersion": "10_15_7",
+                    "originalUrl": f"https://www.youtube.com/{out_uid}/videos",
+                    "platform": "DESKTOP",
+                    "clientFormFactor": "UNKNOWN_FORM_FACTOR",
+                    "configInfo": {
+                        "appInstallData": "CLqYg58GEInorgUQuIuuBRCU-K4FENfkrgUQuNSuBRC2nP4SEPuj_hIQ5_euBRCy9a4FEKLsrgUQt-CuBRDi1K4FEILdrgUQh92uBRDM364FEP7urgUQzPWuBRDZ6a4FEOSg_hIQo_muBRDvo_4SEMnJrgUQlqf-EhCR-PwS"
+                    },
+                    "timeZone": "Asia/Shanghai",
+                    "browserName": "Chrome",
+                    "browserVersion": "109.0.0.0",
+                    "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+                    "deviceExperimentId": "ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09ELqYg58GGOmU7Z4G",
+                    "screenWidthPoints": 944,
+                    "screenHeightPoints": 969,
+                    "screenPixelDensity": 1,
+                    "screenDensityFloat": 1,
+                    "utcOffsetMinutes": 480,
+                    "userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
+                    "memoryTotalKbytes": "8000000",
+                    "mainAppWebInfo": {
+                        "graftUrl": f"/{out_uid}/videos",
+                        "pwaInstallabilityStatus": "PWA_INSTALLABILITY_STATUS_CAN_BE_INSTALLED",
+                        "webDisplayMode": "WEB_DISPLAY_MODE_FULLSCREEN",
+                        "isWebNativeShareAvailable": True
+                    }
+                },
+                "user": {
+                    "lockedSafetyMode": False
+                },
+                "request": {
+                    "useSsl": True,
+                    "internalExperimentFlags": [],
+                    "consistencyTokenJars": []
+                },
+                "clickTracking": {
+                    "clickTrackingParams": "CBcQ8JMBGAYiEwiNhIXX9IL9AhUFSUwIHWnnDks="
+                },
+                "adSignalsInfo": {
+                    "params": [
+                        {
+                            "key": "dt",
+                            "value": "1675676731048"
+                        },
+                        {
+                            "key": "flash",
+                            "value": "0"
+                        },
+                        {
+                            "key": "frm",
+                            "value": "0"
+                        },
+                        {
+                            "key": "u_tz",
+                            "value": "480"
+                        },
+                        {
+                            "key": "u_his",
+                            "value": "4"
+                        },
+                        {
+                            "key": "u_h",
+                            "value": "1080"
+                        },
+                        {
+                            "key": "u_w",
+                            "value": "1920"
+                        },
+                        {
+                            "key": "u_ah",
+                            "value": "1080"
+                        },
+                        {
+                            "key": "u_aw",
+                            "value": "1920"
+                        },
+                        {
+                            "key": "u_cd",
+                            "value": "24"
+                        },
+                        {
+                            "key": "bc",
+                            "value": "31"
+                        },
+                        {
+                            "key": "bih",
+                            "value": "969"
+                        },
+                        {
+                            "key": "biw",
+                            "value": "944"
+                        },
+                        {
+                            "key": "brdim",
+                            "value": "-269,-1080,-269,-1080,1920,-1080,1920,1080,944,969"
+                        },
+                        {
+                            "key": "vis",
+                            "value": "1"
+                        },
+                        {
+                            "key": "wgl",
+                            "value": "true"
+                        },
+                        {
+                            "key": "ca_type",
+                            "value": "image"
+                        }
+                    ],
+                    "bid": "ANyPxKpfiaAf-DBzNeKLgkceMEA9UIeCWFRTRm4AQMCuejhI3PGwDB1jizQIX60YcEYtt_CX7tZWAbYerQ-rWLvV7y_KCLkBww"
+                }
+            },
+            "browseId": browse_id,
+            "params": "EgZ2aWRlb3PyBgQKAjoA",
+            "continuation": cls.continuation
+        })
+        headers = {
+            'authority': 'www.youtube.com',
+            'accept': '*/*',
+            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+            'cache-control': 'no-cache',
+            'content-type': 'application/json',
+            'cookie': 'VISITOR_INFO1_LIVE=kh6_Vpx5wHY; YSC=UupqFrWvAR0; DEVICE_INFO=ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EOmU7Z4GGOmU7Z4G; PREF=tz=Asia.Shanghai; ST-1kg1gfd=itct=CBcQ8JMBGAYiEwiNhIXX9IL9AhUFSUwIHWnnDks%3D&csn=MC4zNzI3MDcwMDA1Mjg4NzE5Ng..&endpoint=%7B%22clickTrackingParams%22%3A%22CBcQ8JMBGAYiEwiNhIXX9IL9AhUFSUwIHWnnDks%3D%22%2C%22commandMetadata%22%3A%7B%22webCommandMetadata%22%3A%7B%22url%22%3A%22%2F%40chinatravel5971%2Fvideos%22%2C%22webPageType%22%3A%22WEB_PAGE_TYPE_CHANNEL%22%2C%22rootVe%22%3A3611%2C%22apiUrl%22%3A%22%2Fyoutubei%2Fv1%2Fbrowse%22%7D%7D%2C%22browseEndpoint%22%3A%7B%22browseId%22%3A%22UCpLXnfBCNhj8KLnt54RQMKA%22%2C%22params%22%3A%22EgZ2aWRlb3PyBgQKAjoA%22%2C%22canonicalBaseUrl%22%3A%22%2F%40chinatravel5971%22%7D%7D',
+            'origin': 'https://www.youtube.com',
+            'pragma': 'no-cache',
+            'referer': f'https://www.youtube.com/{out_uid}/featured',
+            'sec-ch-ua': '"Not_A Brand";v="99", "Chromium";v="109", "Google Chrome";v="109.0.5414.87"',
+            'sec-ch-ua-arch': '"arm"',
+            'sec-ch-ua-bitness': '"64"',
+            'sec-ch-ua-full-version': '"109.0.1518.52"',
+            'sec-ch-ua-full-version-list': '"Not_A Brand";v="99.0.0.0", "Microsoft Edge";v="109.0.1518.52", "Chromium";v="109.0.5414.87"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-model': '',
+            'sec-ch-ua-platform': '"macOS"',
+            'sec-ch-ua-platform-version': '"12.4.0"',
+            'sec-ch-ua-wow64': '?0',
+            'sec-fetch-dest': 'empty',
+            'sec-fetch-mode': 'same-origin',
+            'sec-fetch-site': 'same-origin',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+            'x-goog-visitor-id': 'CgtraDZfVnB4NXdIWSi6mIOfBg%3D%3D',
+            'x-youtube-bootstrap-logged-in': 'false',
+            'x-youtube-client-name': '1',
+            'x-youtube-client-version': '2.20230201.01.00'
+        }
+        response = requests.post(url=url, headers=headers, data=payload)
+        # 先校验状态码,再解析 JSON,避免非 200 响应时解析报错
+        if response.status_code != 200:
+            Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.text}\n')
+            return
+        cls.continuation = response.json()['trackingParams']
+        if 'continuationContents' not in response.text and 'onResponseReceivedActions' not in response.text:
+            Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.text}\n')
+        elif 'continuationContents' in response.json():
+            # Common.logger(log_type, crawler).info("'continuationContents' in response.json()\n")
+            if 'richGridContinuation' not in response.json()['continuationContents']:
+                # Common.logger(log_type, crawler).warning(f"'richGridContinuation' not in response.json()['continuationContents']\n")
+                Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.json()["continuationContents"]}\n')
+            elif 'contents' not in response.json()['continuationContents']['richGridContinuation']:
+                Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.json()["continuationContents"]["richGridContinuation"]}\n')
+            elif 'contents' in response.json()["continuationContents"]["richGridContinuation"]:
+                feeds = response.json()["continuationContents"]["richGridContinuation"]['contents']
+                return feeds
+        elif 'onResponseReceivedActions' in response.json():
+            Common.logger(log_type, crawler).info("'onResponseReceivedActions' in response.json()\n")
+            if len(response.json()['onResponseReceivedActions']) == 0:
+                Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.json()["onResponseReceivedActions"]}\n')
+            elif 'appendContinuationItemsAction' not in response.json()['onResponseReceivedActions'][0]:
+                Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.json()["onResponseReceivedActions"][0]}\n')
+            elif 'continuationItems' not in response.json()['onResponseReceivedActions'][0]['appendContinuationItemsAction']:
+                Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.json()["onResponseReceivedActions"][0]["appendContinuationItemsAction"]}\n')
+            elif len(response.json()['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems']) == 0:
+                Common.logger(log_type, crawler).warning(f'get_feeds_response:{response.json()["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"]}\n')
+            else:
+                feeds = response.json()["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"]
+                return feeds
+        else:
+            Common.logger(log_type, crawler).info('feeds is None\n')
+
+    @classmethod
+    def get_videos(cls, log_type, crawler, strategy, oss_endpoint, env, browse_id, out_uid, our_uid, machine):
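+        # 循环翻页抓取频道主页视频:get_feeds 每次返回一页数据,直到出现发布时间超过 30 天的视频为止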
+        while True:
+            feeds = cls.get_feeds(log_type, crawler, browse_id, out_uid)
+            # get_feeds 请求异常时返回 None,先做空值校验,避免 len(None) 报错
+            if feeds is None or len(feeds) == 0:
+                Common.logger(log_type, crawler).info('feeds is None\n')
+                return
+            for i in range(len(feeds)):
+                if 'richItemRenderer' not in feeds[i]:
+                    Common.logger(log_type, crawler).warning(f'feeds:{feeds[i]}\n')
+                elif 'content' not in feeds[i]['richItemRenderer']:
+                    Common.logger(log_type, crawler).warning(f'feeds:{feeds[i]["richItemRenderer"]}\n')
+                elif 'videoRenderer' not in feeds[i]['richItemRenderer']['content']:
+                    Common.logger(log_type, crawler).warning(f'feeds:{feeds[i]["richItemRenderer"]["content"]}\n')
+                elif 'videoId' not in feeds[i]["richItemRenderer"]["content"]['videoRenderer']:
+                    Common.logger(log_type, crawler).warning(f'feeds:{feeds[i]["richItemRenderer"]["content"]["videoRenderer"]}\n')
+                else:
+                    video_id = feeds[i]["richItemRenderer"]["content"]['videoRenderer']['videoId']
+                    video_dict = cls.get_video_info(log_type, crawler, out_uid, video_id, machine)
+                    # get_video_info 解析失败时返回 None;发布时间缺失时无法比较,直接跳过该条
+                    if video_dict is None or video_dict['publish_time'] == '':
+                        continue
+                    # 发布时间<=30天
+                    publish_time = int(time.mktime(time.strptime(video_dict['publish_time'], "%Y-%m-%d")))
+                    if int(time.time()) - publish_time <= 3600*24*30:
+                        cls.download_publish(log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint)
+                    else:
+                        Common.logger(log_type, crawler).info('发布时间超过30天\n')
+                        return
+
+    @classmethod
+    def get_video_info(cls, log_type, crawler, out_uid, video_id, machine):
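+        # 请求 youtubei/v1/player 接口,从 videoDetails / microformat / streamingData 中解析标题、播放量、发布时间、封面与播放地址
+        # 解析失败时不返回 video_dict,由调用方负责兜底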
+        url = "https://www.youtube.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false"
+        payload = json.dumps({
+            "context": {
+                "client": {
+                    "hl": "zh-CN",
+                    "gl": "US",
+                    "remoteHost": "38.93.247.21",
+                    "deviceMake": "Apple",
+                    "deviceModel": "",
+                    "visitorData": "CgtraDZfVnB4NXdIWSjkzoefBg%3D%3D",
+                    "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36,gzip(gfe)",
+                    "clientName": "WEB",
+                    "clientVersion": "2.20230201.01.00",
+                    "osName": "Macintosh",
+                    "osVersion": "10_15_7",
+                    "originalUrl": f"https://www.youtube.com/watch?v={video_id}",
+                    "platform": "DESKTOP",
+                    "clientFormFactor": "UNKNOWN_FORM_FACTOR",
+                    "configInfo": {
+                        "appInstallData": "COTOh58GEPuj_hIQ1-SuBRC4i64FEMzfrgUQgt2uBRCi7K4FEOLUrgUQzPWuBRCKgK8FEOSg_hIQtpz-EhDa6a4FEP7urgUQieiuBRDn964FELjUrgUQlPiuBRCH3a4FELfgrgUQ76P-EhDJya4FEJan_hIQkfj8Eg%3D%3D"
+                    },
+                    "timeZone": "Asia/Shanghai",
+                    "browserName": "Chrome",
+                    "browserVersion": "109.0.0.0",
+                    "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+                    "deviceExperimentId": "ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EOTOh58GGOmU7Z4G",
+                    "screenWidthPoints": 1037,
+                    "screenHeightPoints": 969,
+                    "screenPixelDensity": 1,
+                    "screenDensityFloat": 1,
+                    "utcOffsetMinutes": 480,
+                    "userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
+                    "memoryTotalKbytes": "8000000",
+                    "clientScreen": "WATCH",
+                    "mainAppWebInfo": {
+                        "graftUrl": f"/watch?v={video_id}",
+                        "pwaInstallabilityStatus": "PWA_INSTALLABILITY_STATUS_CAN_BE_INSTALLED",
+                        "webDisplayMode": "WEB_DISPLAY_MODE_FULLSCREEN",
+                        "isWebNativeShareAvailable": True
+                    }
+                },
+                "user": {
+                    "lockedSafetyMode": False
+                },
+                "request": {
+                    "useSsl": True,
+                    "internalExperimentFlags": [],
+                    "consistencyTokenJars": []
+                },
+                "clickTracking": {
+                    "clickTrackingParams": "CIwBEKQwGAYiEwipncqx3IL9AhXs4cQKHbKZDO4yB3JlbGF0ZWRInsS1qbGFtIlUmgEFCAEQ-B0="
+                },
+                "adSignalsInfo": {
+                    "params": [
+                        {
+                            "key": "dt",
+                            "value": "1675749222611"
+                        },
+                        {
+                            "key": "flash",
+                            "value": "0"
+                        },
+                        {
+                            "key": "frm",
+                            "value": "0"
+                        },
+                        {
+                            "key": "u_tz",
+                            "value": "480"
+                        },
+                        {
+                            "key": "u_his",
+                            "value": "3"
+                        },
+                        {
+                            "key": "u_h",
+                            "value": "1080"
+                        },
+                        {
+                            "key": "u_w",
+                            "value": "1920"
+                        },
+                        {
+                            "key": "u_ah",
+                            "value": "1080"
+                        },
+                        {
+                            "key": "u_aw",
+                            "value": "1920"
+                        },
+                        {
+                            "key": "u_cd",
+                            "value": "24"
+                        },
+                        {
+                            "key": "bc",
+                            "value": "31"
+                        },
+                        {
+                            "key": "bih",
+                            "value": "969"
+                        },
+                        {
+                            "key": "biw",
+                            "value": "1037"
+                        },
+                        {
+                            "key": "brdim",
+                            "value": "-269,-1080,-269,-1080,1920,-1080,1920,1080,1037,969"
+                        },
+                        {
+                            "key": "vis",
+                            "value": "1"
+                        },
+                        {
+                            "key": "wgl",
+                            "value": "true"
+                        },
+                        {
+                            "key": "ca_type",
+                            "value": "image"
+                        }
+                    ],
+                    "bid": "ANyPxKop8SijebwUCq4ZfKbJwlSjVQa_RTdS6c6a6WPYpCKnxpWCJ33B1SzRuSXjSfH9O2MhURebAs0CngRg6B4nOjBpeJDKgA"
+                }
+            },
+            "videoId": str(video_id),
+            "playbackContext": {
+                "contentPlaybackContext": {
+                    "currentUrl": f"/watch?v={video_id}",
+                    "vis": 0,
+                    "splay": False,
+                    "autoCaptionsDefaultOn": False,
+                    "autonavState": "STATE_NONE",
+                    "html5Preference": "HTML5_PREF_WANTS",
+                    "signatureTimestamp": 19394,
+                    "referer": f"https://www.youtube.com/watch?v={video_id}",
+                    "lactMilliseconds": "-1",
+                    "watchAmbientModeContext": {
+                        "watchAmbientModeEnabled": True
+                    }
+                }
+            },
+            "racyCheckOk": False,
+            "contentCheckOk": False
+        })
+        headers = {
+            'authority': 'www.youtube.com',
+            'accept': '*/*',
+            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+            'cache-control': 'no-cache',
+            'content-type': 'application/json',
+            'cookie': f'VISITOR_INFO1_LIVE=kh6_Vpx5wHY; YSC=UupqFrWvAR0; DEVICE_INFO=ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EOmU7Z4GGOmU7Z4G; PREF=tz=Asia.Shanghai; ST-180dxzo=itct=CIwBEKQwGAYiEwipncqx3IL9AhXs4cQKHbKZDO4yB3JlbGF0ZWRInsS1qbGFtIlUmgEFCAEQ-B0%3D&csn=MC41MTQ1NTQzMTE3NTA4MjY0&endpoint=%7B%22clickTrackingParams%22%3A%22CIwBEKQwGAYiEwipncqx3IL9AhXs4cQKHbKZDO4yB3JlbGF0ZWRInsS1qbGFtIlUmgEFCAEQ-B0%3D%22%2C%22commandMetadata%22%3A%7B%22webCommandMetadata%22%3A%7B%22url%22%3A%22%2Fwatch%3Fv%3D{video_id}%22%2C%22webPageType%22%3A%22WEB_PAGE_TYPE_WATCH%22%2C%22rootVe%22%3A3832%7D%7D%2C%22watchEndpoint%22%3A%7B%22videoId%22%3A%22{video_id}%22%2C%22nofollow%22%3Atrue%2C%22watchEndpointSupportedOnesieConfig%22%3A%7B%22html5PlaybackOnesieConfig%22%3A%7B%22commonConfig%22%3A%7B%22url%22%3A%22https%3A%2F%2Frr5---sn-nx5s7n76.googlevideo.com%2Finitplayback%3Fsource%3Dyoutube%26oeis%3D1%26c%3DWEB%26oad%3D3200%26ovd%3D3200%26oaad%3D11000%26oavd%3D11000%26ocs%3D700%26oewis%3D1%26oputc%3D1%26ofpcc%3D1%26msp%3D1%26odepv%3D1%26id%3D38654ad085c12212%26ip%3D38.93.247.21%26initcwndbps%3D11346250%26mt%3D1675748964%26oweuc%3D%26pxtags%3DCg4KAnR4EggyNDQ1MTI4OA%26rxtags%3DCg4KAnR4EggyNDQ1MTI4Ng%252CCg4KAnR4EggyNDQ1MTI4Nw%252CCg4KAnR4EggyNDQ1MTI4OA%252CCg4KAnR4EggyNDQ1MTI4OQ%22%7D%7D%7D%7D%7D',
+            'origin': 'https://www.youtube.com',
+            'pragma': 'no-cache',
+            'referer': f'https://www.youtube.com/watch?v={video_id}',
+            'sec-ch-ua': '"Not_A Brand";v="99", "Chromium";v="109", "Google Chrome";v="109.0.5414.87"',
+            'sec-ch-ua-arch': '"arm"',
+            'sec-ch-ua-bitness': '"64"',
+            'sec-ch-ua-full-version': '"109.0.1518.52"',
+            'sec-ch-ua-full-version-list': '"Not_A Brand";v="99.0.0.0", "Microsoft Edge";v="109.0.1518.52", "Chromium";v="109.0.5414.87"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-model': '',
+            'sec-ch-ua-platform': '"macOS"',
+            'sec-ch-ua-platform-version': '"12.4.0"',
+            'sec-ch-ua-wow64': '?0',
+            'sec-fetch-dest': 'empty',
+            'sec-fetch-mode': 'same-origin',
+            'sec-fetch-site': 'same-origin',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+            'x-goog-visitor-id': 'CgtraDZfVnB4NXdIWSjkzoefBg%3D%3D',
+            'x-youtube-bootstrap-logged-in': 'false',
+            'x-youtube-client-name': '1',
+            'x-youtube-client-version': '2.20230201.01.00'
+        }
+        response = requests.post(url=url, headers=headers, data=payload)
+        # Common.logger(log_type, crawler).info(f"get_video_info_response:{response.json()}\n")
+        if response.status_code != 200:
+            Common.logger(log_type, crawler).warning(f"get_video_info_response:{response.text}\n")
+        elif 'streamingData' not in response.json():
+            Common.logger(log_type, crawler).warning(f"get_video_info_response:{response.json()}\n")
+        elif 'videoDetails' not in response.json():
+            Common.logger(log_type, crawler).warning(f"get_video_info_response:{response.json()}\n")
+        elif 'microformat' not in response.json():
+            Common.logger(log_type, crawler).warning(f"get_video_info_response:{response.json()}\n")
+        else:
+            playerMicroformatRenderer = response.json()['microformat']['playerMicroformatRenderer']
+            videoDetails = response.json()['videoDetails']
+            streamingData = response.json()['streamingData']
+
+            # video_title
+            if 'title' not in videoDetails:
+                video_title = ''
+            else:
+                video_title = videoDetails['title']
+            if Translate.is_contains_chinese(video_title) is False:
+                video_title = Translate.google_translate(video_title, machine)  # 自动翻译标题为中文
+
+            # play_cnt
+            if 'viewCount' not in videoDetails:
+                play_cnt = 0
+            else:
+                play_cnt = int(videoDetails['viewCount'])
+
+            # publish_time
+            if 'publishDate' not in playerMicroformatRenderer:
+                publish_time = ''
+            else:
+                publish_time = playerMicroformatRenderer['publishDate']
+
+            # user_name
+            if 'author' not in videoDetails:
+                user_name = ''
+            else:
+                user_name = videoDetails['author']
+
+            # cover_url
+            if 'thumbnail' not in videoDetails:
+                cover_url = ''
+            elif 'thumbnails' not in videoDetails['thumbnail']:
+                cover_url = ''
+            elif len(videoDetails['thumbnail']['thumbnails']) == 0:
+                cover_url = ''
+            elif 'url' not in videoDetails['thumbnail']['thumbnails'][-1]:
+                cover_url = ''
+            else:
+                cover_url = videoDetails['thumbnail']['thumbnails'][-1]['url']
+
+            # video_url
+            if 'formats' not in streamingData:
+                video_url = ''
+            elif len(streamingData['formats']) == 0:
+                video_url = ''
+            elif 'url' not in streamingData['formats'][-1]:
+                video_url = ''
+            else:
+                video_url = streamingData['formats'][-1]['url']
+
+            Common.logger(log_type, crawler).info(f'video_title:{video_title}')
+            Common.logger(log_type, crawler).info(f'video_id:{video_id}')
+            Common.logger(log_type, crawler).info(f'play_cnt:{play_cnt}')
+            Common.logger(log_type, crawler).info(f'publish_time:{publish_time}')
+            Common.logger(log_type, crawler).info(f'user_name:{user_name}')
+            Common.logger(log_type, crawler).info(f'cover_url:{cover_url}')
+            Common.logger(log_type, crawler).info(f'video_url:{video_url}')
+
+            video_dict = {
+                'video_title': video_title,
+                'video_id': video_id,
+                'play_cnt': play_cnt,
+                'publish_time': publish_time,
+                'user_name': user_name,
+                'out_uid': out_uid,
+                'cover_url': cover_url,
+                'video_url': video_url,
+            }
+            return video_dict
+
+    @classmethod
+    def download_publish(cls, log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint):
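+        # 处理流程:判重 -> 下载视频 -> 校验时长/大小 -> 下载封面 -> 上传发布 -> 写入飞书表与数据库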
+        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_dict['video_id']}" """
+        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
+        if video_dict['video_title'] == '' or video_dict['video_url'] == '':
+            Common.logger(log_type, crawler).info('无效视频\n')
+        elif repeat_video is not None and len(repeat_video) != 0:
+            Common.logger(log_type, crawler).info('视频已下载\n')
+        else:
+            # 下载视频
+            Common.logger(log_type, crawler).info('开始下载视频...')
+            Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
+            ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/")
+            video_width = ffmpeg_dict['width']
+            video_height = ffmpeg_dict['height']
+            duration = int(ffmpeg_dict['duration'])
+            video_size = ffmpeg_dict['size']
+
+            Common.logger(log_type, crawler).info(f'video_width:{video_width}')
+            Common.logger(log_type, crawler).info(f'video_height:{video_height}')
+            Common.logger(log_type, crawler).info(f'duration:{duration}')
+            Common.logger(log_type, crawler).info(f'video_size:{video_size}\n')
+
+            video_dict['video_width'] = video_width
+            video_dict['video_height'] = video_height
+            video_dict['duration'] = duration
+            video_dict['comment_cnt'] = 0
+            video_dict['like_cnt'] = 0
+            video_dict['share_cnt'] = 0
+            video_dict['avatar_url'] = video_dict['cover_url']
+            video_dict['session'] = f'youtube{int(time.time())}'
+            rule = '1,2'
+            if duration < 60 or duration > 600:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
+                Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则,删除成功\n")
+                return
+            elif video_size == 0 or duration == 0 or video_size is None or duration is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
+                Common.logger(log_type, crawler).info(f"视频下载出错,删除成功\n")
+                return
+            else:
+                # 下载封面
+                Common.download_method(log_type, crawler, 'cover', video_dict['video_title'], video_dict['cover_url'])
+                # 保存视频文本信息
+                Common.save_video_info(log_type, crawler, video_dict)
+
+                # 上传视频
+                Common.logger(log_type, crawler).info(f"开始上传视频")
+                our_video_id = Publish.upload_and_publish(log_type, crawler, strategy, our_uid, env, oss_endpoint)
+                # dev 环境指向测试后台,其余环境指向正式后台
+                if env == 'dev':
+                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+                else:
+                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+                Common.logger(log_type, crawler).info("视频上传完成")
+
+                # 视频信息保存至飞书
+                Feishu.insert_columns(log_type, crawler, "GVxlYk", "ROWS", 1, 2)
+                # 视频ID工作表,首行写入数据
+                upload_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
+                values = [[upload_time,
+                           "定向榜",
+                           video_dict['video_id'],
+                           video_dict['video_title'],
+                           our_video_link,
+                           video_dict['play_cnt'],
+                           video_dict['duration'],
+                           f'{video_width}*{video_height}',
+                           video_dict['publish_time'],
+                           video_dict['user_name'],
+                           video_dict['cover_url'],
+                           video_dict['video_url']
+                           ]]
+                time.sleep(1)
+                Feishu.update_values(log_type, crawler, "GVxlYk", "F2:Z2", values)
+                Common.logger(log_type, crawler).info('视频信息写入定向_已下载表成功\n')
+
+                # 视频信息保存数据库
+                sql = f""" insert into crawler_video(video_id, 
+                user_id, 
+                out_user_id, 
+                platform, 
+                strategy, 
+                out_video_id, 
+                video_title, 
+                cover_url, 
+                video_url, 
+                duration, 
+                publish_time, 
+                play_cnt, 
+                crawler_rule, 
+                width, 
+                height) 
+                values({our_video_id}, 
+                "{our_uid}", 
+                "{video_dict['out_uid']}", 
+                "{cls.platform}", 
+                "定向爬虫策略", 
+                "{video_dict['video_id']}", 
+                "{video_dict['video_title']}", 
+                "{video_dict['cover_url']}",
+                "{video_dict['video_url']}",
+                {int(duration)},
+                "{video_dict['publish_time']}",
+                {int(video_dict['play_cnt'])},
+                "{rule}",
+                {int(video_width)},
+                {int(video_height)}) """
+                MysqlHelper.update_values(log_type, crawler, sql, env)
+                Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+
+    @classmethod
+    def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
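+        # 从飞书用户表(c467d7)读取待抓取账号列表,逐个抓取其主页视频,账号之间休眠 10 秒并重置翻页参数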
+        user_list = cls.get_user_from_feishu(log_type, crawler, 'c467d7', env, machine)
+        if len(user_list) == 0:
+            Common.logger(log_type, crawler).warning('用户列表为空\n')
+        else:
+            for user_dict in user_list:
+                out_uid = user_dict['out_user_id']
+                user_name = user_dict['out_user_name']
+                browse_id = user_dict['out_browse_id']
+                our_uid = user_dict['our_user_id']
+                Common.logger(log_type, crawler).info(f'获取 {user_name} 主页视频\n')
+                cls.get_videos(log_type, crawler, strategy, oss_endpoint, env, browse_id, out_uid, our_uid, machine)
+                Common.logger(log_type, crawler).info('休眠 10 秒')
+                time.sleep(10)
+                cls.continuation = ''
+
+
+if __name__ == "__main__":
+    print(Follow.get_browse_id('follow', 'youtube', '@chinatravel5971', "local"))
+    # print(Follow.get_user_from_feishu('follow', 'youtube', 'c467d7', 'dev', 'local'))
+    # Follow.get_out_user_info('follow', 'youtube', 'UC08jgxf119fzynp2uHCvZIg', '@weitravel')
+    # Follow.get_video_info('follow', 'youtube', 'OGVK0IXBIhI')
+    # Follow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'out', 'dev', 'local')
+    pass

+ 3 - 0
youtube/youtube_main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/1/31

+ 38 - 0
youtube/youtube_main/run_youtube_follow.py

@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/3
+import argparse
+import os
+import sys
+import time
+sys.path.append(os.getcwd())
+from common.common import Common
+from youtube.youtube_follow.youtube_follow import Follow
+
+
+def main(log_type, crawler, strategy, oss_endpoint, env, machine):
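+    # 循环执行:每轮抓取完 youtube 定向榜后清理日志,休眠 1 小时再开始下一轮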
+    while True:
+        Common.logger(log_type, crawler).info('开始抓取youtube定向榜\n')
+        Follow.get_follow_videos(log_type, crawler, strategy, oss_endpoint, env, machine)
+        Common.del_logs(log_type, crawler)
+        Common.logger(log_type, crawler).info('抓取完一轮,休眠 1 小时\n')
+        time.sleep(3600)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()  # 新建参数解析器对象
+    parser.add_argument('--log_type', type=str)  # 添加参数,注明参数类型
+    parser.add_argument('--crawler')  # 添加参数
+    parser.add_argument('--strategy')  # 添加参数
+    parser.add_argument('--our_uid')  # 添加参数(当前 main 未使用)
+    parser.add_argument('--oss_endpoint')  # 添加参数
+    parser.add_argument('--env')  # 添加参数
+    parser.add_argument('--machine')  # 添加参数
+    args = parser.parse_args()  # 解析命令行参数
+    # print(args)
+    main(log_type=args.log_type,
+         crawler=args.crawler,
+         strategy=args.strategy,
+         oss_endpoint=args.oss_endpoint,
+         env=args.env,
+         machine=args.machine)

+ 3 - 0
youtube/youtube_main/run_youtube_search.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/3

+ 3 - 0
youtube/youtube_search/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/3

+ 3 - 0
youtube/youtube_search/youtube_search.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/3