wangkun 2 年 前
コミット
f110fe29d8

BIN
.DS_Store


+ 68 - 15
common/common.py

@@ -139,21 +139,6 @@ class Common:
             except Exception as e:
                 cls.logger(log_type, crawler).error(f"视频下载失败:{e}\n")
 
-        elif text == "youtube_video":
-            # 需要下载的视频地址
-            video_url = url
-            # 视频名
-            video_name = "video.mp4"
-            try:
-                download_cmd = f'yt-dlp -f "bv[height=720][ext=mp4]+ba[ext=m4a]" --merge-output-format mp4 {video_url}-U -o {video_path}{video_name}'
-                Common.logger(log_type, crawler).info(f"download_cmd:{download_cmd}")
-                os.system(download_cmd)
-                # move_cmd = f"mv {video_name} {video_path}"
-                # os.system(move_cmd)
-                cls.logger(log_type, crawler).info("==========视频下载完成==========")
-            except Exception as e:
-                Common.logger(log_type, crawler).error(f"视频下载失败:{e}\n")
-
         # 下载音频
         elif text == "audio":
             # 需要下载的视频地址
@@ -189,6 +174,56 @@ class Common:
             except Exception as e:
                 cls.logger(log_type, crawler).error(f"封面下载失败:{e}\n")
 
+        # youtube 视频下载
+        elif text == "youtube_video":
+            # 需要下载的视频地址
+            video_url = url
+            # 视频名
+            video_name = "video.mp4"
+            try:
+                download_cmd = f'yt-dlp -f "bv[height=720][ext=mp4]+ba[ext=m4a]" --merge-output-format mp4 {video_url} -U -o {video_path}{video_name}'
+                Common.logger(log_type, crawler).info(f"download_cmd:{download_cmd}")
+                os.system(download_cmd)
+                # move_cmd = f"mv {video_name} {video_path}"
+                # os.system(move_cmd)
+                cls.logger(log_type, crawler).info("==========视频下载完成==========")
+            except Exception as e:
+                Common.logger(log_type, crawler).error(f"视频下载失败:{e}\n")
+
+        # 西瓜视频 / 音频下载
+        elif text == "xigua_video":
+            # 需要下载的视频地址
+            video_url = str(url).replace('http://', 'https://')
+            # 视频名
+            video_name = "video1.mp4"
+
+            # 下载视频
+            urllib3.disable_warnings()
+            response = requests.get(video_url, stream=True, proxies=proxies, verify=False)
+            try:
+                with open(video_path + video_name, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=10240):
+                        f.write(chunk)
+                cls.logger(log_type, crawler).info("==========视频下载完成==========")
+            except Exception as e:
+                cls.logger(log_type, crawler).error(f"视频下载失败:{e}\n")
+        elif text == "xigua_audio":
+            # 需要下载的视频地址
+            audio_url = str(url).replace('http://', 'https://')
+            # 音频名
+            audio_name = "audio1.mp4"
+
+            # 下载视频
+            urllib3.disable_warnings()
+            response = requests.get(audio_url, stream=True, proxies=proxies, verify=False)
+            try:
+                with open(video_path + audio_name, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=10240):
+                        f.write(chunk)
+                cls.logger(log_type, crawler).info("==========音频下载完成==========")
+            except Exception as e:
+                cls.logger(log_type, crawler).error(f"音频下载失败:{e}\n")
+
     @classmethod
     def ffmpeg(cls, log_type, crawler, video_path):
         probe = ffmpeg.probe(video_path)
@@ -209,6 +244,24 @@ class Common:
         }
         return ffmpeg_dict
 
+    # 合并音视频
+    @classmethod
+    def video_compose(cls, log_type, crawler, video_dir):
+        try:
+            video_path = f'{video_dir}/video1.mp4'
+            audio_path = f'{video_dir}/audio1.mp4'
+            out_path = f'{video_dir}/video.mp4'
+            cmd = f'ffmpeg -i {video_path} -i {audio_path} -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {out_path}'
+            # print(cmd)
+            subprocess.call(cmd, shell=True)
+
+            for file in os.listdir(video_dir):
+                if file.split('.mp4')[0] == 'video1' or file.split('.mp4')[0] == 'audio1':
+                    os.remove(f'{video_dir}/{file}')
+            Common.logger(log_type, crawler).info('合成成功\n')
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f'video_compose异常:{e}\n')
+
 
 if __name__ == "__main__":
 

+ 108 - 2
common/users.py

@@ -10,11 +10,12 @@ import sys
 import requests
 sys.path.append(os.getcwd())
 from common.common import Common
+from common.feishu import Feishu
 
 
 class Users:
     @classmethod
-    def create_user(cls, log_type, crawler, user_dict, env):
+    def create_uid(cls, log_type, crawler, user_dict, env):
         """
         创建站内虚拟 UID
         :param log_type: 日志
@@ -54,7 +55,112 @@ class Users:
         except Exception as e:
             Common.logger(log_type, crawler).error(f"create_user异常:{e}\n")
 
+    @classmethod
+    def create_user(cls, log_type, crawler, sheetid, out_user_dict, env, machine):
+        """
+        补全飞书用户表信息,并返回
+        :param log_type: 日志
+        :param crawler: 哪款爬虫
+        :param sheetid: 飞书表
+        :param out_user_dict: 站外用户信息字典
+        :param env: 正式环境:prod,测试环境:dev
+        :param machine: 部署机器,阿里云填写 aliyun,aliyun_hk ,线下分别填写 macpro,macair,local
+        :return: user_list
+        """
+        try:
+            # 获取站外账号信息
+            out_avatar_url = out_user_dict['out_avatar_url']  # 站外头像
+            out_create_time = out_user_dict['out_create_time']  # 站外注册时间,格式: YYYY-MM-DD HH:MM:SS
+            out_tag = out_user_dict['out_tag']  # 站外标签,例如:搞笑博主
+            out_play_cnt = out_user_dict['out_play_cnt']  # 站外总播放量
+            out_fans = out_user_dict['out_fans']  # 站外粉丝数量
+            out_follow = out_user_dict['out_follow']  # 站外关注量
+            out_friend = out_user_dict['out_friend']  # 站外好友量
+            out_like = out_user_dict['out_like']  # 站外点赞量
+            platform = out_user_dict['platform']  # 抓取平台,例如:小年糕、西瓜视频
+            tag = out_user_dict['tag']  # 站内身份标签,例如:小年糕爬虫,小时榜爬虫策略;好看爬虫,频道榜爬虫策略;youtube爬虫,定向爬虫策略
+            user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
+            user_list = []
+            for i in range(1, len(user_sheet)):
+                out_uid = user_sheet[i][2]
+                user_name = user_sheet[i][3]
+                our_uid = user_sheet[i][6]
+                Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
+                # 站内 UID 为空,且数据库中(youtube+out_user_id)返回数量 == 0,则创建新的站内账号
+                if our_uid is None:
+                    sql = f""" select * from crawler_user where platform="{platform}" and out_user_id="{out_uid}" """
+                    our_user_info = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
+                    # 数据库中(youtube + out_user_id)返回数量 == 0,则创建站内账号UID,并写入定向账号飞书表。并结合站外用户信息,一并写入爬虫账号数据库
+                    if our_user_info is None or len(our_user_info) == 0:
+                        # 创建站内账号
+                        create_user_dict = {
+                            'nickName': user_name,
+                            'avatarUrl': out_avatar_url,
+                            'tagName': tag,  # 例如 'youtube爬虫,定向爬虫策略'
+                        }
+                        our_uid = cls.create_uid(log_type, crawler, create_user_dict, env)
+                        Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
+                        if env == 'prod':
+                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
+                        else:
+                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
+                        Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
+                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
+                                             [[our_uid, our_user_link]])
+                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!')
+
+                        # 用户信息写入数据库
+                        sql = f""" insert into crawler_user(user_id, 
+                                                out_user_id, 
+                                                out_user_name, 
+                                                out_avatar_url, 
+                                                out_create_time, 
+                                                out_tag,
+                                                out_play_cnt, 
+                                                out_fans, 
+                                                out_follow, 
+                                                out_friend, 
+                                                out_like, 
+                                                platform, 
+                                                tag)
+                                                values({our_uid}, 
+                                                "{out_uid}", 
+                                                "{user_name}", 
+                                                "{out_avatar_url}", 
+                                                "{out_create_time}", 
+                                                "{out_tag}", 
+                                                {out_play_cnt}, 
+                                                {out_fans}, 
+                                                {out_follow}, 
+                                                {out_friend}, 
+                                                {out_like}, 
+                                                "{platform}",
+                                                "{tag}") """
+                        Common.logger(log_type, crawler).info(f'sql:{sql}')
+                        MysqlHelper.update_values(log_type, crawler, sql, env, machine)
+                        Common.logger(log_type, crawler).info('用户信息插入数据库成功!\n')
+                    # 数据库中(youtube + out_user_id)返回数量 != 0,则直接把数据库中的站内 UID 写入飞书
+                    else:
+                        our_uid = our_user_info[0][1]
+                        if env == 'prod':
+                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
+                        else:
+                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
+                        Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
+                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
+                                             [[our_uid, our_user_link]])
+                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
+                user_dict = {
+                    'out_user_id': out_uid,
+                    'out_user_name': user_name,
+                    'our_user_id': our_uid,
+                }
+                user_list.append(user_dict)
+            return user_list
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"create_user:{e}\n")
+
 
 if __name__ == "__main__":
-    uid = Users.create_user('log', 'kanyikan', 'youtube爬虫,定向爬虫策略', 'dev')
+    uid = Users.create_uid('log', 'kanyikan', 'youtube爬虫,定向爬虫策略', 'dev')
     print(uid)

BIN
xigua/.DS_Store


+ 673 - 292
xigua/xigua_follow/xigua_follow.py

@@ -4,6 +4,7 @@
 import base64
 import json
 import os
+import shutil
 import sys
 import time
 
@@ -14,6 +15,8 @@ from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.common.by import By
 from seleniumwire import webdriver
 
+from common.db import MysqlHelper
+
 sys.path.append(os.getcwd())
 from common.common import Common
 from common.feishu import Feishu
@@ -25,6 +28,8 @@ class Follow:
     # 个人主页视频翻页参数
     offset = 0
 
+    platform = "西瓜视频"
+
     # 下载规则
     @staticmethod
     def download_rule(duration, width, height):
@@ -108,317 +113,693 @@ class Follow:
     # 获取视频详情
     @classmethod
     def get_video_url(cls, log_type, crawler, gid):
-        # try:
-        url = 'https://www.ixigua.com/api/mixVideo/information?'
-        headers = {
-            "accept-encoding": "gzip, deflate",
-            "accept-language": "zh-CN,zh-Hans;q=0.9",
-            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
-                          "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
-            "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
-        }
-        params = {
-            'mixId': gid,
-            'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
-                       'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-            'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
-            '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
-                          'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
-        }
-        cookies = {
-            'ixigua-a-s': '1',
-            'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
-                       'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
-            'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
-                     '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
-            'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
-            'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
-            '__ac_nonce': '06304878000964fdad287',
-            '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
-                              'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
-            'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
-            '_tea_utm_cache_1300': 'undefined',
-            'support_avif': 'false',
-            'support_webp': 'false',
-            'xiguavideopcwebid': '7134967546256016900',
-            'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
-        }
-        urllib3.disable_warnings()
-        response = requests.get(url=url, headers=headers, params=params, cookies=cookies, verify=False)
-        if 'data' not in response.json() or response.json()['data'] == '':
-            Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
-        else:
-            video_info = response.json()['data']['gidInformation']['packerData']['video']
-            video_url_dict = {}
-            # video_url
-            if 'videoResource' not in video_info:
-                video_url_dict["video_url"] = ''
-                video_url_dict["audio_url"] = ''
-                video_url_dict["video_width"] = 0
-                video_url_dict["video_height"] = 0
-
-            elif 'dash_120fps' in video_info['videoResource']:
-                if "video_list" in video_info['videoResource']['dash_120fps'] and len(video_info['videoResource']['dash_120fps']['video_list']) != 0:
-                    video_url = video_info['videoResource']['dash_120fps']['video_list'][-1]['backup_url_1']
-                    audio_url = video_info['videoResource']['dash_120fps']['video_list'][-1]['backup_url_1']
-                    if len(video_url) % 3 == 1:
-                        video_url += '=='
-                    elif len(video_url) % 3 == 2:
-                        video_url += '='
-                    elif len(audio_url) % 3 == 1:
-                        audio_url += '=='
-                    elif len(audio_url) % 3 == 2:
-                        audio_url += '='
-                    video_url = base64.b64decode(video_url).decode('utf8')
-                    audio_url = base64.b64decode(audio_url).decode('utf8')
-                    video_width = video_info['videoResource']['dash_120fps']['video_list'][-1]['vwidth']
-                    video_height = video_info['videoResource']['dash_120fps']['video_list'][-1]['vheight']
-                    video_url_dict["video_url"] = video_url
-                    video_url_dict["audio_url"] = audio_url
-                    video_url_dict["video_width"] = video_width
-                    video_url_dict["video_height"] = video_height
-                elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
-                        and 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
-                        and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
-                        and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
-                        and len(video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
-                        and len(video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
-
-                    video_url = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['backup_url_1']
-                    audio_url = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1]['backup_url_1']
-                    if len(video_url) % 3 == 1:
-                        video_url += '=='
-                    elif len(video_url) % 3 == 2:
-                        video_url += '='
-                    elif len(audio_url) % 3 == 1:
-                        audio_url += '=='
-                    elif len(audio_url) % 3 == 2:
-                        audio_url += '='
-                    video_url = base64.b64decode(video_url).decode('utf8')
-                    audio_url = base64.b64decode(audio_url).decode('utf8')
-                    video_width = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['vwidth']
-                    video_height = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['vheight']
-                    video_url_dict["video_url"] = video_url
-                    video_url_dict["audio_url"] = audio_url
-                    video_url_dict["video_width"] = video_width
-                    video_url_dict["video_height"] = video_height
-
-
-            elif 'dash' in video_info['videoResource'] \
-                    and 'dynamic_video' in video_info['videoResource']['dash'] \
-                    and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video']:
-                video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['backup_url_1']
-                audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1]['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['vwidth']
-                video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['vheight']
-
-
-
-            elif 'normal' in video_info['videoResource']:
-                video_url = video_info['videoResource']['normal']['video_list'][-1]['backup_url_1']
-                audio_url = video_info['videoResource']['normal']['video_list'][-1]['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['normal']['video_list'][-1]['vwidth']
-                video_height = video_info['videoResource']['normal']['video_list'][-1]['vheight']
+        try:
+            url = 'https://www.ixigua.com/api/mixVideo/information?'
+            headers = {
+                "accept-encoding": "gzip, deflate",
+                "accept-language": "zh-CN,zh-Hans;q=0.9",
+                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
+                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
+            }
+            params = {
+                'mixId': gid,
+                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
+                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
+                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
+                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
+                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
+            }
+            cookies = {
+                'ixigua-a-s': '1',
+                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
+                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
+                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
+                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
+                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
+                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
+                '__ac_nonce': '06304878000964fdad287',
+                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
+                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
+                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
+                '_tea_utm_cache_1300': 'undefined',
+                'support_avif': 'false',
+                'support_webp': 'false',
+                'xiguavideopcwebid': '7134967546256016900',
+                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
+            }
+            urllib3.disable_warnings()
+            response = requests.get(url=url, headers=headers, params=params, cookies=cookies, verify=False)
+            if 'data' not in response.json() or response.json()['data'] == '':
+                Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
             else:
-                video_url = 0
-                audio_url = 0
-                video_width = 0
-                video_height = 0
+                video_info = response.json()['data']['gidInformation']['packerData']['video']
+                video_url_dict = {}
+                # video_url
+                if 'videoResource' not in video_info:
+                    video_url_dict["video_url"] = ''
+                    video_url_dict["audio_url"] = ''
+                    video_url_dict["video_width"] = 0
+                    video_url_dict["video_height"] = 0
+
+                elif 'dash_120fps' in video_info['videoResource']:
+                    if "video_list" in video_info['videoResource']['dash_120fps'] and 'video_4' in video_info['videoResource']['dash_120fps']['video_list']:
+                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
+                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vwidth']
+                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_3' in video_info['videoResource']['dash_120fps']['video_list']:
+                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
+                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vwidth']
+                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_2' in video_info['videoResource']['dash_120fps']['video_list']:
+                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
+                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vwidth']
+                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_1' in video_info['videoResource']['dash_120fps']['video_list']:
+                        video_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
+                        audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vwidth']
+                        video_height = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+
+                    elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
+                            and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
+                            and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
+                            and len(video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
+                            and len(video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
+
+                        video_url = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['backup_url_1']
+                        audio_url = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1]['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['vwidth']
+                        video_height = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    else:
+                        video_url_dict["video_url"] = ''
+                        video_url_dict["audio_url"] = ''
+                        video_url_dict["video_width"] = 0
+                        video_url_dict["video_height"] = 0
+
+                elif 'dash' in video_info['videoResource']:
+                    if "video_list" in video_info['videoResource']['dash'] and 'video_4' in video_info['videoResource']['dash']['video_list']:
+                        video_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
+                        audio_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash']['video_list']['video_4']['vwidth']
+                        video_height = video_info['videoResource']['dash']['video_list']['video_4']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    elif "video_list" in video_info['videoResource']['dash'] and 'video_3' in video_info['videoResource']['dash']['video_list']:
+                        video_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
+                        audio_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash']['video_list']['video_3']['vwidth']
+                        video_height = video_info['videoResource']['dash']['video_list']['video_3']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    elif "video_list" in video_info['videoResource']['dash'] and 'video_2' in video_info['videoResource']['dash']['video_list']:
+                        video_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
+                        audio_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash']['video_list']['video_2']['vwidth']
+                        video_height = video_info['videoResource']['dash']['video_list']['video_2']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    elif "video_list" in video_info['videoResource']['dash'] and 'video_1' in video_info['videoResource']['dash']['video_list']:
+                        video_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
+                        audio_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash']['video_list']['video_1']['vwidth']
+                        video_height = video_info['videoResource']['dash']['video_list']['video_1']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+
+                    elif 'dynamic_video' in video_info['videoResource']['dash'] \
+                            and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video'] \
+                            and 'dynamic_audio_list' in video_info['videoResource']['dash']['dynamic_video'] \
+                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list']) != 0 \
+                            and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list']) != 0:
+
+                        video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['backup_url_1']
+                        audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1]['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['vwidth']
+                        video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    else:
+                        video_url_dict["video_url"] = ''
+                        video_url_dict["audio_url"] = ''
+                        video_url_dict["video_width"] = 0
+                        video_url_dict["video_height"] = 0
+
+                elif 'normal' in video_info['videoResource']:
+                    if "video_list" in video_info['videoResource']['normal'] and 'video_4' in \
+                            video_info['videoResource']['normal']['video_list']:
+                        video_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
+                        audio_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['normal']['video_list']['video_4']['vwidth']
+                        video_height = video_info['videoResource']['normal']['video_list']['video_4']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    elif "video_list" in video_info['videoResource']['normal'] and 'video_3' in \
+                            video_info['videoResource']['normal']['video_list']:
+                        video_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
+                        audio_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['normal']['video_list']['video_3']['vwidth']
+                        video_height = video_info['videoResource']['normal']['video_list']['video_3']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    elif "video_list" in video_info['videoResource']['normal'] and 'video_2' in \
+                            video_info['videoResource']['normal']['video_list']:
+                        video_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
+                        audio_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['normal']['video_list']['video_2']['vwidth']
+                        video_height = video_info['videoResource']['normal']['video_list']['video_2']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    elif "video_list" in video_info['videoResource']['normal'] and 'video_1' in \
+                            video_info['videoResource']['normal']['video_list']:
+                        video_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
+                        audio_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['normal']['video_list']['video_1']['vwidth']
+                        video_height = video_info['videoResource']['normal']['video_list']['video_1']['vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+
+                    elif 'dynamic_video' in video_info['videoResource']['normal'] \
+                            and 'dynamic_video_list' in video_info['videoResource']['normal']['dynamic_video'] \
+                            and 'dynamic_audio_list' in video_info['videoResource']['normal']['dynamic_video'] \
+                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list']) != 0 \
+                            and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list']) != 0:
+
+                        video_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
+                            'backup_url_1']
+                        audio_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list'][-1][
+                            'backup_url_1']
+                        if len(video_url) % 3 == 1:
+                            video_url += '=='
+                        elif len(video_url) % 3 == 2:
+                            video_url += '='
+                        elif len(audio_url) % 3 == 1:
+                            audio_url += '=='
+                        elif len(audio_url) % 3 == 2:
+                            audio_url += '='
+                        video_url = base64.b64decode(video_url).decode('utf8')
+                        audio_url = base64.b64decode(audio_url).decode('utf8')
+                        video_width = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
+                            'vwidth']
+                        video_height = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
+                            'vheight']
+                        video_url_dict["video_url"] = video_url
+                        video_url_dict["audio_url"] = audio_url
+                        video_url_dict["video_width"] = video_width
+                        video_url_dict["video_height"] = video_height
+                    else:
+                        video_url_dict["video_url"] = ''
+                        video_url_dict["audio_url"] = ''
+                        video_url_dict["video_width"] = 0
+                        video_url_dict["video_height"] = 0
 
-            return video_url_dict
+                else:
+                    video_url_dict["video_url"] = ''
+                    video_url_dict["audio_url"] = ''
+                    video_url_dict["video_width"] = 0
+                    video_url_dict["video_height"] = 0
 
+                return video_url_dict
 
-        # except Exception as e:
-        #     Common.logger(log_type).error(f'get_video_info异常:{e}\n')
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
 
     @classmethod
-    def get_videolist(cls, log_type, crawler, out_uid, machine):
+    def get_videolist(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):
         signature = cls.get_signature(log_type, crawler, out_uid, machine)
-        url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
-        params = {
-            'to_user_id': str(out_uid),
-            'offset': str(cls.offset),
-            'limit': '30',
-            'maxBehotTime': '0',
-            'order': 'new',
-            'isHome': '0',
-            'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
-            'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
-            '_signature': signature,
-        }
-        headers = {
-            'authority': 'www.ixigua.com',
-            'accept': 'application/json, text/plain, */*',
-            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
-            'cache-control': 'no-cache',
-            'cookie': f'MONITOR_WEB_ID=7168304743566296612; __ac_signature={signature}; ixigua-a-s=1; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; msToken=G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==; tt_scid=o4agqz7u9SKPwfBoPt6S82Cw0q.9KDtqmNe0JHxMqmpxNHQWq1BmrQdgVU6jEoX7ed99; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1676618894%7Cee5ad95378275f282f230a7ffa9947ae7eff40d0829c5a2568672a6dc90a1c96; ixigua-a-s=1',
-            'pragma': 'no-cache',
-            'referer': f'https://www.ixigua.com/home/{out_uid}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
-            'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
-            'sec-ch-ua-mobile': '?0',
-            'sec-ch-ua-platform': '"macOS"',
-            'sec-fetch-dest': 'empty',
-            'sec-fetch-mode': 'cors',
-            'sec-fetch-site': 'same-origin',
-            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
-            'x-secsdk-csrf-token': '00010000000119e3f9454d1dcbb288704cda1960f241e2d19bd21f2fd283520c3615a990ac5a17448bfbb902a249'
-        }
-        urllib3.disable_warnings()
-        response = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
-        cls.offset += 30
-        if response.status_code != 200:
-            Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
-        elif 'data' not in response.text:
-            Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
-        elif 'videoList' not in response.json()["data"]:
-            Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
-        else:
-            videoList = response.json()['data']['videoList']
-            for i in range(len(videoList)):
-                # video_title
-                if 'title' not in videoList[i]:
-                    video_title = 0
-                else:
-                    video_title = videoList[i]['title'].strip().replace('手游', '') \
-                        .replace('/', '').replace('\/', '').replace('\n', '')
+        while True:
+            url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
+            params = {
+                'to_user_id': str(out_uid),
+                'offset': str(cls.offset),
+                'limit': '30',
+                'maxBehotTime': '0',
+                'order': 'new',
+                'isHome': '0',
+                'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
+                'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
+                '_signature': signature,
+            }
+            headers = {
+                'authority': 'www.ixigua.com',
+                'accept': 'application/json, text/plain, */*',
+                'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+                'cache-control': 'no-cache',
+                'cookie': f'MONITOR_WEB_ID=7168304743566296612; __ac_signature={signature}; ixigua-a-s=1; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; msToken=G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==; tt_scid=o4agqz7u9SKPwfBoPt6S82Cw0q.9KDtqmNe0JHxMqmpxNHQWq1BmrQdgVU6jEoX7ed99; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1676618894%7Cee5ad95378275f282f230a7ffa9947ae7eff40d0829c5a2568672a6dc90a1c96; ixigua-a-s=1',
+                'pragma': 'no-cache',
+                'referer': f'https://www.ixigua.com/home/{out_uid}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
+                'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"macOS"',
+                'sec-fetch-dest': 'empty',
+                'sec-fetch-mode': 'cors',
+                'sec-fetch-site': 'same-origin',
+                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
+                'x-secsdk-csrf-token': '00010000000119e3f9454d1dcbb288704cda1960f241e2d19bd21f2fd283520c3615a990ac5a17448bfbb902a249'
+            }
+            urllib3.disable_warnings()
+            response = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
+            cls.offset += 30
+            if response.status_code != 200:
+                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+            elif 'data' not in response.text:
+                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+            elif 'videoList' not in response.json()["data"]:
+                Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
+            else:
+                videoList = response.json()['data']['videoList']
+                for i in range(len(videoList)):
+                    # video_title
+                    if 'title' not in videoList[i]:
+                        video_title = 0
+                    else:
+                        video_title = videoList[i]['title'].strip().replace('手游', '') \
+                            .replace('/', '').replace('\/', '').replace('\n', '')
 
-                # video_id
-                if 'video_id' not in videoList[i]:
-                    video_id = 0
-                else:
-                    video_id = videoList[i]['video_id']
+                    # video_id
+                    if 'video_id' not in videoList[i]:
+                        video_id = 0
+                    else:
+                        video_id = videoList[i]['video_id']
 
-                # gid
-                if 'gid' not in videoList[i]:
-                    gid = 0
-                else:
-                    gid = videoList[i]['gid']
+                    # gid
+                    if 'gid' not in videoList[i]:
+                        gid = 0
+                    else:
+                        gid = videoList[i]['gid']
 
-                # play_cnt
-                if 'video_detail_info' not in videoList[i]:
-                    play_cnt = 0
-                elif 'video_watch_count' not in videoList[i]['video_detail_info']:
-                    play_cnt = 0
-                else:
-                    play_cnt = videoList[i]['video_detail_info']['video_watch_count']
+                    # play_cnt
+                    if 'video_detail_info' not in videoList[i]:
+                        play_cnt = 0
+                    elif 'video_watch_count' not in videoList[i]['video_detail_info']:
+                        play_cnt = 0
+                    else:
+                        play_cnt = videoList[i]['video_detail_info']['video_watch_count']
 
-                # comment_cnt
-                if 'comment_count' not in videoList[i]:
-                    comment_cnt = 0
-                else:
-                    comment_cnt = videoList[i]['comment_count']
+                    # comment_cnt
+                    if 'comment_count' not in videoList[i]:
+                        comment_cnt = 0
+                    else:
+                        comment_cnt = videoList[i]['comment_count']
 
-                # like_cnt
-                if 'digg_count' not in videoList[i]:
-                    like_cnt = 0
-                else:
-                    like_cnt = videoList[i]['digg_count']
+                    # like_cnt
+                    if 'digg_count' not in videoList[i]:
+                        like_cnt = 0
+                    else:
+                        like_cnt = videoList[i]['digg_count']
 
-                # share_cnt
-                share_cnt = 0
+                    # share_cnt
+                    share_cnt = 0
 
-                # video_duration
-                if 'video_duration' not in videoList[i]:
-                    video_duration = 0
-                else:
-                    video_duration = videoList[i]['video_duration']
+                    # video_duration
+                    if 'video_duration' not in videoList[i]:
+                        video_duration = 0
+                    else:
+                        video_duration = videoList[i]['video_duration']
 
-                # send_time
-                if 'publish_time' not in videoList[i]:
-                    publish_time = 0
-                else:
-                    publish_time = videoList[i]['publish_time']
+                    # send_time
+                    if 'publish_time' not in videoList[i]:
+                        publish_time = 0
+                    else:
+                        publish_time = videoList[i]['publish_time']
 
-                # is_top
-                if 'is_top' not in videoList[i]:
-                    is_top = 0
-                else:
-                    is_top = videoList[i]['is_top']
+                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time))
 
-                # user_name
-                if 'user_info' not in videoList[i]:
-                    user_name = 0
-                elif 'name' not in videoList[i]['user_info']:
-                    user_name = 0
-                else:
-                    user_name = videoList[i]['user_info']['name']
+                    # is_top
+                    if 'is_top' not in videoList[i]:
+                        is_top = 0
+                    else:
+                        is_top = videoList[i]['is_top']
 
-                # user_id
-                if 'user_info' not in videoList[i]:
-                    user_id = 0
-                elif 'user_id' not in videoList[i]['user_info']:
-                    user_id = 0
-                else:
-                    user_id = videoList[i]['user_info']['user_id']
+                    # user_name
+                    if 'user_info' not in videoList[i]:
+                        user_name = 0
+                    elif 'name' not in videoList[i]['user_info']:
+                        user_name = 0
+                    else:
+                        user_name = videoList[i]['user_info']['name']
 
-                # avatar_url
-                if 'user_info' not in videoList[i]:
-                    avatar_url = 0
-                elif 'avatar_url' not in videoList[i]['user_info']:
-                    avatar_url = 0
-                else:
-                    avatar_url = videoList[i]['user_info']['avatar_url']
-
-                # cover_url
-                if 'video_detail_info' not in videoList[i]:
-                    cover_url = 0
-                elif 'detail_video_large_image' not in videoList[i]['video_detail_info']:
-                    cover_url = 0
-                elif 'url' in videoList[i]['video_detail_info']['detail_video_large_image']:
-                    cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url']
-                else:
-                    cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url_list'][0]['url']
-
-                Common.logger(log_type, crawler).info(
-                    f'send_time:{time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(publish_time))}')
-
-                video_url_dict = cls.get_video_url(log_type, crawler, gid)
-                video_url = video_url_dict["video_url"]
-                audio_url = video_url_dict["audio_url"]
-                video_width = video_url_dict["video_width"]
-                video_height = video_url_dict["video_height"]
-
-                video_dict = {'video_title': video_title,
-                              'video_id': video_id,
-                              'gid': gid,
-                              'play_cnt': play_cnt,
-                              'comment_cnt': comment_cnt,
-                              'like_cnt': like_cnt,
-                              'share_cnt': share_cnt,
-                              'video_width': video_width,
-                              'video_height': video_height,
-                              'video_duration': video_duration,
-                              'publish_time': publish_time,
-                              'is_top': is_top,
-                              'user_name': user_name,
-                              'user_id': user_id,
-                              'avatar_url': avatar_url,
-                              'cover_url': cover_url,
-                              'audio_url': audio_url,
-                              'video_url': video_url}
-                for k, v in video_dict.items():
-                    print(f"{k}:{v}")
-                print("\n")
+                    # user_id
+                    if 'user_info' not in videoList[i]:
+                        user_id = 0
+                    elif 'user_id' not in videoList[i]['user_info']:
+                        user_id = 0
+                    else:
+                        user_id = videoList[i]['user_info']['user_id']
+
+                    # avatar_url
+                    if 'user_info' not in videoList[i]:
+                        avatar_url = 0
+                    elif 'avatar_url' not in videoList[i]['user_info']:
+                        avatar_url = 0
+                    else:
+                        avatar_url = videoList[i]['user_info']['avatar_url']
+
+                    # cover_url
+                    if 'video_detail_info' not in videoList[i]:
+                        cover_url = 0
+                    elif 'detail_video_large_image' not in videoList[i]['video_detail_info']:
+                        cover_url = 0
+                    elif 'url' in videoList[i]['video_detail_info']['detail_video_large_image']:
+                        cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url']
+                    else:
+                        cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url_list'][0]['url']
+
+                    if gid == 0 or video_id == 0 or cover_url == 0:
+                        Common.logger(log_type, crawler).info('无效视频\n')
+                    elif is_top is True and int(time.time()) - int(publish_time) > 3600 * 24 * 10:
+                        Common.logger(log_type, crawler).info(f'置顶视频,且发布时间超过10天:{publish_time_str}\n')
+                    elif int(time.time()) - int(publish_time) > 3600 * 24 * 10:
+                        Common.logger(log_type, crawler).info(f'发布时间超过10天:{publish_time_str}\n')
+                        cls.offset = 0
+                        return
+                    else:
+                        video_url_dict = cls.get_video_url(log_type, crawler, gid)
+                        video_url = video_url_dict["video_url"]
+                        audio_url = video_url_dict["audio_url"]
+                        video_width = video_url_dict["video_width"]
+                        video_height = video_url_dict["video_height"]
+
+                        video_dict = {'video_title': video_title,
+                                      'video_id': video_id,
+                                      'gid': gid,
+                                      'play_cnt': play_cnt,
+                                      'comment_cnt': comment_cnt,
+                                      'like_cnt': like_cnt,
+                                      'share_cnt': share_cnt,
+                                      'video_width': video_width,
+                                      'video_height': video_height,
+                                      'duration': video_duration,
+                                      'publish_time_stamp': publish_time,
+                                      'publish_time_str': publish_time_str,
+                                      'is_top': is_top,
+                                      'user_name': user_name,
+                                      'user_id': user_id,
+                                      'avatar_url': avatar_url,
+                                      'cover_url': cover_url,
+                                      'audio_url': audio_url,
+                                      'video_url': video_url,
+                                      'session': signature}
+                        for k, v in video_dict.items():
+                            Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        cls.download_publish(log_type=log_type,
+                                             crawler=crawler,
+                                             video_dict=video_dict,
+                                             strategy=strategy,
+                                             our_uid=our_uid,
+                                             oss_endpoint=oss_endpoint,
+                                             env=env,
+                                             machine=machine)
+
+    # 下载 / 上传
+    @classmethod
+    def download_publish(cls, log_type, crawler, strategy, video_dict, our_uid, oss_endpoint, env, machine):
+        # try:
+        if cls.download_rule(video_dict['duration'], video_dict['video_width'], video_dict['video_height']) is False:
+            Common.logger(log_type, crawler).info('不满足抓取规则\n')
+        elif any(word if word in video_dict['video_title'] else False for word in cls.filter_words(log_type, crawler)) is True:
+            Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+        elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'e075e9') for x in y]:
+            Common.logger(log_type, crawler).info('视频已下载\n')
+        elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', '3Ul6wZ') for x in y]:
+            Common.logger(log_type, crawler).info('视频已下载\n')
+        elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'QOWqMo') for x in y]:
+            Common.logger(log_type, crawler).info('视频已下载\n')
+        elif str(video_dict['video_id']) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs') for x in y]:
+            Common.logger(log_type, crawler).info('视频已存在\n')
+        else:
+            # 下载封面
+            Common.download_method(log_type=log_type, crawler=crawler, text='cover', title=video_dict['video_title'], url=video_dict['cover_url'])
+            # 下载视频
+            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video', title=video_dict['video_title'], url=video_dict['video_url'])
+            # 下载音频
+            Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio', title=video_dict['video_title'], url=video_dict['audio_url'])
+            # 保存视频信息至txt
+            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
+            # 合成音视频
+            Common.video_compose(log_type=log_type, crawler=crawler, video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
+
+            # 上传视频
+            Common.logger(log_type, crawler).info("开始上传视频...")
+            our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                      crawler=crawler,
+                                                      strategy=strategy,
+                                                      our_uid=our_uid,
+                                                      env=env,
+                                                      oss_endpoint=oss_endpoint)
+            if env == 'dev':
+                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+            else:
+                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
+            Common.logger(log_type, crawler).info("视频上传完成")
+
+            if our_video_id is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
+                return
+
+            # 视频写入飞书
+            Feishu.insert_columns(log_type, 'xigua', "e075e9", "ROWS", 1, 2)
+            upload_time = int(time.time())
+            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
+                       "定向榜",
+                       video_dict['video_title'],
+                       str(video_dict['video_id']),
+                       our_video_link,
+                       video_dict['gid'],
+                       video_dict['play_cnt'],
+                       video_dict['comment_cnt'],
+                       video_dict['like_cnt'],
+                       video_dict['share_cnt'],
+                       video_dict['duration'],
+                       str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
+                       video_dict['publish_time_str'],
+                       video_dict['user_name'],
+                       video_dict['user_id'],
+                       video_dict['avatar_url'],
+                       video_dict['cover_url'],
+                       video_dict['video_url'],
+                       video_dict['audio_url']]]
+            time.sleep(1)
+            Feishu.update_values(log_type, 'xigua', "e075e9", "F2:Z2", values)
+            Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
+
+            # 视频信息保存数据库
+            sql = f""" insert into crawler_video(video_id,
+                            user_id,
+                            out_user_id,
+                            platform,
+                            strategy,
+                            out_video_id,
+                            video_title,
+                            cover_url,
+                            video_url,
+                            duration,
+                            publish_time,
+                            play_cnt,
+                            crawler_rule,
+                            width,
+                            height)
+                            values({our_video_id},
+                            "{our_uid}",
+                            "{video_dict['user_id']}",
+                            "{cls.platform}",
+                            "定向爬虫策略",
+                            "{video_dict['video_id']}",
+                            "{video_dict['video_title']}",
+                            "{video_dict['cover_url']}",
+                            "{video_dict['video_url']}",
+                            {int(video_dict['duration'])},
+                            "{video_dict['publish_time_str']}",
+                            {int(video_dict['play_cnt'])},
+                            "4,5,6",
+                            {int(video_dict['video_width'])},
+                            {int(video_dict['video_height'])}) """
+            MysqlHelper.update_values(log_type, crawler, sql, env, machine)
+            Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f'download_publish异常:{e}\n')
 
 
 

+ 79 - 75
youtube/youtube_follow/youtube_follow.py

@@ -388,88 +388,92 @@ class Follow:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             user_list = []
             for i in range(1, len(user_sheet)):
+            # for i in range(181, len(user_sheet)):
                 out_uid = user_sheet[i][2]
                 user_name = user_sheet[i][3]
                 browse_id = user_sheet[i][5]
                 our_uid = user_sheet[i][6]
-                Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
-                # 获取站外browse_id,并写入飞书
-                if browse_id is None:
-                    browse_id = cls.get_browse_id(log_type, crawler, out_uid, machine)
+                if our_uid is not None and user_name is not None:
+                    Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
+                    # 获取站外browse_id,并写入飞书
                     if browse_id is None:
-                        Common.logger(log_type, crawler).warning('browse_id is None !')
-                    else:
-                        Feishu.update_values(log_type, crawler, sheetid, f'F{i+1}:F{i+1}', [[browse_id]])
-                        Common.logger(log_type, crawler).info(f'browse_id写入成功:{browse_id}')
-                # 站内 UID 为空,且数据库中(youtube+out_user_id)返回数量 == 0,则创建新的站内账号
-                if our_uid is None:
-                    sql = f""" select * from crawler_user where platform="{cls.platform}" and out_user_id="{out_uid}" """
-                    our_user_info = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
-                    # 数据库中(youtube + out_user_id)返回数量 == 0,则创建站内账号UID,并写入定向账号飞书表。并结合站外用户信息,一并写入爬虫账号数据库
-                    if our_user_info is None or len(our_user_info) == 0:
-                        # 获取站外账号信息,写入数据库
-                        out_user_dict = cls.get_out_user_info(log_type, crawler, browse_id, out_uid)
-                        out_avatar_url = out_user_dict['out_avatar_url']
-                        out_create_time = out_user_dict['out_create_time']
-                        out_play_cnt = out_user_dict['out_play_cnt']
-                        out_fans = out_user_dict['out_fans']
-                        tag = 'youtube爬虫,定向爬虫策略'
-
-                        # 创建站内账号
-                        create_user_dict = {
-                            'nickName': user_name,
-                            'avatarUrl': out_avatar_url,
-                            'tagName': tag,
-                        }
-                        our_uid = Users.create_user(log_type, crawler, create_user_dict, env)
-                        Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
-                        if env == 'prod':
-                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
+                        browse_id = cls.get_browse_id(log_type, crawler, out_uid, machine)
+                        if browse_id is None:
+                            Common.logger(log_type, crawler).warning('browse_id is None !')
                         else:
-                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
-                        Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
-                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}', [[our_uid, our_user_link]])
-                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!')
+                            Feishu.update_values(log_type, crawler, sheetid, f'F{i+1}:F{i+1}', [[browse_id]])
+                            Common.logger(log_type, crawler).info(f'browse_id写入成功:{browse_id}')
+                    # 站内 UID 为空,且数据库中(youtube+out_user_id)返回数量 == 0,则创建新的站内账号
+                    if our_uid is None:
+                        sql = f""" select * from crawler_user where platform="{cls.platform}" and out_user_id="{out_uid}" """
+                        our_user_info = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
+                        # 数据库中(youtube + out_user_id)返回数量 == 0,则创建站内账号UID,并写入定向账号飞书表。并结合站外用户信息,一并写入爬虫账号数据库
+                        if our_user_info is None or len(our_user_info) == 0:
+                            # 获取站外账号信息,写入数据库
+                            out_user_dict = cls.get_out_user_info(log_type, crawler, browse_id, out_uid)
+                            out_avatar_url = out_user_dict['out_avatar_url']
+                            out_create_time = out_user_dict['out_create_time']
+                            out_play_cnt = out_user_dict['out_play_cnt']
+                            out_fans = out_user_dict['out_fans']
+                            tag = 'youtube爬虫,定向爬虫策略'
 
-                        sql = f""" insert into crawler_user(user_id, 
-                                            out_user_id, 
-                                            out_user_name, 
-                                            out_avatar_url, 
-                                            out_create_time, 
-                                            out_play_cnt, 
-                                            out_fans, 
-                                            platform, 
-                                            tag)
-                                            values({our_uid}, 
-                                            "{out_uid}", 
-                                            "{user_name}", 
-                                            "{out_avatar_url}", 
-                                            "{out_create_time}", 
-                                            {out_play_cnt}, 
-                                            {out_fans}, 
-                                            "{cls.platform}",
-                                            "{tag}") """
-                        Common.logger(log_type, crawler).info(f'sql:{sql}')
-                        MysqlHelper.update_values(log_type, crawler, sql, env, machine)
-                        Common.logger(log_type, crawler).info('用户信息插入数据库成功!\n')
-                    # 数据库中(youtube + out_user_id)返回数量 != 0,则直接把数据库中的站内 UID 写入飞书
-                    else:
-                        our_uid = our_user_info[0][1]
-                        if 'env' == 'prod':
-                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
+                            # 创建站内账号
+                            create_user_dict = {
+                                'nickName': user_name,
+                                'avatarUrl': out_avatar_url,
+                                'tagName': tag,
+                            }
+                            our_uid = Users.create_uid(log_type, crawler, create_user_dict, env)
+                            Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
+                            if env == 'prod':
+                                our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
+                            else:
+                                our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
+                            Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
+                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}', [[our_uid, our_user_link]])
+                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!')
+
+                            sql = f""" insert into crawler_user(user_id, 
+                                                out_user_id, 
+                                                out_user_name, 
+                                                out_avatar_url, 
+                                                out_create_time, 
+                                                out_play_cnt, 
+                                                out_fans, 
+                                                platform, 
+                                                tag)
+                                                values({our_uid}, 
+                                                "{out_uid}", 
+                                                "{user_name}", 
+                                                "{out_avatar_url}", 
+                                                "{out_create_time}", 
+                                                {out_play_cnt}, 
+                                                {out_fans}, 
+                                                "{cls.platform}",
+                                                "{tag}") """
+                            Common.logger(log_type, crawler).info(f'sql:{sql}')
+                            MysqlHelper.update_values(log_type, crawler, sql, env, machine)
+                            Common.logger(log_type, crawler).info('用户信息插入数据库成功!\n')
+                        # 数据库中(youtube + out_user_id)返回数量 != 0,则直接把数据库中的站内 UID 写入飞书
                         else:
-                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
-                        Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
-                        Feishu.update_values(log_type, crawler, sheetid, f'G{i+1}:H{i+1}', [[our_uid, our_user_link]])
-                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
+                            our_uid = our_user_info[0][1]
+                            if 'env' == 'prod':
+                                our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
+                            else:
+                                our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
+                            Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
+                            Feishu.update_values(log_type, crawler, sheetid, f'G{i+1}:H{i+1}', [[our_uid, our_user_link]])
+                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
 
-                user_dict = {
-                    'out_user_id': out_uid,
-                    'out_user_name': user_name,
-                    'out_browse_id': browse_id,
-                    'our_user_id': our_uid,
-                }
-                user_list.append(user_dict)
+                    user_dict = {
+                        'out_user_id': out_uid,
+                        'out_user_name': user_name,
+                        'out_browse_id': browse_id,
+                        'our_user_id': our_uid,
+                    }
+                    user_list.append(user_dict)
+                else:
+                    pass
             return user_list
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_user_from_feishu异常:{e}\n")
@@ -1166,6 +1170,6 @@ if __name__ == "__main__":
     # print(Follow.filter_emoji("姐妹倆一唱一和,完美配合,終於把大慶降服了😅😅#萌娃搞笑日常"))
     # Follow.repeat_video('follow', 'youtube', 4, "dev", "local")
     # title = "'西部巡游220丨两人一车环游中国半年,需要花费多少钱? 2万公里吃住行费用总结'"
-    title = "'Insanely Crowded Shanghai Yu Garden Lantern Festival Walk Tour 2023 人气爆棚的上海豫园元宵节漫步之行 4K'"
-    print(title.strip().replace("\\", "").replace(" ", "").replace("\n", "").replace("/", "").replace("\r", "").replace("&NBSP", "").replace("&", ""))
+    # title = "'Insanely Crowded Shanghai Yu Garden Lantern Festival Walk Tour 2023 人气爆棚的上海豫园元宵节漫步之行 4K'"
+    # print(title.strip().replace("\\", "").replace(" ", "").replace("\n", "").replace("/", "").replace("\r", "").replace("&NBSP", "").replace("&", ""))
     pass

+ 2 - 2
youtube/youtube_main/run_youtube_follow.py

@@ -15,8 +15,8 @@ def main(log_type, crawler, strategy, oss_endpoint, env, machine):
         Common.logger(log_type, crawler).info('开始抓取youtube定向榜\n')
         Follow.get_follow_videos(log_type, crawler, strategy, oss_endpoint, env, machine)
         Common.del_logs(log_type, crawler)
-        Common.logger(log_type, crawler).info('抓取完一轮,休眠 1 小时\n')
-        time.sleep(3600)
+        Common.logger(log_type, crawler).info('抓取完一轮,休眠 1 分钟\n')
+        time.sleep(60)
 
 
 if __name__ == "__main__":