wangkun před 1 rokem
rodič
revize
faf3dfa2d0

+ 1 - 1
common/common.py

@@ -46,7 +46,7 @@ class Common:
         # log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + f'-{crawler}-{log_type}.log'
         # log_name = datetime.datetime.now().strftime('%Y-%m-%d') + f'-{crawler}-{log_type}.log'
         # log_name = f"{date.today():%Y-%m-%d}-{crawler}-{log_type}.log"
-        log_name = f"{date.today().strftime('%Y-%m-%d')}-{crawler}-{log_type}.log"
+        log_name = f"{crawler}-{log_type}-{date.today().strftime('%Y-%m-%d')}.log"
 
         # Do not print logs to the console
         logger.remove(handler_id=None)

+ 1 - 0
ganggangdouchuan/ganggangdouchuan_recommend/ganggangdouchuan_recommend.py

@@ -244,6 +244,7 @@ class GanggangdouchuanRecommend:
             except Exception as e:
                 Common.logger(log_type, crawler).error(f'get_videoList异常:{e}\n')
                 cls.i = 0
+                return
 
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env, driver: WebDriver):

+ 1 - 0
jixiangxingfu/jixiangxingfu_recommend/jixiangxingfu_recommend.py

@@ -304,6 +304,7 @@ class JixiangxingfuRecommend:
             except Exception as e:
                 Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
                 cls.i = 0
+                return
 
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, env, driver: WebDriver):

+ 25 - 21
xigua/xigua_main/run_xg_recommend.py

@@ -3,14 +3,17 @@
 # @Time: 2023/6/7
 import argparse
 import random
+
 from mq_http_sdk.mq_client import *
 from mq_http_sdk.mq_consumer import *
 from mq_http_sdk.mq_exception import MQExceptionBase
+
 sys.path.append(os.getcwd())
 from common.common import Common
 from common.public import get_consumer, ack_message, task_fun_mq
 from common.scheduling_db import MysqlHelper
-from xigua.xigua_recommend.xigua_recommend_scheduling import XiguarecommendScheduling
+# from xigua.xigua_recommend.xigua_recommend_scheduling import XiguarecommendScheduling
+from xigua.xigua_recommend.xg_recommend import XiguaRecommend
 
 
 def main(log_type, crawler, topic_name, group_id, env):
@@ -25,9 +28,9 @@ def main(log_type, crawler, topic_name, group_id, env):
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
     Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
-                                          f'WaitSeconds:{wait_seconds}\n'
-                                          f'TopicName:{topic_name}\n'
-                                          f'MQConsumer:{group_id}')
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # Consume messages via long polling.
@@ -44,15 +47,15 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
                 Common.logging(log_type, crawler, env, f"Receive\n"
-                                                      f"MessageId:{msg.message_id}\n"
-                                                      f"MessageBodyMD5:{msg.message_body_md5}\n"
-                                                      f"MessageTag:{msg.message_tag}\n"
-                                                      f"ConsumedTimes:{msg.consumed_times}\n"
-                                                      f"PublishTime:{msg.publish_time}\n"
-                                                      f"Body:{msg.message_body}\n"
-                                                      f"NextConsumeTime:{msg.next_consume_time}\n"
-                                                      f"ReceiptHandle:{msg.receipt_handle}\n"
-                                                      f"Properties:{msg.properties}")
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -70,15 +73,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
                 Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
-                # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
+                Common.logger(log_type, crawler).info(f"共{len(user_list)}个用户:\n{user_list}\n")
+                Common.logging(log_type, crawler, env, f"共{len(user_list)}个用户:\n{user_list}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
                 Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
-                XiguarecommendScheduling.get_videoList(log_type=log_type,
-                                                       crawler=crawler,
-                                                       rule_dict=rule_dict,
-                                                       our_uid=our_uid,
-                                                       env=env)
-                Common.del_logs(log_type, crawler)
+                XiguaRecommend.get_videoList(log_type=log_type,
+                                             crawler=crawler,
+                                             rule_dict=rule_dict,
+                                             our_uid=our_uid,
+                                             env=env)
+                # Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
                 Common.logging(log_type, crawler, env, '抓取一轮结束\n')
 
@@ -107,4 +111,4 @@ if __name__ == "__main__":
          crawler=args.crawler,
          topic_name=args.topic_name,
          group_id=args.group_id,
-         env=args.env)
+         env=args.env)

+ 20 - 360
xigua/xigua_recommend/xg_recommend.py

@@ -12,6 +12,7 @@ import sys
 import time
 import requests
 import urllib3
+import re
 from requests.adapters import HTTPAdapter
 from selenium import webdriver
 from selenium.webdriver import DesiredCapabilities
@@ -59,372 +60,31 @@ class XiguaRecommend:
     @classmethod
     def get_video_url(cls, video_info):
         video_url_dict = {}
-        # video_url
-        if 'videoResource' not in video_info:
-            video_url_dict["video_url"] = ''
-            video_url_dict["audio_url"] = ''
-            video_url_dict["video_width"] = 0
-            video_url_dict["video_height"] = 0
 
-        elif 'dash_120fps' in video_info['videoResource']:
-            if "video_list" in video_info['videoResource']['dash_120fps'] and 'video_4' in \
-                    video_info['videoResource']['dash_120fps']['video_list']:
-                video_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
-                audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_4']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vwidth']
-                video_height = video_info['videoResource']['dash_120fps']['video_list']['video_4']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_3' in \
-                    video_info['videoResource']['dash_120fps']['video_list']:
-                video_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
-                audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_3']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vwidth']
-                video_height = video_info['videoResource']['dash_120fps']['video_list']['video_3']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_2' in \
-                    video_info['videoResource']['dash_120fps']['video_list']:
-                video_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
-                audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_2']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vwidth']
-                video_height = video_info['videoResource']['dash_120fps']['video_list']['video_2']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            elif "video_list" in video_info['videoResource']['dash_120fps'] and 'video_1' in \
-                    video_info['videoResource']['dash_120fps']['video_list']:
-                video_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
-                audio_url = video_info['videoResource']['dash_120fps']['video_list']['video_1']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vwidth']
-                video_height = video_info['videoResource']['dash_120fps']['video_list']['video_1']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
+        video_resource = video_info.get('videoResource', {})
+        dash_120fps = video_resource.get('dash_120fps', {})
+        normal = video_resource.get('normal', {})
 
-            elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
-                    and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
-                    and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
-                    and len(
-                video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
-                    and len(
-                video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
+        # Take video_list from dash_120fps, falling back to normal when it is missing or empty
+        video_list = dash_120fps.get('video_list', {}) or normal.get('video_list', {})
+        # Pick the first truthy entry among video_4, video_3, video_2, video_1 (in that order); video is falsy when none is present.
+        video = video_list.get('video_4') or video_list.get('video_3') or video_list.get('video_2') or video_list.get('video_1')
 
-                video_url = \
-                    video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                        'backup_url_1']
-                audio_url = \
-                    video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1][
-                        'backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = \
-                    video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                        'vwidth']
-                video_height = \
-                    video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1][
-                        'vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            else:
-                video_url_dict["video_url"] = ''
-                video_url_dict["audio_url"] = ''
-                video_url_dict["video_width"] = 0
-                video_url_dict["video_height"] = 0
-
-        elif 'dash' in video_info['videoResource']:
-            if "video_list" in video_info['videoResource']['dash'] and 'video_4' in \
-                    video_info['videoResource']['dash']['video_list']:
-                video_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
-                audio_url = video_info['videoResource']['dash']['video_list']['video_4']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash']['video_list']['video_4']['vwidth']
-                video_height = video_info['videoResource']['dash']['video_list']['video_4']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            elif "video_list" in video_info['videoResource']['dash'] and 'video_3' in \
-                    video_info['videoResource']['dash']['video_list']:
-                video_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
-                audio_url = video_info['videoResource']['dash']['video_list']['video_3']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash']['video_list']['video_3']['vwidth']
-                video_height = video_info['videoResource']['dash']['video_list']['video_3']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            elif "video_list" in video_info['videoResource']['dash'] and 'video_2' in \
-                    video_info['videoResource']['dash']['video_list']:
-                video_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
-                audio_url = video_info['videoResource']['dash']['video_list']['video_2']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash']['video_list']['video_2']['vwidth']
-                video_height = video_info['videoResource']['dash']['video_list']['video_2']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            elif "video_list" in video_info['videoResource']['dash'] and 'video_1' in \
-                    video_info['videoResource']['dash']['video_list']:
-                video_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
-                audio_url = video_info['videoResource']['dash']['video_list']['video_1']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash']['video_list']['video_1']['vwidth']
-                video_height = video_info['videoResource']['dash']['video_list']['video_1']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-
-            elif 'dynamic_video' in video_info['videoResource']['dash'] \
-                    and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video'] \
-                    and 'dynamic_audio_list' in video_info['videoResource']['dash']['dynamic_video'] \
-                    and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list']) != 0 \
-                    and len(video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list']) != 0:
-
-                video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
-                    'backup_url_1']
-                audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1][
-                    'backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
-                    'vwidth']
-                video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
-                    'vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            else:
-                video_url_dict["video_url"] = ''
-                video_url_dict["audio_url"] = ''
-                video_url_dict["video_width"] = 0
-                video_url_dict["video_height"] = 0
-
-        elif 'normal' in video_info['videoResource']:
-            if "video_list" in video_info['videoResource']['normal'] and 'video_4' in \
-                    video_info['videoResource']['normal']['video_list']:
-                video_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
-                audio_url = video_info['videoResource']['normal']['video_list']['video_4']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['normal']['video_list']['video_4']['vwidth']
-                video_height = video_info['videoResource']['normal']['video_list']['video_4']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            elif "video_list" in video_info['videoResource']['normal'] and 'video_3' in \
-                    video_info['videoResource']['normal']['video_list']:
-                video_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
-                audio_url = video_info['videoResource']['normal']['video_list']['video_3']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['normal']['video_list']['video_3']['vwidth']
-                video_height = video_info['videoResource']['normal']['video_list']['video_3']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            elif "video_list" in video_info['videoResource']['normal'] and 'video_2' in \
-                    video_info['videoResource']['normal']['video_list']:
-                video_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
-                audio_url = video_info['videoResource']['normal']['video_list']['video_2']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['normal']['video_list']['video_2']['vwidth']
-                video_height = video_info['videoResource']['normal']['video_list']['video_2']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            elif "video_list" in video_info['videoResource']['normal'] and 'video_1' in \
-                    video_info['videoResource']['normal']['video_list']:
-                video_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
-                audio_url = video_info['videoResource']['normal']['video_list']['video_1']['backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['normal']['video_list']['video_1']['vwidth']
-                video_height = video_info['videoResource']['normal']['video_list']['video_1']['vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
+        video_url = video.get('backup_url_1', '') if video else ''
+        audio_url = video.get('backup_url_1', '') if video else ''
+        video_width = video.get('vwidth', 0) if video else 0
+        video_height = video.get('vheight', 0) if video else 0
 
-            elif 'dynamic_video' in video_info['videoResource']['normal'] \
-                    and 'dynamic_video_list' in video_info['videoResource']['normal']['dynamic_video'] \
-                    and 'dynamic_audio_list' in video_info['videoResource']['normal']['dynamic_video'] \
-                    and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list']) != 0 \
-                    and len(video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list']) != 0:
+        video_url = re.sub(r'[^a-zA-Z0-9+/=]', '', video_url)  # 从视频URL中删除特殊字符
+        audio_url = re.sub(r'[^a-zA-Z0-9+/=]', '', audio_url)  # 从音频URL中删除特殊字符
 
-                video_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
-                    'backup_url_1']
-                audio_url = video_info['videoResource']['normal']['dynamic_video']['dynamic_audio_list'][-1][
-                    'backup_url_1']
-                if len(video_url) % 3 == 1:
-                    video_url += '=='
-                elif len(video_url) % 3 == 2:
-                    video_url += '='
-                elif len(audio_url) % 3 == 1:
-                    audio_url += '=='
-                elif len(audio_url) % 3 == 2:
-                    audio_url += '='
-                video_url = base64.b64decode(video_url).decode('utf8')
-                audio_url = base64.b64decode(audio_url).decode('utf8')
-                video_width = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
-                    'vwidth']
-                video_height = video_info['videoResource']['normal']['dynamic_video']['dynamic_video_list'][-1][
-                    'vheight']
-                video_url_dict["video_url"] = video_url
-                video_url_dict["audio_url"] = audio_url
-                video_url_dict["video_width"] = video_width
-                video_url_dict["video_height"] = video_height
-            else:
-                video_url_dict["video_url"] = ''
-                video_url_dict["audio_url"] = ''
-                video_url_dict["video_width"] = 0
-                video_url_dict["video_height"] = 0
+        video_url = base64.b64decode(video_url).decode('utf8')  # 解码视频URL
+        audio_url = base64.b64decode(audio_url).decode('utf8')  # 解码音频URL
 
-        else:
-            video_url_dict["video_url"] = ''
-            video_url_dict["audio_url"] = ''
-            video_url_dict["video_width"] = 0
-            video_url_dict["video_height"] = 0
+        video_url_dict["video_url"] = video_url
+        video_url_dict["audio_url"] = audio_url
+        video_url_dict["video_width"] = video_width
+        video_url_dict["video_height"] = video_height
 
         return video_url_dict
 

+ 1 - 1
zhiqingtiantiankan/zhiqingtiantiankan_main/run_zhiqingtiantiankan_recommend.py

@@ -12,7 +12,7 @@ from zhiqingtiantiankan.zhiqingtiantiankan_recommend.zhiqingtiantiankan_recommen
 def main(log_type, crawler, env):
     Common.logger(log_type, crawler).info('开始抓取:知青天天看小程序\n')
     ZhiqingtiantiankanRecommend.start_wechat(log_type, crawler, env)
-    Common.del_logs(log_type, crawler)
+    # Common.del_logs(log_type, crawler)
     Common.logger(log_type, crawler).info('抓取完一轮\n')
 
 

+ 1 - 0
zhiqingtiantiankan/zhiqingtiantiankan_recommend/zhiqingtiantiankan_recommend.py

@@ -310,6 +310,7 @@ class ZhiqingtiantiankanRecommend:
             except Exception as e:
                 Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
                 cls.i = 0
+                return
 
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, env, driver: WebDriver):

+ 1 - 1
zhongmiaoyinxin/zhongmiaoyinxin_main/run_zhongmiaoyinxin_recommend.py

@@ -12,7 +12,7 @@ from zhongmiaoyinxin.zhongmiaoyinxin_recommend.zhongmiaoyinxin_recommend import
 def main(log_type, crawler, env):
     Common.logger(log_type, crawler).info('开始抓取:众妙音信小程序\n')
     ZhongmiaoyinxinRecommend.start_wechat(log_type, crawler, env)
-    Common.del_logs(log_type, crawler)
+    # Common.del_logs(log_type, crawler)
     Common.logger(log_type, crawler).info('抓取完一轮\n')
 
 

+ 1 - 0
zhongmiaoyinxin/zhongmiaoyinxin_recommend/zhongmiaoyinxin_recommend.py

@@ -301,6 +301,7 @@ class ZhongmiaoyinxinRecommend:
             except Exception as e:
                 Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
                 cls.i = 0
+                return
 
     @classmethod
     def download_publish(cls, log_type, crawler, video_dict, env, driver: WebDriver):