|
@@ -7,10 +7,12 @@ YouTube 定向榜
|
|
2. 10分钟>=时长>=1分钟
|
|
2. 10分钟>=时长>=1分钟
|
|
"""
|
|
"""
|
|
import os
|
|
import os
|
|
|
|
+import re
|
|
import shutil
|
|
import shutil
|
|
import sys
|
|
import sys
|
|
import time
|
|
import time
|
|
import json
|
|
import json
|
|
|
|
+# import emoji
|
|
import requests
|
|
import requests
|
|
from selenium import webdriver
|
|
from selenium import webdriver
|
|
from selenium.webdriver.chrome.service import Service
|
|
from selenium.webdriver.chrome.service import Service
|
|
@@ -701,6 +703,15 @@ class Follow:
|
|
except Exception as e:
|
|
except Exception as e:
|
|
Common.logger(log_type, crawler).error(f"get_videos异常:{e}\n")
|
|
Common.logger(log_type, crawler).error(f"get_videos异常:{e}\n")
|
|
|
|
|
|
|
|
@classmethod
def filter_emoji(cls, title):
    """Strip emoji from a video title.

    Every supplementary-plane code point (U+10000..U+10FFFF) is removed,
    exactly as before. In addition, any zero-width joiner (U+200D) or
    variation selector (U+FE0E / U+FE0F) directly attached to such a code
    point is removed with it, so composite emoji (families, flags, ...) do
    not leave invisible characters behind in the title.

    BMP characters that are not attached to a supplementary-plane code
    point are left untouched.

    :param title: raw title text; ``None`` or an empty string yields ``""``.
    :return: the title with emoji removed.
    """
    if not title:
        return ""

    # The former ``except re.error`` branch (surrogate-pair pattern) only
    # mattered on narrow Python 2 builds; on Python 3 the wide range below
    # always compiles, so the fallback was unreachable and is dropped.
    emoji_run = re.compile(
        r"[\u200d\ufe0e\ufe0f]*"            # joiners/selectors leading into the emoji
        r"[\U00010000-\U0010ffff]"          # the supplementary-plane code point itself
        r"[\u200d\ufe0e\ufe0f\U00010000-\U0010ffff]*"  # rest of the emoji sequence
    )
    return emoji_run.sub("", title)
|
|
|
|
+
|
|
@classmethod
|
|
@classmethod
|
|
def get_video_info(cls, log_type, crawler, out_uid, video_id, machine):
|
|
def get_video_info(cls, log_type, crawler, out_uid, video_id, machine):
|
|
try:
|
|
try:
|
|
@@ -891,15 +902,16 @@ class Follow:
|
|
else:
|
|
else:
|
|
playerMicroformatRenderer = response.json()['microformat']['playerMicroformatRenderer']
|
|
playerMicroformatRenderer = response.json()['microformat']['playerMicroformatRenderer']
|
|
videoDetails = response.json()['videoDetails']
|
|
videoDetails = response.json()['videoDetails']
|
|
- streamingData = response.json()['streamingData']
|
|
|
|
|
|
+ # streamingData = response.json()['streamingData']
|
|
|
|
|
|
# video_title
|
|
# video_title
|
|
if 'title' not in videoDetails:
|
|
if 'title' not in videoDetails:
|
|
video_title = ''
|
|
video_title = ''
|
|
else:
|
|
else:
|
|
video_title = videoDetails['title']
|
|
video_title = videoDetails['title']
|
|
- if Translate.is_contains_chinese(video_title) is False:
|
|
|
|
- video_title = Translate.google_translate(video_title, machine) # 自动翻译标题为中文
|
|
|
|
|
|
+ video_title = cls.filter_emoji(video_title)
|
|
|
|
+ # if Translate.is_contains_chinese(video_title) is False:
|
|
|
|
+ video_title = Translate.google_translate(video_title, machine) # 自动翻译标题为中文
|
|
|
|
|
|
if 'lengthSeconds' not in videoDetails:
|
|
if 'lengthSeconds' not in videoDetails:
|
|
duration = 0
|
|
duration = 0
|
|
@@ -945,14 +957,15 @@ class Follow:
|
|
cover_url = videoDetails['thumbnail']['thumbnails'][-1]['url']
|
|
cover_url = videoDetails['thumbnail']['thumbnails'][-1]['url']
|
|
|
|
|
|
# video_url
|
|
# video_url
|
|
- if 'formats' not in streamingData:
|
|
|
|
- video_url = ''
|
|
|
|
- elif len(streamingData['formats']) == 0:
|
|
|
|
- video_url = ''
|
|
|
|
- elif 'url' not in streamingData['formats'][-1]:
|
|
|
|
- video_url = ''
|
|
|
|
- else:
|
|
|
|
- video_url = streamingData['formats'][-1]['url']
|
|
|
|
|
|
+ # if 'formats' not in streamingData:
|
|
|
|
+ # video_url = ''
|
|
|
|
+ # elif len(streamingData['formats']) == 0:
|
|
|
|
+ # video_url = ''
|
|
|
|
+ # elif 'url' not in streamingData['formats'][-1]:
|
|
|
|
+ # video_url = ''
|
|
|
|
+ # else:
|
|
|
|
+ # video_url = streamingData['formats'][-1]['url']
|
|
|
|
+ video_url = f"https://www.youtube.com/watch?v={video_id}"
|
|
|
|
|
|
Common.logger(log_type, crawler).info(f'video_title:{video_title}')
|
|
Common.logger(log_type, crawler).info(f'video_title:{video_title}')
|
|
Common.logger(log_type, crawler).info(f'video_id:{video_id}')
|
|
Common.logger(log_type, crawler).info(f'video_id:{video_id}')
|
|
@@ -994,7 +1007,8 @@ class Follow:
|
|
else:
|
|
else:
|
|
# 下载视频
|
|
# 下载视频
|
|
Common.logger(log_type, crawler).info('开始下载视频...')
|
|
Common.logger(log_type, crawler).info('开始下载视频...')
|
|
- Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
|
|
|
|
|
|
+ # Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
|
|
|
|
+ Common.download_method(log_type, crawler, 'youtube_video', video_dict['video_title'], video_dict['video_url'])
|
|
ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
|
|
ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
|
|
video_width = int(ffmpeg_dict['width'])
|
|
video_width = int(ffmpeg_dict['width'])
|
|
video_height = int(ffmpeg_dict['height'])
|
|
video_height = int(ffmpeg_dict['height'])
|
|
@@ -1015,12 +1029,12 @@ class Follow:
|
|
video_dict['avatar_url'] = video_dict['cover_url']
|
|
video_dict['avatar_url'] = video_dict['cover_url']
|
|
video_dict['session'] = f'youtube{int(time.time())}'
|
|
video_dict['session'] = f'youtube{int(time.time())}'
|
|
rule='1,2'
|
|
rule='1,2'
|
|
- if duration < 60 or duration > 600:
|
|
|
|
- # 删除视频文件夹
|
|
|
|
- shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
|
|
|
|
- Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则,删除成功\n")
|
|
|
|
- return
|
|
|
|
- elif video_size == 0 or duration == 0 or video_size is None or duration is None:
|
|
|
|
|
|
+ # if duration < 60 or duration > 600:
|
|
|
|
+ # # 删除视频文件夹
|
|
|
|
+ # shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
|
|
|
|
+ # Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则,删除成功\n")
|
|
|
|
+ # return
|
|
|
|
+ if video_size == 0 or duration == 0 or video_size is None or duration is None:
|
|
# 删除视频文件夹
|
|
# 删除视频文件夹
|
|
shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
|
|
shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
|
|
Common.logger(log_type, crawler).info(f"视频下载出错,删除成功\n")
|
|
Common.logger(log_type, crawler).info(f"视频下载出错,删除成功\n")
|
|
@@ -1120,9 +1134,10 @@ class Follow:
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: print a sample title after emoji filtering.
    #
    # Other ad-hoc entry points, kept disabled for reference:
    #   print(Follow.get_browse_id('follow', 'youtube', '@chinatravel5971', "local"))
    #   print(Follow.get_user_from_feishu('follow', 'youtube', 'c467d7', 'dev', 'local'))
    #   Follow.get_out_user_info('follow', 'youtube', 'UC08jgxf119fzynp2uHCvZIg', '@weitravel')
    #   Follow.get_video_info('follow', 'youtube', 'OGVK0IXBIhI')
    #   Follow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'out', 'dev', 'local')
    sample_title = "姐妹倆一唱一和,完美配合,終於把大慶降服了😅😅#萌娃搞笑日常"
    print(Follow.filter_emoji(sample_title))
|