|
@@ -12,13 +12,7 @@ import shutil
|
|
|
import sys
|
|
|
import time
|
|
|
import json
|
|
|
-import random
|
|
|
-# import emoji
|
|
|
import requests
|
|
|
-# from selenium import webdriver
|
|
|
-# from selenium.webdriver.chrome.service import Service
|
|
|
-# from selenium.webdriver.common.by import By
|
|
|
-# from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
|
|
|
sys.path.append(os.getcwd())
|
|
|
from common.common import Common
|
|
@@ -27,6 +21,7 @@ from common.feishu import Feishu
|
|
|
from common.getuser import getUser
|
|
|
from common.publish import Publish
|
|
|
from common.translate import Translate
|
|
|
+from common.public import get_user_from_mysql
|
|
|
|
|
|
headers = {
|
|
|
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
|
|
@@ -144,11 +139,11 @@ class YoutubeFollow:
|
|
|
if 'tabRenderer' not in tab or 'content' not in tab['tabRenderer']:
|
|
|
continue
|
|
|
viewCountText = \
|
|
|
- tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
|
|
|
- 'contents'][0]['channelAboutFullMetadataRenderer']['viewCountText']['simpleText']
|
|
|
+ tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
|
|
|
+ 'contents'][0]['channelAboutFullMetadataRenderer']['viewCountText']['simpleText']
|
|
|
out_create_time = \
|
|
|
- tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
|
|
|
- 'contents'][0]['channelAboutFullMetadataRenderer']['joinedDateText']['runs'][1]['text']
|
|
|
+ tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
|
|
|
+ 'contents'][0]['channelAboutFullMetadataRenderer']['joinedDateText']['runs'][1]['text']
|
|
|
break
|
|
|
out_user_dict = {
|
|
|
'out_user_name': header['title'],
|
|
@@ -1151,16 +1146,16 @@ class YoutubeFollow:
|
|
|
@classmethod
|
|
|
def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
|
|
|
try:
|
|
|
- user_list = cls.get_user_from_feishu(log_type, crawler, 'c467d7', env, machine)
|
|
|
+ # user_list = cls.get_user_from_feishu(log_type, crawler, 'c467d7', env, machine)
|
|
|
+ user_list = get_user_from_mysql(log_type, crawler, crawler, env, machine)
|
|
|
if len(user_list) == 0:
|
|
|
Common.logger(log_type, crawler).warning('用户列表为空\n')
|
|
|
else:
|
|
|
for user_dict in user_list:
|
|
|
- out_uid = user_dict['out_user_id']
|
|
|
- user_name = user_dict['out_user_name']
|
|
|
- browse_id = user_dict['out_browse_id']
|
|
|
- our_uid = user_dict['our_user_id']
|
|
|
- out_user_url = user_dict['out_user_url']
|
|
|
+ out_user_url = user_dict['spider_link']
|
|
|
+ out_uid = out_user_url.split('/')[3]
|
|
|
+ user_name = user_dict['nick_name']
|
|
|
+ our_uid = user_dict['media_id']
|
|
|
Common.logger(log_type, crawler).info(f'获取 {user_name} 主页视频\n')
|
|
|
cls.get_videos(log_type, crawler, strategy, oss_endpoint, env, out_uid, our_uid, machine,
|
|
|
out_user_url)
|