ソースを参照

update youtube获取抓取作者名单从飞书改为mysql

lierqiang 2 年前
コミット
0fe6b3e661
1 ファイル変更、11 行追加、16 行削除
  1. 11 16
      youtube/youtube_follow/youtube_follow_api.py

+ 11 - 16
youtube/youtube_follow/youtube_follow_api.py

@@ -12,13 +12,7 @@ import shutil
 import sys
 import time
 import json
-import random
-# import emoji
 import requests
-# from selenium import webdriver
-# from selenium.webdriver.chrome.service import Service
-# from selenium.webdriver.common.by import By
-# from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
 sys.path.append(os.getcwd())
 from common.common import Common
@@ -27,6 +21,7 @@ from common.feishu import Feishu
 from common.getuser import getUser
 from common.publish import Publish
 from common.translate import Translate
+from common.public import get_user_from_mysql
 
 headers = {
     'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
@@ -144,11 +139,11 @@ class YoutubeFollow:
                 if 'tabRenderer' not in tab or 'content' not in tab['tabRenderer']:
                     continue
                 viewCountText = \
-                tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
-                    'contents'][0]['channelAboutFullMetadataRenderer']['viewCountText']['simpleText']
+                    tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
+                        'contents'][0]['channelAboutFullMetadataRenderer']['viewCountText']['simpleText']
                 out_create_time = \
-                tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
-                    'contents'][0]['channelAboutFullMetadataRenderer']['joinedDateText']['runs'][1]['text']
+                    tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
+                        'contents'][0]['channelAboutFullMetadataRenderer']['joinedDateText']['runs'][1]['text']
                 break
             out_user_dict = {
                 'out_user_name': header['title'],
@@ -1151,16 +1146,16 @@ class YoutubeFollow:
     @classmethod
     def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
         try:
-            user_list = cls.get_user_from_feishu(log_type, crawler, 'c467d7', env, machine)
+            # user_list = cls.get_user_from_feishu(log_type, crawler, 'c467d7', env, machine)
+            user_list = get_user_from_mysql(log_type, crawler, crawler, env, machine)
             if len(user_list) == 0:
                 Common.logger(log_type, crawler).warning('用户列表为空\n')
             else:
                 for user_dict in user_list:
-                    out_uid = user_dict['out_user_id']
-                    user_name = user_dict['out_user_name']
-                    browse_id = user_dict['out_browse_id']
-                    our_uid = user_dict['our_user_id']
-                    out_user_url = user_dict['out_user_url']
+                    out_user_url = user_dict['spider_link']
+                    out_uid = out_user_url.split('/')[3]
+                    user_name = user_dict['nick_name']
+                    our_uid = user_dict['media_id']
                     Common.logger(log_type, crawler).info(f'获取 {user_name} 主页视频\n')
                     cls.get_videos(log_type, crawler, strategy, oss_endpoint, env, out_uid, our_uid, machine,
                                    out_user_url)