
update youtube: fix a bug in fetching Feishu data

lierqiang 2 years ago
parent commit
161247af40

+ 2 - 2
common/users.py → common/getuser.py

@@ -15,7 +15,7 @@ from common.common import Common
 from common.db import MysqlHelper
 
 
-class Users:
+class getUser:
 
     @classmethod
     def get_default_user(cls):
@@ -162,5 +162,5 @@ class Users:
 
 
 if __name__ == "__main__":
-    uid = Users.create_uid('log', 'kanyikan', 'youtube爬虫,定向爬虫策略', 'dev')
+    uid = getUser.create_uid('log', 'kanyikan', 'youtube爬虫,定向爬虫策略', 'dev')
     print(uid)
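
Downstream code only needs to update its import for the rename above; a minimal sketch of the new call site, mirroring the __main__ example in this file (the Users = getUser alias is an optional migration shim assumed here, not part of the commit):

# After this commit the class lives in common/getuser.py and is named getUser;
# the classmethod interface itself is unchanged.
from common.getuser import getUser   # was: from common.users import Users

Users = getUser  # optional shim (assumption): keeps old call sites working during migration

uid = getUser.create_uid('log', 'kanyikan', 'youtube爬虫,定向爬虫策略', 'dev')
print(uid)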

+ 32 - 34
youtube/youtube_follow/youtube_follow_api.py

@@ -24,7 +24,7 @@ sys.path.append(os.getcwd())
 from common.common import Common
 from common.db import MysqlHelper
 from common.feishu import Feishu
-from common.users import Users
+from common.getuser import getUser
 from common.publish import Publish
 from common.translate import Translate
 
@@ -135,7 +135,11 @@ class YoutubeFollow:
             data = json.loads(info)
             header = data['header']['c4TabbedHeaderRenderer']
             tabs = data['contents']['twoColumnBrowseResultsRenderer']['tabs']
-            subsimpleText = header['subscriberCountText']['simpleText'].replace('位订阅者', '')
+            try:
+                subsimpleText = header['subscriberCountText']['simpleText'].replace('位订阅者', '')
+                out_fans = format_nums(subsimpleText)
+            except Exception as e:
+                out_fans = 0
             for tab in tabs:
                 if 'tabRenderer' not in tab or 'content' not in tab['tabRenderer']:
                     continue
@@ -149,7 +153,7 @@ class YoutubeFollow:
             out_user_dict = {
                 'out_user_name': header['title'],
                 'out_avatar_url': header['avatar']['thumbnails'][-1]['url'],
-                'out_fans': format_nums(subsimpleText),
+                'out_fans': out_fans,
                 'out_play_cnt': int(
                     viewCountText.replace('收看次數:', '').replace('次', '').replace(',', '')) if viewCountText else 0,
                 'out_create_time': out_create_time.replace('年', '-').replace('月', '-').replace('日', ''),
@@ -173,8 +177,7 @@ class YoutubeFollow:
         try:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             user_list = []
-            for i in range(1, len(user_sheet)):
-                # for i in range(181, len(user_sheet)):
+            for i in range(271, len(user_sheet)):
                 out_uid = user_sheet[i][2]
                 user_name = user_sheet[i][3]
                 browse_id = user_sheet[i][5]
@@ -182,22 +185,17 @@ class YoutubeFollow:
                 uer_url = user_sheet[i][4]
                 if out_uid is not None and user_name is not None:
                     Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
-                    # 获取站外browse_id,并写入飞书
-                    # if browse_id is None:
-                    #     browse_id = cls.get_browse_id(log_type, crawler, out_uid, machine)
-                    #     if browse_id is None:
-                    #         Common.logger(log_type, crawler).warning('browse_id is None !')
-                    #     else:
-                    #         Feishu.update_values(log_type, crawler, sheetid, f'F{i + 1}:F{i + 1}', [[browse_id]])
-                    #         Common.logger(log_type, crawler).info(f'browse_id写入成功:{browse_id}')
-                    # 站内 UID 为空,且数据库中(youtube+out_user_id)返回数量 == 0,则创建新的站内账号
+
                     if our_uid is None:
                         sql = f""" select * from crawler_user where platform="{cls.platform}" and out_user_id="{out_uid}" """
                         our_user_info = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
                         # 数据库中(youtube + out_user_id)返回数量 == 0,则创建站内账号UID,并写入定向账号飞书表。并结合站外用户信息,一并写入爬虫账号数据库
-                        if our_user_info is None or len(our_user_info) == 0:
+                        if not our_user_info:
                             # 获取站外账号信息,写入数据库
-                            out_user_dict = cls.get_out_user_info(log_type, crawler, browse_id, out_uid)
+                            try:
+                                out_user_dict = cls.get_out_user_info(log_type, crawler, browse_id, out_uid)
+                            except Exception as e:
+                                continue
                             out_avatar_url = out_user_dict['out_avatar_url']
                             out_create_time = out_user_dict['out_create_time']
                             out_play_cnt = out_user_dict['out_play_cnt']
@@ -210,7 +208,7 @@ class YoutubeFollow:
                                 'avatarUrl': out_avatar_url,
                                 'tagName': tag,
                             }
-                            our_uid = Users.create_uid(log_type, crawler, create_user_dict, env)
+                            our_uid = getUser.create_uid(log_type, crawler, create_user_dict, env)
                             Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
                             if env == 'prod':
                                 our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
@@ -221,22 +219,22 @@ class YoutubeFollow:
                                                  [[our_uid, our_user_link]])
                             Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!')
 
-                            sql = f""" insert into crawler_user(user_id, 
-                                                out_user_id, 
-                                                out_user_name, 
-                                                out_avatar_url, 
-                                                out_create_time, 
-                                                out_play_cnt, 
-                                                out_fans, 
-                                                platform, 
+                            sql = f""" insert into crawler_user(user_id,
+                                                out_user_id,
+                                                out_user_name,
+                                                out_avatar_url,
+                                                out_create_time,
+                                                out_play_cnt,
+                                                out_fans,
+                                                platform,
                                                 tag)
-                                                values({our_uid}, 
-                                                "{out_uid}", 
-                                                "{user_name}", 
-                                                "{out_avatar_url}", 
-                                                "{out_create_time}", 
-                                                {out_play_cnt}, 
-                                                {out_fans}, 
+                                                values({our_uid},
+                                                "{out_uid}",
+                                                "{user_name}",
+                                                "{out_avatar_url}",
+                                                "{out_create_time}",
+                                                {out_play_cnt},
+                                                {out_fans},
                                                 "{cls.platform}",
                                                 "{tag}") """
                             Common.logger(log_type, crawler).info(f'sql:{sql}')
@@ -1179,10 +1177,10 @@ if __name__ == "__main__":
     # print(YoutubeFollow.get_user_from_feishu('follow', 'youtube', 'c467d7', 'prod', 'prod'))
     # YoutubeFollow.get_out_user_info('follow', 'youtube', 'UC08jgxf119fzynp2uHCvZIg', '@weitravel')
     # YoutubeFollow.get_video_info('follow', 'youtube', 'OGVK0IXBIhI')
-    # YoutubeFollow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'out', 'dev', 'local')
+    YoutubeFollow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'hk', 'dev', 'local')
     # print(YoutubeFollow.filter_emoji("姐妹倆一唱一和,完美配合,終於把大慶降服了😅😅#萌娃搞笑日常"))
     # YoutubeFollow.repeat_video('follow', 'youtube', 4, "dev", "local")
     # title = "'西部巡游220丨两人一车环游中国半年,需要花费多少钱? 2万公里吃住行费用总结'"
     # title = "'Insanely Crowded Shanghai Yu Garden Lantern Festival Walk Tour 2023 人气爆棚的上海豫园元宵节漫步之行 4K'"
     # print(title.strip().replace("\\", "").replace(" ", "").replace("\n", "").replace("/", "").replace("\r", "").replace("&NBSP", "").replace("&", ""))
-    pass
+    pass
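
The Feishu-data bug fix in this file comes down to two defensive patterns: default out_fans to 0 when a channel header has no subscriberCountText, and skip a user (continue) when fetching their channel info fails, instead of aborting the whole run. A minimal, self-contained sketch of both follows; format_nums is a stand-in with assumed behaviour (the real helper is the one imported in youtube_follow_api.py), and parse_out_fans / update_users are illustrative names, not functions from the repo.

def format_nums(text: str) -> int:
    # Stand-in for the project's helper (assumed behaviour): '1.2万' -> 12000, '3,456' -> 3456.
    text = text.strip().replace(',', '')
    if text.endswith('万'):
        return int(float(text[:-1]) * 10_000)
    return int(float(text))

def parse_out_fans(header: dict) -> int:
    # Channels that hide their subscriber count omit the field entirely.
    try:
        raw = header['subscriberCountText']['simpleText'].replace('位订阅者', '')
        return format_nums(raw)
    except (KeyError, TypeError, ValueError):
        return 0  # missing or unparseable -> 0, as the diff does

def update_users(user_rows, fetch_out_user_info):
    # One bad channel page should not abort the whole update loop.
    for row in user_rows:
        try:
            yield fetch_out_user_info(row)
        except Exception:
            continue

print(parse_out_fans({'subscriberCountText': {'simpleText': '1.2万位订阅者'}}))  # 12000
print(parse_out_fans({}))  # 0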

+ 6 - 7
youtube/youtube_main/run_youtube_follow.py

@@ -21,13 +21,12 @@ def main(log_type, crawler, strategy, oss_endpoint, env, machine):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()  ## 新建参数解释器对象
-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
-    parser.add_argument('--crawler')  ## 添加参数
-    parser.add_argument('--strategy')  ## 添加参数
-    parser.add_argument('--our_uid')  ## 添加参数
-    parser.add_argument('--oss_endpoint')  ## 添加参数
-    parser.add_argument('--env')  ## 添加参数
-    parser.add_argument('--machine')  ## 添加参数
+    parser.add_argument('--log_type', default='follow')  ## 添加参数,注明参数类型
+    parser.add_argument('--crawler',default='youtube')  ## 添加参数
+    parser.add_argument('--strategy',default='youtube定向')  ## 添加参数
+    parser.add_argument('--oss_endpoint',default='outer')  ## 添加参数
+    parser.add_argument('--env',default='prod')  ## 添加参数
+    parser.add_argument('--machine',default='aliyun_hk')  ## 添加参数
     args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
     # print(args)
     main(log_type=args.log_type,
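
With every flag now carrying a default, the runner can be started without arguments, while explicit flags still override the defaults. A small sketch of that pattern (the parse_args list only simulates a command line, and --env dev is an illustrative override):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--log_type', default='follow')
parser.add_argument('--crawler', default='youtube')
parser.add_argument('--env', default='prod')
parser.add_argument('--machine', default='aliyun_hk')

args = parser.parse_args(['--env', 'dev'])   # simulates: python run_youtube_follow.py --env dev
print(args.log_type, args.crawler, args.env, args.machine)  # follow youtube dev aliyun_hk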