wangkun · 1 year ago
commit 9caf32005d

+ 10 - 10
gongzhonghao/gongzhonghao_author/gongzhonghao1_author.py

@@ -446,7 +446,6 @@ class GongzhonghaoAuthor1:
             if len_sheet >= 101:
                 len_sheet = 101
             for i in range(1, len_sheet):
-                # try:
                 user_dict = cls.get_users(log_type=log_type,
                                           crawler=crawler,
                                           user_sheet=user_sheet,
@@ -454,15 +453,16 @@ class GongzhonghaoAuthor1:
                                           i=i,
                                           env=env)
                 Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_dict["user_name"]}\n')
-                cls.get_videoList(log_type=log_type,
-                                  crawler=crawler,
-                                  rule_dict=rule_dict,
-                                  user_dict=user_dict,
-                                  env=env)
-                Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
-                time.sleep(60)
-            # except Exception as e:
-            #     Common.logger(log_type, crawler).info(f'Exception while crawling official account {user_dict["user_name"]}: {e}\n')
+                try:
+                    cls.get_videoList(log_type=log_type,
+                                      crawler=crawler,
+                                      rule_dict=rule_dict,
+                                      user_dict=user_dict,
+                                      env=env)
+                    Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
+                    time.sleep(60)
+                except Exception as e:
+                    Common.logger(log_type, crawler).info(f'Exception while crawling official account {user_dict["user_name"]}: {e}\n')
 
 
 if __name__ == "__main__":
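The net effect of this first diff: the per-user try/except that had been commented out is restored, but now wraps only the get_videoList call inside the loop, so a failure on one account no longer aborts the remaining users. A minimal sketch of the pattern, with hypothetical names (crawl_all, fetch_videos, and users are illustrative, not from this repo):

```python
import logging
import time

logging.basicConfig(level=logging.INFO)

def crawl_all(users, fetch_videos, sleep_seconds=60):
    """Visit each account in turn, isolating failures per account."""
    for user in users:
        try:
            fetch_videos(user)          # may raise on API/network errors
            time.sleep(sleep_seconds)   # throttle between accounts
        except Exception as e:
            # Log and continue with the next account instead of aborting.
            logging.info("exception while crawling %s: %s", user, e)
```

Note that get_users itself is still called outside the try, so an error while resolving a sheet row would still propagate out of the loop.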

+ 52 - 84
gongzhonghao/gongzhonghao_author/gongzhonghao2_author.py

@@ -48,76 +48,28 @@ class GongzhonghaoAuthor2:
         return token_dict
 
     @classmethod
-    def get_users(cls, log_type, crawler, sheetid, env):
-        while True:
-            user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-            if user_sheet is None:
-                Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
-                time.sleep(2)
-                continue
-            user_list = []
-            len_sheet = len(user_sheet)
-            if len_sheet <= 101:
-                Common.logger(log_type, crawler).info("抓取用户数<=100,无需启动第二套抓取脚本\n")
-                return
-            if len_sheet >= 201:
-                len_sheet = 201
-            for i in range(101, len_sheet):
-            # for i in range(1, 3):
-                user_name = user_sheet[i][0]
-                wechat_name = user_sheet[i][2]
-                if wechat_name is None or wechat_name.strip() == "" or wechat_name.replace(" ", "") == "":
-                    wechat_name = user_name
-                # our_uid = user_sheet[i][5]
-                # our_user_link = user_sheet[i][6]
-                out_uid = user_sheet[i][3]
-                avatar_url = user_sheet[i][4]
-                if out_uid is None or out_uid.strip() == "" or out_uid.replace(" ", "") == "":
-                    user_info_dict = cls.get_user_info(log_type=log_type, crawler=crawler, wechat_name=wechat_name, env=env)
-                    out_uid = user_info_dict["user_id"]
-                    avatar_url = user_info_dict["avatar_url"]
-                # tag1 = user_sheet[i][7]
-                # tag2 = user_sheet[i][8]
-                # tag3 = user_sheet[i][9]
-                # tag4 = user_sheet[i][10]
-                # tag5 = user_sheet[i][11]
-                # tag6 = user_sheet[i][12]
-                # Common.logger(log_type, crawler).info(f"Updating user info for {user_name}")
-                # if out_uid is None or our_uid is None:
-                #     # Info used to create our_id
-                #     user_dict = {
-                #         'recommendStatus': -6,
-                #         'appRecommendStatus': -6,
-                #         'nickName': user_info_dict["user_name"],
-                #         'avatarUrl': user_info_dict['avatar_url'],
-                #         'tagName': f'{tag1},{tag2},{tag3},{tag4},{tag5},{tag6}',
-                #     }
-                #     our_uid = getUser.create_uid(log_type, crawler, user_dict, env)
-                #     Common.logger(log_type, crawler).info(f'Newly created on-platform UID: {our_uid}')
-                #     if env == 'prod':
-                #         our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
-                #     else:
-                #         our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
-                #     Feishu.update_values(log_type, crawler, sheetid, f'D{i + 1}:G{i + 1}', [
-                #         [user_info_dict["user_id"], user_info_dict["avatar_url"], our_uid, our_user_link]])
-                #     Common.logger(log_type, crawler).info(f'User info created successfully!\n')
-                # else:
-                #     Common.logger(log_type, crawler).info("用户信息已存在\n")
-                    Feishu.update_values(log_type, crawler, "Bzv72P", f'D{i + 1}:E{i + 1}', [[out_uid, avatar_url]])
-                our_user_dict = {
-                    'user_name': user_name,
-                    'user_id': out_uid,
-                    'wechat_name': wechat_name,
-                    # 'our_uid': our_uid,
-                    # 'our_user_link': our_user_link,
-                    'avatar_url': avatar_url,
-                }
-                for k, v in our_user_dict.items():
-                    Common.logger(log_type, crawler).info(f"{k}:{v}")
-                user_list.append(our_user_dict)
-                time.sleep(1)
+    def get_users(cls, log_type, crawler, user_sheet, sheetid, i, env):
+        user_name = user_sheet[i][0]
+        wechat_name = user_sheet[i][2]
+        if wechat_name is None or wechat_name.strip() == "" or wechat_name.replace(" ", "") == "":
+            wechat_name = user_name
+        out_uid = user_sheet[i][3]
+        avatar_url = user_sheet[i][4]
+        if out_uid is None or out_uid.strip() == "" or out_uid.replace(" ", "") == "":
+            user_info_dict = cls.get_user_info(log_type=log_type, crawler=crawler, wechat_name=wechat_name, env=env)
+            out_uid = user_info_dict["user_id"]
+            avatar_url = user_info_dict["avatar_url"]
+            Feishu.update_values(log_type, crawler, sheetid, f'D{i + 1}:E{i + 1}', [[out_uid, avatar_url]])
 
-            return user_list
+        our_user_dict = {
+            'user_name': user_name,
+            'user_id': out_uid,
+            'wechat_name': wechat_name,
+            'avatar_url': avatar_url,
+        }
+        for k, v in our_user_dict.items():
+            Common.logger(log_type, crawler).info(f"{k}:{v}")
+        return our_user_dict
 
     # Get the user's fakeid
     @classmethod
@@ -484,22 +436,38 @@ class GongzhonghaoAuthor2:
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, rule_dict, env):
-        user_list = cls.get_users(log_type, crawler, "Bzv72P", env)
-        if user_list is None or len(user_list) == 0:
-            Common.logger(log_type, crawler).warning(f"抓取用户列表为空\n")
-            return
-        for user_dict in user_list:
-            try:
+        while True:
+            sheetid = "Bzv72P"
+            user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
+            if user_sheet is None:
+                Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
+                time.sleep(2)
+                continue
+            len_sheet = len(user_sheet)
+            if len_sheet <= 101:
+                Common.logger(log_type, crawler).info("抓取用户数<=100,无需启动第二套抓取脚本\n")
+                return
+            if len_sheet >= 201:
+                len_sheet = 201
+            for i in range(101, len_sheet):
+                user_dict = cls.get_users(log_type=log_type,
+                                          crawler=crawler,
+                                          user_sheet=user_sheet,
+                                          sheetid=sheetid,
+                                          i=i,
+                                          env=env)
                 Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_dict["user_name"]}\n')
-                cls.get_videoList(log_type=log_type,
-                                  crawler=crawler,
-                                  rule_dict=rule_dict,
-                                  user_dict=user_dict,
-                                  env=env)
-                Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
-                time.sleep(60)
-            except Exception as e:
-                Common.logger(log_type, crawler).info(f'Exception while crawling official account {user_dict["user_name"]}: {e}\n')
+                try:
+                    cls.get_videoList(log_type=log_type,
+                                      crawler=crawler,
+                                      rule_dict=rule_dict,
+                                      user_dict=user_dict,
+                                      env=env)
+                    Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
+                    time.sleep(60)
+                except Exception as e:
+                    Common.logger(log_type, crawler).info(f'Exception while crawling official account {user_dict["user_name"]}: {e}\n')
 
 
 if __name__ == "__main__":
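The second file gets the same restructuring plus the larger refactor: the Feishu polling loop moves from get_users into get_all_videos, and get_users now resolves one sheet row at a time instead of materializing a full user_list (the old version, including the long commented-out our_uid creation block, is dropped). All three scripts read the same sheet Bzv72P, each claiming a 100-row window; a sketch of that partitioning under hypothetical names (SCRIPT_WINDOWS, rows_for_script):

```python
# Row windows claimed by each script (row 0 of the sheet is the header).
SCRIPT_WINDOWS = {
    1: range(1, 101),    # gongzhonghao1_author.py
    2: range(101, 201),  # gongzhonghao2_author.py
    3: range(201, 301),  # gongzhonghao3_author.py
}

def rows_for_script(script_no, sheet_len):
    """Clamp a script's window to the rows actually present in the sheet."""
    window = SCRIPT_WINDOWS[script_no]
    return range(window.start, min(window.stop, sheet_len))

# Mirrors the "len_sheet <= 101 -> return" early exit above:
# with only 95 rows, script 2 has nothing to do.
assert len(rows_for_script(2, 95)) == 0
```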

+ 52 - 85
gongzhonghao/gongzhonghao_author/gongzhonghao3_author.py

@@ -48,76 +48,28 @@ class GongzhonghaoAuthor3:
         return token_dict
 
     @classmethod
-    def get_users(cls, log_type, crawler, sheetid, env):
-        while True:
-            user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-            if user_sheet is None:
-                Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
-                time.sleep(2)
-                continue
-            user_list = []
-            len_sheet = len(user_sheet)
-            if len_sheet <= 201:
-                Common.logger(log_type, crawler).info("抓取用户数<=200,无需启动第三套抓取脚本\n")
-                return
-            if len_sheet >= 301:
-                len_sheet = 301
-            for i in range(201, len_sheet):
-            # for i in range(1, 3):
-                user_name = user_sheet[i][0]
-                wechat_name = user_sheet[i][2]
-                if wechat_name is None or wechat_name.strip() == "" or wechat_name.replace(" ", "") == "":
-                    wechat_name = user_name
-                # our_uid = user_sheet[i][5]
-                # our_user_link = user_sheet[i][6]
-                out_uid = user_sheet[i][3]
-                avatar_url = user_sheet[i][4]
-                if out_uid is None or out_uid.strip() == "" or out_uid.replace(" ", "") == "":
-                    user_info_dict = cls.get_user_info(log_type=log_type, crawler=crawler, wechat_name=wechat_name, env=env)
-                    out_uid = user_info_dict["user_id"]
-                    avatar_url = user_info_dict["avatar_url"]
-                # tag1 = user_sheet[i][7]
-                # tag2 = user_sheet[i][8]
-                # tag3 = user_sheet[i][9]
-                # tag4 = user_sheet[i][10]
-                # tag5 = user_sheet[i][11]
-                # tag6 = user_sheet[i][12]
-                # Common.logger(log_type, crawler).info(f"Updating user info for {user_name}")
-                # if out_uid is None or our_uid is None:
-                #     # Info used to create our_id
-                #     user_dict = {
-                #         'recommendStatus': -6,
-                #         'appRecommendStatus': -6,
-                #         'nickName': user_info_dict["user_name"],
-                #         'avatarUrl': user_info_dict['avatar_url'],
-                #         'tagName': f'{tag1},{tag2},{tag3},{tag4},{tag5},{tag6}',
-                #     }
-                #     our_uid = getUser.create_uid(log_type, crawler, user_dict, env)
-                #     Common.logger(log_type, crawler).info(f'Newly created on-platform UID: {our_uid}')
-                #     if env == 'prod':
-                #         our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
-                #     else:
-                #         our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
-                #     Feishu.update_values(log_type, crawler, sheetid, f'D{i + 1}:G{i + 1}', [
-                #         [user_info_dict["user_id"], user_info_dict["avatar_url"], our_uid, our_user_link]])
-                #     Common.logger(log_type, crawler).info(f'User info created successfully!\n')
-                # else:
-                #     Common.logger(log_type, crawler).info("用户信息已存在\n")
-                    Feishu.update_values(log_type, crawler, "Bzv72P", f'D{i + 1}:E{i + 1}', [[out_uid, avatar_url]])
-                our_user_dict = {
-                    'user_name': user_name,
-                    'user_id': out_uid,
-                    'wechat_name': wechat_name,
-                    # 'our_uid': our_uid,
-                    # 'our_user_link': our_user_link,
-                    'avatar_url': avatar_url,
-                }
-                for k, v in our_user_dict.items():
-                    Common.logger(log_type, crawler).info(f"{k}:{v}")
-                user_list.append(our_user_dict)
-                time.sleep(1)
+    def get_users(cls, log_type, crawler, user_sheet, sheetid, i, env):
+        user_name = user_sheet[i][0]
+        wechat_name = user_sheet[i][2]
+        if wechat_name is None or wechat_name.strip() == "" or wechat_name.replace(" ", "") == "":
+            wechat_name = user_name
+        out_uid = user_sheet[i][3]
+        avatar_url = user_sheet[i][4]
+        if out_uid is None or out_uid.strip() == "" or out_uid.replace(" ", "") == "":
+            user_info_dict = cls.get_user_info(log_type=log_type, crawler=crawler, wechat_name=wechat_name, env=env)
+            out_uid = user_info_dict["user_id"]
+            avatar_url = user_info_dict["avatar_url"]
+            Feishu.update_values(log_type, crawler, sheetid, f'D{i + 1}:E{i + 1}', [[out_uid, avatar_url]])
 
-            return user_list
+        our_user_dict = {
+            'user_name': user_name,
+            'user_id': out_uid,
+            'wechat_name': wechat_name,
+            'avatar_url': avatar_url,
+        }
+        for k, v in our_user_dict.items():
+            Common.logger(log_type, crawler).info(f"{k}:{v}")
+        return our_user_dict
 
     # Get the user's fakeid
     @classmethod
@@ -490,23 +442,38 @@ class GongzhonghaoAuthor3:
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, rule_dict, env):
-        user_list = cls.get_users(log_type, crawler, "Bzv72P", env)
-        # Common.logger(log_type, crawler).info(f"user_list:{user_list}")
-        if user_list is None or len(user_list) == 0:
-            Common.logger(log_type, crawler).warning(f"抓取用户列表为空\n")
-            return
-        for user_dict in user_list:
-            try:
+        while True:
+            sheetid = "Bzv72P"
+            user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
+            if user_sheet is None:
+                Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
+                time.sleep(2)
+                continue
+            len_sheet = len(user_sheet)
+            if len_sheet <= 201:
+                Common.logger(log_type, crawler).info("抓取用户数<=200,无需启动第三套抓取脚本\n")
+                return
+            if len_sheet >= 301:
+                len_sheet = 301
+            for i in range(201, len_sheet):
+                user_dict = cls.get_users(log_type=log_type,
+                                          crawler=crawler,
+                                          user_sheet=user_sheet,
+                                          sheetid=sheetid,
+                                          i=i,
+                                          env=env)
                 Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_dict["user_name"]}\n')
-                cls.get_videoList(log_type=log_type,
-                                  crawler=crawler,
-                                  rule_dict=rule_dict,
-                                  user_dict=user_dict,
-                                  env=env)
-                Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
-                time.sleep(60)
-            except Exception as e:
-                Common.logger(log_type, crawler).info(f'Exception while crawling official account {user_dict["user_name"]}: {e}\n')
+                try:
+                    cls.get_videoList(log_type=log_type,
+                                      crawler=crawler,
+                                      rule_dict=rule_dict,
+                                      user_dict=user_dict,
+                                      env=env)
+                    Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
+                    time.sleep(60)
+                except Exception as e:
+                    Common.logger(log_type, crawler).info(f'Exception while crawling official account {user_dict["user_name"]}: {e}\n')
 
 
 if __name__ == "__main__":
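The third file mirrors the second, shifted to rows 201-300. Since get_users now takes the already-fetched sheet plus a row index and returns a single dict, its row handling can be smoke-tested against a stubbed sheet without touching Feishu. A hypothetical sketch (resolve_row stands in for get_users minus the get_user_info/update_values I/O; column layout per the code: A=user_name, C=wechat_name, D=out_uid, E=avatar_url):

```python
def resolve_row(sheet, i):
    """Pure-Python mirror of the new get_users row logic; the real code
    also calls get_user_info and writes D:E back when out_uid is blank."""
    user_name = sheet[i][0]
    wechat_name = sheet[i][2]
    if wechat_name is None or wechat_name.strip() == "":
        wechat_name = user_name  # blank column C falls back to column A
    return {
        "user_name": user_name,
        "user_id": sheet[i][3],
        "wechat_name": wechat_name,
        "avatar_url": sheet[i][4],
    }

fake_sheet = [
    ["user_name", "", "wechat_name", "out_uid", "avatar_url"],  # header row
    ["Author A", "", "", "uid-123", "http://example.com/a.png"],
]
row = resolve_row(fake_sheet, 1)
assert row["wechat_name"] == "Author A"
assert row["user_id"] == "uid-123"
```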