commit 72b619cb00 by wangkun (2 years ago)

shipinhao/shipinhao_search/shipinhao_search.py  +31 -31

@@ -297,23 +297,23 @@ class ShipinhaoSearch:
 
         index = 0
         while True:
-            try:
-                if cls.search_elements(driver, '//*[@class="double-rich double-rich_vertical"]') is None:
-                    Common.logger(log_type, crawler).info('窗口已销毁\n')
-                    return
-
-                Common.logger(log_type, crawler).info('获取视频列表\n')
-                video_elements = cls.search_elements(driver, '//div[@class="vc active__mask"]')
-                if video_elements is None:
-                    Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
-                    return
-
-                video_element_temp = video_elements[index:]
-                if len(video_element_temp) == 0:
-                    Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
-                    return
-
-                for i, video_element in enumerate(video_element_temp):
+            if cls.search_elements(driver, '//*[@class="double-rich double-rich_vertical"]') is None:
+                Common.logger(log_type, crawler).info('窗口已销毁\n')
+                return
+
+            Common.logger(log_type, crawler).info('获取视频列表\n')
+            video_elements = cls.search_elements(driver, '//div[@class="vc active__mask"]')
+            if video_elements is None:
+                Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                return
+
+            video_element_temp = video_elements[index:]
+            if len(video_element_temp) == 0:
+                Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                return
+
+            for i, video_element in enumerate(video_element_temp):
+                try:
                     Common.logger(log_type, crawler).info(f"download_cnt:{cls.download_cnt}")
                     if cls.download_cnt >= cls.videos_cnt(log_type, crawler):
                         Common.logger(log_type, crawler).info(f'搜索词:"{word}",已抓取视频数:{cls.download_cnt}')
@@ -386,13 +386,12 @@ class ShipinhaoSearch:
                                                  video_dict=video_dict,
                                                  our_uid=our_uid,
                                                  env=env)
+                except Exception as e:
+                    Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
 
-                Common.logger(log_type, crawler).info('已抓取完一组视频,休眠1秒\n')
-                time.sleep(1)
-                index = index + len(video_element_temp)
-            except Exception as e:
-                Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
-                cls.i = 0
+            Common.logger(log_type, crawler).info('已抓取完一组视频,休眠1秒\n')
+            time.sleep(1)
+            index = index + len(video_element_temp)
 
     @classmethod
     def download_publish(cls, log_type, crawler, word, video_dict, our_uid, env):
@@ -660,19 +659,20 @@ class ShipinhaoSearch:
     def get_search_videos(cls, log_type, crawler, env):
         user_list = cls.get_users(log_type, crawler, "wNgi6Z", env)
         for user in user_list:
-            cls.i = 0
-            cls.download_cnt = 0
-            search_word = user["search_word"]
-            our_uid = user["our_uid"]
-            Common.logger(log_type, crawler).info(f"开始抓取搜索词:{search_word}")
             try:
+                cls.i = 0
+                cls.download_cnt = 0
+                search_word = user["search_word"]
+                our_uid = user["our_uid"]
+                Common.logger(log_type, crawler).info(f"开始抓取搜索词:{search_word}")
+
                 cls.start_wechat(log_type=log_type,
                                  crawler=crawler,
                                  word=search_word,
                                  our_uid=our_uid,
                                  env=env)
             except Exception as e:
-                Common.logger(log_type, crawler).error(f"search_video:{e}\n")
+                Common.logger(log_type, crawler).error(f"抓取{user['search_word']}时异常:{e}\n")
 
 
 if __name__ == '__main__':
@@ -681,12 +681,12 @@ if __name__ == '__main__':
     #                                             rule_dict='[{"videos_cnt":{"min":10,"max":0}},{"duration":{"min":30,"max":600}},{"share_cnt":{"min":3000,"max":0}},{"favorite_cnt":{"min":1000,"max":0}},{"publish_time":{"min":1672502400000,"max":0}}]',
     #                                             oss_endpoint="out",
     #                                             env="dev")
-    # print(ShipinhaoSearchScheduling.get_users("search", "shipinhao", "wNgi6Z", "dev"))
+    print(ShipinhaoSearch.get_users("search", "shipinhao", "wNgi6Z", "prod"))
     # print((date.today() + timedelta(days=0)).strftime("%Y-%m-%d"))
     # print(ShipinhaoSearchScheduling.repeat_out_video_id(log_type="search",
     #                                                     crawler="shipinhao",
     #                                                     out_video_id="123",
     #                                                     env="dev"))
     # ShipinhaoSearch.download_rule(log_type="search", crawler="shipinhao", video_dict={})
-    print(ShipinhaoSearch.rule_dict(log_type="search", crawler="shipinhao"))
+    # print(ShipinhaoSearch.rule_dict(log_type="search", crawler="shipinhao"))
     pass

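The new layout in this file narrows the exception scope: the old code wrapped an entire pass of the while loop in one try/except (and reset cls.i on failure), so any error restarted the whole group, while the new code guards each video individually, logs the failure, and continues with the rest of the list. A minimal sketch of that pattern, assuming hypothetical helpers fetch_batch() and handle_item() in place of search_elements() and the crawler's per-video logic:

    import logging
    import time

    logger = logging.getLogger("search")  # stand-in for Common.logger(log_type, crawler)

    def process_batches(fetch_batch, handle_item):
        # fetch_batch() -> list of elements or None; handle_item(item) may raise on a bad element.
        index = 0
        while True:
            elements = fetch_batch()
            if elements is None:          # e.g. the window was destroyed
                logger.info("nothing to fetch, stopping")
                return

            pending = elements[index:]
            if not pending:
                logger.info("reached the end")
                return

            for item in pending:
                try:                      # per-item guard: one bad element no longer aborts the batch
                    handle_item(item)
                except Exception as e:
                    logger.error("item failed: %s", e)

            logger.info("finished one group, sleeping 1s")
            time.sleep(1)
            index += len(pending)
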
shipinhao/shipinhao_search/shipinhao_search_scheduling.py  +24 -24

@@ -262,23 +262,24 @@ class ShipinhaoSearchScheduling:
         videos_cnt = rule_dict.get('videos_cnt', {}).get('min', 0)
         index = 0
         while True:
-            try:
-                if cls.search_elements(driver, '//*[@class="double-rich double-rich_vertical"]') is None:
-                    Common.logger(log_type, crawler).info('窗口已销毁\n')
-                    return
-
-                Common.logger(log_type, crawler).info('获取视频列表\n')
-                video_elements = cls.search_elements(driver, '//div[@class="vc active__mask"]')
-                if video_elements is None:
-                    Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
-                    return
-
-                video_element_temp = video_elements[index:]
-                if len(video_element_temp) == 0:
-                    Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
-                    return
-
-                for i, video_element in enumerate(video_element_temp):
+
+            if cls.search_elements(driver, '//*[@class="double-rich double-rich_vertical"]') is None:
+                Common.logger(log_type, crawler).info('窗口已销毁\n')
+                return
+
+            Common.logger(log_type, crawler).info('获取视频列表\n')
+            video_elements = cls.search_elements(driver, '//div[@class="vc active__mask"]')
+            if video_elements is None:
+                Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                return
+
+            video_element_temp = video_elements[index:]
+            if len(video_element_temp) == 0:
+                Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                return
+
+            for i, video_element in enumerate(video_element_temp):
+                try:
                     Common.logger(log_type, crawler).info(f"download_cnt:{cls.download_cnt}")
                     if cls.download_cnt >= int(videos_cnt):
                         Common.logger(log_type, crawler).info(f'搜索词:"{word}",已抓取视频数:{cls.download_cnt}')
@@ -350,13 +351,12 @@ class ShipinhaoSearchScheduling:
                                              our_uid=our_uid,
                                              oss_endpoint=oss_endpoint,
                                              env=env)
+                except Exception as e:
+                    Common.logger(log_type, crawler).error(f"抓取单条视频时异常:{e}\n")
 
-                Common.logger(log_type, crawler).info('已抓取完一组视频,休眠1秒\n')
-                time.sleep(1)
-                index = index + len(video_element_temp)
-            except Exception as e:
-                Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
-                cls.i = 0
+            Common.logger(log_type, crawler).info('已抓取完一组视频,休眠1秒\n')
+            time.sleep(1)
+            index = index + len(video_element_temp)
 
     @classmethod
     def download_publish(cls, log_type, crawler, word, rule_dict, video_dict, our_uid, oss_endpoint, env):
@@ -642,7 +642,7 @@ class ShipinhaoSearchScheduling:
                                  oss_endpoint=oss_endpoint,
                                  env=env)
             except Exception as e:
-                Common.logger(log_type, crawler).error(f"search_video:{e}\n")
+                Common.logger(log_type, crawler).error(f"抓取{user['search_word']}时异常:{e}\n")
 
 
 if __name__ == '__main__':
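
Both files also give get_search_videos the same treatment: the per-keyword setup (counter resets, search_word, our_uid) moves inside the try, and the error log now names the keyword that failed, so a broken user record or a crash in start_wechat skips only that keyword. A rough sketch of the loop shape, with crawl_keywords and start_search as hypothetical stand-ins for the crawler's own methods:

    def crawl_keywords(user_list, start_search, logger):
        # user_list: dicts like {"search_word": ..., "our_uid": ...}
        for user in user_list:
            try:
                search_word = user["search_word"]   # setup sits inside the try, so a malformed record is caught too
                our_uid = user["our_uid"]
                logger.info(f"start crawling keyword: {search_word}")
                start_search(search_word, our_uid)
            except Exception as e:
                # user.get(...) keeps the error log itself from raising if the key is what was missing
                logger.error(f"keyword {user.get('search_word')} failed: {e}")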