| 
					
				 | 
			
			
				@@ -48,7 +48,7 @@ class ShipinhaoSearchScheduling: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if rule_duration_max == 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             rule_duration_max = 100000000 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        rule_period_min = rule_dict.get('period', {}).get('min', 0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # rule_period_min = rule_dict.get('period', {}).get('min', 0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # rule_period_max = rule_dict.get('period', {}).get('max', 100000000) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # if rule_period_max == 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         #     rule_period_max = 100000000 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -115,7 +115,7 @@ class ShipinhaoSearchScheduling: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         Common.logger(log_type, crawler).info( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         Common.logger(log_type, crawler).info( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])*1000} >= rule_publish_time_min:{int(rule_publish_time_min)}') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -125,7 +125,7 @@ class ShipinhaoSearchScheduling: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 and int(rule_favorite_cnt_max) >= int(video_dict['favorite_cnt']) >= int(rule_favorite_cnt_min) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp'])*1000 >= int(rule_publish_time_min): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return False 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -262,10 +262,6 @@ class ShipinhaoSearchScheduling: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         videos_cnt = rule_dict.get('videos_cnt', {}).get('min', 0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         index = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         while True: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if cls.download_cnt >= int(videos_cnt): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                Common.logger(log_type, crawler).info(f'搜索词:"{word}",已抓取视频数:{index}') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                cls.download_cnt = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if cls.search_elements(driver, '//*[@class="double-rich double-rich_vertical"]') is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 Common.logger(log_type, crawler).info('窗口已销毁\n') 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -283,9 +279,16 @@ class ShipinhaoSearchScheduling: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             for i, video_element in enumerate(video_element_temp): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                Common.logger(log_type, crawler).info(f"download_cnt:{cls.download_cnt}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if cls.download_cnt >= int(videos_cnt): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger(log_type, crawler).info(f'搜索词:"{word}",已抓取视频数:{cls.download_cnt}') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    cls.download_cnt = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 if video_element is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     Common.logger(log_type, crawler).info('到底啦~\n') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 cls.i += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 cls.search_elements(driver, '//div[@class="vc active__mask"]') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -371,7 +374,10 @@ class ShipinhaoSearchScheduling: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         video_dict["video_height"] = ffmpeg_dict["height"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 规则判断 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if cls.download_rule(log_type=log_type, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                             crawler=crawler, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                             video_dict=video_dict, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                             rule_dict=rule_dict) is False: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             shutil.rmtree(f"./{crawler}/videos/{md_title}/") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n") 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -397,9 +403,14 @@ class ShipinhaoSearchScheduling: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         Common.logger(log_type, crawler).info("视频上传完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if our_video_id is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # 删除视频文件夹 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # 删除视频文件夹 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                shutil.rmtree(f"./{crawler}/videos/{md_title}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                Common.logger(log_type, crawler).warning(f"our_video_id:{our_video_id}, 删除成功\n") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except FileNotFoundError: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         insert_sql = f""" insert into crawler_video(video_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                                 out_user_id, 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -573,7 +584,7 @@ class ShipinhaoSearchScheduling: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             our_user_list = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # for i in range(1, len(user_sheet)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            for i in range(1, 4): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for i in range(1, 3): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 search_word = user_sheet[i][4] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 our_uid = user_sheet[i][6] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 tag1 = user_sheet[i][8] 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -641,5 +652,9 @@ if __name__ == '__main__': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     #                                             oss_endpoint="out", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     #                                             env="dev") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # print(ShipinhaoSearchScheduling.get_users("search", "shipinhao", "wNgi6Z", "dev")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print((date.today() + timedelta(days=0)).strftime("%Y-%m-%d")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # print((date.today() + timedelta(days=0)).strftime("%Y-%m-%d")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    print(ShipinhaoSearchScheduling.repeat_out_video_id(log_type="search", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                                        crawler="shipinhao", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                                        out_video_id="123", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                                        env="dev")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     pass 
			 |