| 
					
				 | 
			
			
				@@ -24,13 +24,13 @@ def get_feeds(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     1.从看一看+小程序首页推荐,获取视频列表 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c 中去重 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl 中去重 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM 中去重 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     host = "https://search.weixin.qq.com" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     url = '/cgi-bin/recwxa/recwxavideolist?' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     video_list_session = Common.get_session() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    Common.crawler_log().info("获取视频list时,session:{}".format(video_list_session)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    Common.logger().info("获取视频list时,session:{}", video_list_session) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     header = { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         "Connection": "keep-alive", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         "content-type": "application/json", 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -60,12 +60,12 @@ def get_feeds(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         response = json.loads(r.content.decode("utf8")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if "data" not in response: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            Common.crawler_log().info("获取视频list时,session过期,随机睡眠 31-50 秒") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            Common.logger().info("获取视频list时,session过期,随机睡眠 31-50 秒") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 如果返回空信息,则随机睡眠 31-40 秒 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             time.sleep(random.randint(31, 40)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             get_feeds() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         elif "items" not in response["data"]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            Common.crawler_log().info("获取视频list时,返回空信息,随机睡眠 1-3 分钟") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            Common.logger().info("获取视频list时,返回空信息,随机睡眠 1-3 分钟") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 如果返回空信息,则随机睡眠 1-3 分钟 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             time.sleep(random.randint(60, 180)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             get_feeds() 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -74,11 +74,11 @@ def get_feeds(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             for i in range(len(items)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 # 如果该视频没有视频信息,则忽略 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 if "videoInfo" not in items[i]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info("无视频信息") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info("无视频信息") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频ID 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_id = items[i]["videoId"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频ID:{}'.format(video_id)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info('视频ID:{}', video_id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频标题 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_title = items[i]["title"].strip().replace("\n", "")\ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -87,48 +87,48 @@ def get_feeds(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         .replace("?", "").replace('"', "").replace("<", "")\ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         .replace(">", "").replace("|", "").replace(" ", "")\ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         .replace("&NBSP", "").replace(".", "。").replace(" ", "") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频标题:{}'.format(video_title)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info('视频标题:{}', video_title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频播放次数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_play_cnt = items[i]["playCount"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频播放次数:{}'.format(video_play_cnt)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info('视频播放次数:{}', video_play_cnt) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频点赞数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_liked_cnt = items[i]["liked_cnt"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频点赞数:{}'.format(video_liked_cnt)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info('视频点赞数:{}', video_liked_cnt) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频时长 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_duration = items[i]["mediaDuration"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频时长:{}秒'.format(video_duration)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info('视频时长:{}秒', video_duration) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频评论数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_comment_cnt = items[i]["comment_cnt"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频评论数:{}'.format(video_comment_cnt)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info('视频评论数:{}', video_comment_cnt) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频分享数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_shared_cnt = items[i]["shared_cnt"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频分享数:{}'.format(video_shared_cnt)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info('视频分享数:{}', video_shared_cnt) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频发布时间 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_send_date = items[i]["date"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频发布时间:{}'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(video_send_date)))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info("视频发布时间:{}", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                         time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(video_send_date))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频用户名 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_user = items[i]["source"].strip().replace("\n", "") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频用户名:{}'.format(video_user)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info('视频用户名:{}', video_user) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频宽高 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     if "short_video_info" not in items[i]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_width = "0" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_height = "0" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_resolution = str(video_width) + "*" + str(video_height) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        Common.crawler_log().info("无分辨率:{}".format(video_resolution)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        Common.logger().info("无分辨率:{}", video_resolution) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     elif len(items[i]["short_video_info"]) == 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_width = "0" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_height = "0" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_resolution = str(video_width) + "*" + str(video_height) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        Common.crawler_log().info("无分辨率:{}".format(video_resolution)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        Common.logger().info("无分辨率:{}", video_resolution) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # 视频宽 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -136,34 +136,34 @@ def get_feeds(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # 视频高 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_height = items[i]["short_video_info"]["height"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_resolution = str(video_width) + "*" + str(video_height) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        Common.crawler_log().info('视频宽高:{}'.format(video_resolution)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        Common.logger().info('视频宽高:{}', video_resolution) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频用户头像 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     video_user_cover = items[i]["bizIcon"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    Common.crawler_log().info('视频用户头像:{}'.format(video_user_cover)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    Common.logger().info('视频用户头像:{}', video_user_cover) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取视频封面 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     if "smartCoverUrl" in items[i]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_cover = items[i]["smartCoverUrl"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        Common.crawler_log().info('视频封面:{}'.format(video_cover)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        Common.logger().info('视频封面:{}', video_cover) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         video_cover = items[i]["thumbUrl"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        Common.crawler_log().info('视频封面:{}'.format(video_cover)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        Common.logger().info('视频封面:{}', video_cover) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 获取播放地址 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     if "mpInfo" in items[i]["videoInfo"]["videoCdnInfo"].keys(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         if len(items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             url = items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            Common.crawler_log().info('视频播放地址:{}'.format(url)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            Common.logger().info('视频播放地址:{}', url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             url = items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            Common.crawler_log().info('视频播放地址:{}'.format(url)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            Common.logger().info('视频播放地址:{}', url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     elif "ctnInfo" in items[i]["videoInfo"]["videoCdnInfo"]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         url = items[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        Common.crawler_log().info('视频播放地址:{}'.format(url)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        Common.logger().info('视频播放地址:{}', url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         url = items[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        Common.crawler_log().info('视频播放地址:{}'.format(url)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        Common.logger().info('视频播放地址:{}', url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 过滤无效视频 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     if video_id == "" \ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -178,27 +178,27 @@ def get_feeds(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             or video_user_cover == "" \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             or video_cover == "" \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             or url == "": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        Common.crawler_log().info("无效视频") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        Common.logger().info("无效视频") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         if video_id in [j for i in Feishu.get_values_batch("20ce0c") for j in i]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            Common.crawler_log().info("该视频已下载:{}".format(video_title)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            Common.logger().info("该视频已下载:{}", video_title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            Common.crawler_log().info("该视频未下载:{}".format(video_title)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            Common.logger().info("该视频未下载:{}", video_title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=Y8N3Vl 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            if video_id in [j for i in Feishu.get_values_batch("Y8N3Vl") for j in i]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                Common.crawler_log().info("该视频已在kanyikan_feeds_1中:{}".format(video_title)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            if video_id in [j for i in Feishu.get_values_batch("SdCHOM") for j in i]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                Common.logger().info("该视频已在kanyikan_feeds_1中:{}", video_title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                Common.crawler_log().info("添加该视频信息至kanyikan_feeds_1:{}".format(video_title)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                Common.logger().info("添加该视频信息至kanyikan_feeds_1:{}", video_title) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                 # 看一看+工作表,插入首行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                print(Feishu.insert_columns("Y8N3Vl")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                print(Feishu.insert_columns("SdCHOM")) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                 # 获取当前时间 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                 get_feeds_time = int(time.time()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                 # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                Feishu.update_values("Y8N3Vl", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                Feishu.update_values("SdCHOM", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                                      a1=str(get_feeds_time), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                                      b1=str(video_id), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                                      c1=str(video_play_cnt), 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -215,7 +215,7 @@ def get_feeds(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                                      n1=str(url), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                                      o1=str(video_list_session)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        Common.crawler_log().error("获取视频 list 时异常:{}".format(e)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        Common.logger().exception("获取视频 list 时异常:{}", e) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == "__main__": 
			 |