# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/5/11
import os
import sys
import time
import requests
import urllib3
# The working directory is appended to sys.path before the project imports
# below; presumably this lets the `main` package resolve when the script is
# started from the project root -- TODO confirm.
sys.path.append(os.getcwd())
from main.common import Common
from main.feishu_lib import Feishu

# Explicitly disable any proxy for both schemes.
proxies = {"http": None, "https": None}

# Upper bound (seconds) for the feeds request so a stalled connection cannot
# block the crawler forever.
REQUEST_TIMEOUT_SECONDS = 30


# Sensitive-word dictionary
def sensitive_words():
    """Return the sensitive words stored in the Feishu sheet "QQrfQ7".

    The sheet is read through ``Feishu.get_values_batch`` and flattened
    row by row; cells whose value is ``None`` (empty cells) are skipped.

    Returns:
        list: every non-``None`` cell value, in sheet order.
    """
    rows = Feishu.get_values_batch("QQrfQ7")
    return [cell for row in rows for cell in row if cell is not None]


# Fetch the feed list
def get_feeds():
    """Fetch the recommend feed and record new, acceptable videos in Feishu.

    Posts a fixed payload to the ``get_recommend_trends`` endpoint, then for
    each item of ``data.list`` extracts and logs its fields. A video is
    skipped when any required field is missing, when its title contains a
    sensitive word, when it is considered watermarked, or when its id is
    already present in sheet "onyBDH" or sheet "S714lO". Otherwise a row is
    inserted into sheet "S714lO" and filled with the video's data.

    Any exception raised along the way is logged and swallowed, so the
    function always returns ``None``.
    """
    url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
    # NOTE(review): credentials/tokens are hard-coded below, as in the
    # original; consider moving them to configuration.
    headers = {
        "x-b3-traceid": "17e832d1a42807",
        "X-Token-Id": "4bff41a8c35f054fa915dc71b937ac70-1145266232",
        "uid": "uid 250e6514-fd83-446c-a880-e274c7f17bce",
        "content-type": "application/json",
        "Accept-Encoding": "gzip,compress,br,deflate",
        "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
                      ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
                      'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
        "Referer": "https://servicewechat.com/wxd7911e4c177690e4/616/page-frame.html"
    }
    data = {
        "log_params": {
            "page": "discover_rec",
            "common": {
                "brand": "iPhone",
                "device": "iPhone 11",
                "os": "iOS 14.7.1",
                "weixinver": "8.0.20",
                "srcver": "2.24.2",
                "net": "none"
            }
        },
        "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
        "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
        "share_width": 625,
        "share_height": 500,
        "ext": {
            "fmid": 0,
            "items": {
                "15408539": {"type": "rec", "pd": 328.31, "ct": 1649248176494, "ut": 1649248509951},
                "36776414": {"type": "nice", "pd": 33.501, "ct": 1648893152909, "ut": 1648893189506},
                "39467179": {"type": "rec", "pd": 49.343, "ct": 1649247992882, "ut": 1649248043566},
                "40347940": {"type": "rec", "pd": 3.681, "ct": 1649248080042, "ut": 1649248084907},
                "42285576": {"type": "rec", "pd": 0.66, "ct": 1649248087683, "ut": 1649248089445},
                "42300668": {"type": "rec", "pd": 13.5, "ct": 1649248059189, "ut": 1649248074218},
                "42306954": {"type": "rec", "pd": 4.918, "ct": 1649248091376, "ut": 1649248097663},
                "42328061": {"type": "reflux", "pd": 4.001, "ct": 1649248510102, "ut": 1649248516806},
                "90004473051301": {"type": "my", "pd": 0, "ct": 1649247069688, "ut": 1649247069688},
                "80004478730156": {"type": "other", "pd": 153, "ct": 1649247850322, "ut": 1649247919263}
            }
        },
        "app": "xng",
        "rec_scene": "discover_rec",
        "log_common_params": {
            "e": [{
                "data": {
                    "page": "discoverIndexPage",
                    "topic": "recommend"
                },
                "ab": {}
            }],
            "ext": {
                "brand": "iPhone",
                "device": "iPhone 11",
                "os": "iOS 14.7.1",
                "weixinver": "8.0.20",
                "srcver": "2.24.2",
                "net": "wifi",
                "scene": "1089"
            },
            "pj": "1",
            "pf": "2",
            "session_id": "cd944ac9-f35f-47ac-9aa4-2f354afec0e2"
        },
        "refresh": False,
        "token": "132645b6e2b996aaad1713a557456816",
        "uid": "250e6514-fd83-446c-a880-e274c7f17bce",
        "proj": "ma",
        "wx_ver": "8.0.20",
        "code_ver": "3.61.0"
    }

    try:
        # TLS verification is switched off for this request, so silence the
        # warning urllib3 would otherwise emit.
        # NOTE(review): verify=False disables certificate checks -- confirm
        # this is intentional.
        urllib3.disable_warnings()
        r = requests.post(url=url, headers=headers, json=data, proxies=proxies,
                          verify=False, timeout=REQUEST_TIMEOUT_SECONDS)
        # Parse the body once instead of re-parsing it for every check.
        response = r.json()

        if "data" not in response:
            Common.logger().warning("获取视频feeds错误:{}", r.text)
        elif "list" not in response["data"]:
            # NOTE(review): the message mentions a 10s sleep, but no sleep is
            # performed in this function.
            Common.logger().warning("获取视频feeds无数据,休眠10s:{}", response["data"])
        else:
            # Video list data
            feeds = response["data"]["list"]
            # The word list does not depend on the video, so fetch it once
            # per call rather than once per video.
            words = sensitive_words() if feeds else []

            for feed in feeds:
                # Title: strip characters that are removed/replaced below.
                # The replace chain is kept exactly as found, including the
                # repeated "?" entry.
                if "title" in feed:
                    video_title = (
                        feed["title"].strip().replace("\n", "")
                        .replace("/", "").replace("\r", "").replace("#", "")
                        .replace(".", "。").replace("\\", "").replace("&NBSP", "")
                        .replace(":", "").replace("*", "").replace("?", "")
                        .replace("?", "").replace('"', "").replace("<", "")
                        .replace(">", "").replace("|", "").replace(" ", "")
                    )
                    Common.logger().info("标题:{}", video_title)
                else:
                    video_title = ""
                    Common.logger().info("当前视频无标题:{}", video_title)

                # Video ID
                if "vid" in feed:
                    video_id = feed["vid"]
                    Common.logger().info("视频ID:{}", video_id)
                else:
                    video_id = ""
                    Common.logger().info("当前视频无ID:{}", video_id)

                # Play count
                if "play_pv" in feed:
                    video_play_cnt = feed["play_pv"]
                    Common.logger().info("视频播放量:{}", video_play_cnt)
                else:
                    video_play_cnt = ""
                    Common.logger().info("当前视频无播放量:{}", video_play_cnt)

                # Like count
                if "favor" in feed:
                    video_like_cnt = feed["favor"]["total"]
                    Common.logger().info("视频点赞量:{}", video_like_cnt)
                else:
                    video_like_cnt = ""
                    Common.logger().info("当前视频无点赞量:{}", video_like_cnt)

                # Share count
                if "share" in feed:
                    video_share_cnt = feed["share"]
                    Common.logger().info("视频分享量:{}", video_share_cnt)
                else:
                    video_share_cnt = ""
                    Common.logger().info("当前视频无分享量:{}", video_share_cnt)

                # Comment count
                if "comment_count" in feed:
                    video_comment_cnt = feed["comment_count"]
                    Common.logger().info("视频评论数:{}", video_comment_cnt)
                else:
                    video_comment_cnt = ""
                    Common.logger().info("当前视频无评论:{}", video_comment_cnt)

                # Duration: "du" is divided by 1000 and logged as seconds.
                if "du" in feed:
                    video_duration = int(feed["du"] / 1000)
                    Common.logger().info("视频时长:{}秒", video_duration)
                else:
                    video_duration = ""
                    Common.logger().info("当前视频无时长:{}", video_duration)

                # Width and height. Both keys must be present; the previous
                # test `"w" or "h" in ...` was always true, so a missing key
                # raised KeyError and aborted the whole batch.
                if "w" in feed and "h" in feed:
                    video_width = feed["w"]
                    video_height = feed["h"]
                    Common.logger().info("视频宽高:{}*{}", video_width, video_height)
                else:
                    video_width = ""
                    video_height = ""
                    Common.logger().info("当前视频无宽高:{}{}", video_width, video_height)

                # Publish time: "t" is divided by 1000 before being formatted
                # as local time for the log.
                if "t" in feed:
                    video_send_time = feed["t"]
                    Common.logger().info(
                        "视频发布时间:{}",
                        time.strftime(
                            "%Y-%m-%d %H:%M:%S",
                            time.localtime(int(video_send_time) / 1000)))
                else:
                    video_send_time = ""
                    Common.logger().info("当前视频无发布时间:{}", video_send_time)

                # User name / avatar. The replace chain is kept exactly as
                # found, including the repeated " " entry.
                if "user" in feed:
                    user_name = (
                        feed["user"]["nick"].strip().replace("\n", "")
                        .replace("/", "").replace("快手", "").replace(" ", "")
                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
                    )
                    head_url = feed["user"]["hurl"]
                    Common.logger().info("用户名:{}", user_name)
                    Common.logger().info("用户头像:{}", head_url)
                else:
                    user_name = ""
                    head_url = ""
                    Common.logger().info("当前视频无用户名:{}", user_name)
                    Common.logger().info("当前视频无用户头像:{}", head_url)

                # Video cover
                if "url" in feed:
                    cover_url = feed["url"]
                    Common.logger().info("视频封面:{}", cover_url)
                else:
                    cover_url = ""
                    Common.logger().info("当前视频无视频封面:{}", cover_url)

                # Video playback URL
                if "v_url" in feed:
                    video_url = feed["v_url"]
                    Common.logger().info("播放地址:{}", video_url)
                else:
                    video_url = ""
                    Common.logger().info("当前视频无播放地址:{}", video_url)

                # Video watermark flag: 0 = none, 1 = present. A missing
                # "v_ort" is treated as watermarked.
                if "v_ort" in feed:
                    video_logo = feed["v_ort"]
                else:
                    video_logo = "1"

                required_fields = (
                    video_title, video_id, video_duration, video_send_time,
                    user_name, head_url, cover_url, video_url,
                )

                # Filter out invalid videos
                if any(value == "" for value in required_fields):
                    Common.logger().warning("无效视频")
                # Filter by sensitive words; an empty word never counts as
                # a hit.
                elif any(word and word in video_title for word in words):
                    Common.logger().info("视频已中敏感词:{}".format(video_title))
                # Filter out watermarked videos. str() keeps the length check
                # from raising when the id is not a string.
                # NOTE(review): a feed item without "tpl_id" raises KeyError
                # here, which ends processing of the remaining items.
                elif str(video_logo) == "1" and feed["tpl_id"] != 0 and len(str(video_id)) > 15:
                    Common.logger().info("视频有水印:{}", video_title)
                # De-duplicate against the cloud doc:
                # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=onyBDH
                elif video_id in [cell for row in Feishu.get_values_batch("onyBDH") for cell in row]:
                    Common.logger().info("该视频已下载:{}", video_title)
                # De-duplicate against the cloud doc:
                # https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=S714lO
                # Re-read for every video because rows are added to this
                # sheet inside the loop.
                elif video_id in [cell for row in Feishu.get_values_batch("S714lO") for cell in row]:
                    Common.logger().info("该视频已在feeds中:{}", video_title)
                else:
                    Common.logger().info("该视频未下载,添加至feeds中:{}".format(video_title))
                    # Feeds worksheet: insert a new first row
                    Feishu.insert_columns("S714lO", "ROWS", 1, 2)
                    # Current time (epoch seconds)
                    get_feeds_time = int(time.time())
                    # Write the data into the feeds worksheet (the original
                    # comment calls it the "kanyikan_feeds" worksheet).
                    # NOTE(review): the 1s pause presumably lets the insert
                    # settle before the update -- TODO confirm.
                    time.sleep(1)
                    Feishu.update_values("S714lO",
                                         a1=str(get_feeds_time),
                                         b1=str(video_id),
                                         c1=str(video_play_cnt),
                                         d1=str(video_title),
                                         e1=str(video_duration),
                                         f1=str(video_comment_cnt),
                                         g1=str(video_like_cnt),
                                         h1=str(video_share_cnt),
                                         i1=str(video_width)+"*"+str(video_height),
                                         j1=str(video_send_time),
                                         k1=str(user_name),
                                         l1=str(head_url),
                                         m1=str(cover_url),
                                         n1=str(video_url),
                                         # Same token that was sent in the
                                         # request payload.
                                         o1=str(data["token"]))
    except Exception as e:
        # Top-level boundary: log the failure and return normally.
        Common.logger().error("获取视频列表异常:{}", e)


if __name__ == "__main__":
    get_feeds()