import json
import os
import time
from datetime import datetime, timedelta
from hashlib import md5
from urllib.parse import urlencode

import requests

from common import Oss, Feishu
from common.sql_help import sqlCollect

# Default headers sent to the Kuaishou creator-app endpoints.
headers = {
    'Accept-Language': 'zh-cn',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Host': 'creator-app.kuaishou.com',
    'User-Agent': 'kwai-android aegon/3.12.1',
}


class KsFeedVideo:
    """Crawl the Kuaishou inspiration feed category by category.

    Videos that pass the filters in ``get_data`` are uploaded to OSS,
    published through the Piaoquan API and logged to a Feishu sheet; every
    processed video is also recorded through ``sqlCollect``.

    ``get_data`` walks ``CATEGORY_IDS`` in order while ``get_feed_list``
    advances ``current_category_index`` by one per call, so the category
    selected on the server matches the loop's category name only as long as
    the index starts at 0 and nothing else calls ``switch_category``.
    """

    # Kuaishou category id -> category name (iteration order matters).
    CATEGORY_IDS = {
        1: "生活",
        2: "才艺",
        3: "时尚",
        4: "宠物",
        5: "读书",
        6: "二次元",
        7: "家居",
        8: "数码",
        9: "搞笑",
        10: "健康",
        11: "旅游",
        12: "美食",
        13: "美妆",
        14: "汽车",
        15: "亲子",
        16: "情感",
        17: "三农",
        18: "摄影",
        19: "舞蹈",
        20: "颜值",
        21: "音乐",
        22: "影视",
        23: "短剧",
        24: "游戏",
        25: "运动",
        26: "资讯",
        27: "人文",
    }

    # Category name -> Piaoquan account id that receives uploads for it.
    # The gap at 71502017 is present in the original table and kept as-is.
    PIAOQUAN_IDS = {
        "生活": 71502003,
        "才艺": 71502004,
        "时尚": 71502005,
        "宠物": 71502006,
        "读书": 71502007,
        "二次元": 71502008,
        "家居": 71502009,
        "数码": 71502010,
        "搞笑": 71502011,
        "健康": 71502012,
        "旅游": 71502013,
        "美食": 71502014,
        "美妆": 71502015,
        "汽车": 71502016,
        "亲子": 71502018,
        "情感": 71502019,
        "三农": 71502020,
        "摄影": 71502021,
        "舞蹈": 71502022,
        "颜值": 71502023,
        "音乐": 71502024,
        "影视": 71502025,
        "短剧": 71502026,
        "游戏": 71502027,
        "运动": 71502028,
        "资讯": 71502029,
        "人文": 71502030,
    }

    # NOTE(review): session credentials are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    API_ST = "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB"
    CLIENT_KEY = '214c9979'

    # Seconds to wait on any HTTP call before giving up, so that a stalled
    # connection cannot hang the crawler indefinitely.
    REQUEST_TIMEOUT = 30

    # Position in CATEGORY_IDS of the category to select on the next switch.
    current_category_index = 0

    @staticmethod
    def calculate_sig(data):
        """Return the request signature for the form fields in *data*.

        The fields are rendered as ``key=value``, concatenated in sorted key
        order with no separator, suffixed with a fixed salt and hashed with
        MD5; the hex digest is returned.
        """
        src = ''.join(f'{key}={data[key]}' for key in sorted(data))
        salt = '08d8eece8e83'
        return md5(f'{src}{salt}'.encode()).hexdigest()

    @classmethod
    def switch_category(cls):
        """Select the next category on the server and advance the index.

        The index wraps to 0 once it runs past the end of ``CATEGORY_IDS``.
        Returns the decoded response body.
        """
        if cls.current_category_index >= len(cls.CATEGORY_IDS):
            cls.current_category_index = 0
        category_id = list(cls.CATEGORY_IDS)[cls.current_category_index]
        url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/category/confirm/optimize'
        data = {
            'isRecommendChange': False,
            'categoryId': category_id,
            'kuaishou.api_st': cls.API_ST,
            'client_key': cls.CLIENT_KEY,
        }
        data['sig'] = cls.calculate_sig(data)
        response = requests.post(url=url, headers=headers, data=data,
                                 timeout=cls.REQUEST_TIMEOUT)
        body = response.content.decode()
        cls.current_category_index += 1
        return body

    @classmethod
    def get_feed_list(cls):
        """Switch to the next category, then fetch its inspiration feed.

        Returns the decoded response body (a JSON string).
        """
        cls.switch_category()
        url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/feed'
        data = {
            'cs': False,
            'kuaishou.api_st': cls.API_ST,
            'client_key': cls.CLIENT_KEY,
        }
        data['sig'] = cls.calculate_sig(data)
        response = requests.post(url=url, headers=headers, data=data,
                                 timeout=cls.REQUEST_TIMEOUT)
        return response.content.decode()

    @classmethod
    def analyze_photo(cls, photo_id):
        """Fetch the audience profile of a video and apply the age filter.

        Returns a ``(reject, value)`` tuple:

        * ``(False, "无画像")`` when the response carries no ``userRange``;
        * otherwise ``value`` is the integer percentage read from
          ``ageRange[5]``, and ``reject`` is ``True`` when it is below 50.

        ``get_data`` skips the video when ``reject`` is truthy.
        """
        url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/photo/analysis'
        # This endpoint is called without the User-Agent of the shared headers.
        request_headers = {
            'Accept-Language': 'zh-cn',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Host': 'creator-app.kuaishou.com',
        }
        data = {
            'photoId': photo_id,
            'client_key': cls.CLIENT_KEY,
        }
        data['sig'] = cls.calculate_sig(data)
        response = requests.post(url=url, headers=request_headers, data=data,
                                 timeout=cls.REQUEST_TIMEOUT)
        json_body = json.loads(response.content.decode())
        user_range = json_body['data']['play']['userRange']
        # Truthiness also covers a null userRange, not just an empty one.
        if not user_range:
            return False, "无画像"
        # NOTE(review): index 5 is presumably the oldest age bucket — confirm.
        value = int(user_range['ageRange'][5]['value'].strip('%'))
        return value < 50, value

    @classmethod
    def milliseconds_to_seconds(cls, milliseconds):
        """Convert a duration in milliseconds to whole seconds (truncated)."""
        return int(milliseconds / 1000)

    @classmethod
    def get_video_data(cls, timestamp_str):
        """Tell whether a video is too old to be used.

        *timestamp_str* must be formatted as ``%Y-%m-%d %H:%M:%S``. Returns
        ``True`` when it lies more than 90 days before the current local
        time and ``False`` when it is within the last 90 days.
        """
        timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
        return datetime.now() - timestamp > timedelta(days=90)

    @classmethod
    def get_id_by_category(cls, category_name):
        """Return the Piaoquan account id for *category_name*, or ``None``."""
        return cls.PIAOQUAN_IDS.get(category_name)

    @classmethod
    def insert_piaoquantv(cls, new_video_path, new_title, n_id, cover):
        """Publish an uploaded video under the Piaoquan account *n_id*.

        Returns the new video id when the response ``code`` is 0, otherwise
        ``None``.
        """
        url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
        request_headers = {
            'User-Agent': 'PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0',
            'cookie': 'JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78',
            'referer': 'http://appspeed.piaoquantv.com',
            'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
            'accept-language': 'zh-CN,zh-Hans;q=0.9',
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        payload = {
            'coverImgPath': cover,
            'deviceToken': '9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408',
            'fileExtensions': 'MP4',
            'loginUid': n_id,
            'networkType': 'Wi-Fi',
            'platform': 'iOS',
            'requestId': 'fb972cbd4f390afcfd3da1869cd7d001',
            'sessionId': '362290597725ce1fa870d7be4f46dcc2',
            'subSessionId': '362290597725ce1fa870d7be4f46dcc2',
            'title': new_title,
            'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
            'uid': n_id,
            'versionCode': '486',
            'versionName': '3.4.12',
            'videoFromScene': '1',
            'videoPath': new_video_path,
            'viewStatus': '1'
        }
        response = requests.request("POST", url, headers=request_headers,
                                    data=urlencode(payload),
                                    timeout=cls.REQUEST_TIMEOUT)
        data = response.json()
        if data["code"] == 0:
            return data["data"]["id"]
        return None

    @classmethod
    def _record_rejected(cls, fields, photo_id, category_name):
        """Store a video that failed a filter (status ``'1'``, no upload data)."""
        sqlCollect.insert_ks_data(*fields, '1', photo_id, category_name,
                                  age=None, oss_object=None, video_uid=None)

    @classmethod
    def get_data(cls):
        """Run one pass over every category and process its feed.

        A feed entry is skipped when ``sqlCollect.is_used`` reports it as
        already seen. It is recorded with status ``'1'`` and skipped when it
        is older than 90 days, when shares/likes (rounded to two decimals) is
        below 0.2, when it has fewer than 2000 shares, when its duration is
        outside 30..6000 seconds, or when ``analyze_photo`` rejects it.
        Everything else is uploaded to OSS, published to Piaoquan, recorded
        with status ``'0'`` and appended to the Feishu sheet.
        """
        for category_name in cls.CATEGORY_IDS.values():
            feed_data = json.loads(cls.get_feed_list())
            for feed in feed_data['feeds']:
                # The id was previously a hard-coded constant, so every entry
                # was deduplicated and analysed as the same video.
                # NOTE(review): assumes feed entries expose "photo_id" — confirm.
                photo_id = feed["photo_id"]
                if sqlCollect.is_used(photo_id):
                    continue

                user_name = feed["user_name"]  # user name
                user_sex = feed["user_sex"]  # gender (original note: F female, U male)
                time_data = feed["time"]  # publish time
                caption = feed["caption"]  # title
                view_count = feed["view_count"]  # views
                like_count = feed["like_count"]  # likes
                share_count = feed["share_count"]  # shares
                # The feed value is passed through the ms -> s conversion.
                duration = cls.milliseconds_to_seconds(feed["duration"])
                main_mv_url = feed["main_mv_url"]  # video URL
                thumbnail_url = feed["thumbnail_url"]  # cover image URL
                user_id = feed["user_id"]  # user id (not the profile-page id)

                fields = (user_name, user_sex, time_data, caption, view_count,
                          like_count, share_count, duration, main_mv_url,
                          thumbnail_url, user_id)

                if cls.get_video_data(time_data):
                    cls._record_rejected(fields, photo_id, category_name)
                    continue

                # A video without likes counts as ratio 0 instead of raising
                # ZeroDivisionError; it is then rejected by the threshold.
                ratio = share_count / like_count if like_count else 0.0
                video_percent = float(f'{ratio:.2f}')
                if (video_percent < 0.2 or share_count < 2000
                        or duration < 30 or duration > 6000):
                    cls._record_rejected(fields, photo_id, category_name)
                    continue

                reject, age = cls.analyze_photo(photo_id)
                if reject:
                    cls._record_rejected(fields, photo_id, category_name)
                    continue

                oss_object_key = Oss.channel_upload_oss(main_mv_url, photo_id)
                time.sleep(2)
                oss_object = oss_object_key.get("oss_object_key")
                pq_id = cls.get_id_by_category(category_name)
                # NOTE(review): the original indentation was lost; keeping the
                # DB and Feishu writes inside this branch is a reconstruction.
                if pq_id:
                    video_uid = cls.insert_piaoquantv(oss_object, caption,
                                                      pq_id, thumbnail_url)
                    sqlCollect.insert_ks_data(*fields, '0', photo_id,
                                              category_name, age, oss_object,
                                              video_uid)
                    formatted_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    values = [[category_name, user_name, user_sex, caption,
                               view_count, like_count, share_count, duration,
                               main_mv_url, thumbnail_url, user_id, age, pq_id,
                               video_uid, time_data, formatted_time]]
                    # Insert an empty row at the top, then fill it in.
                    Feishu.insert_columns("PlcisKhObhzmBothRutc65sJnph",
                                          "823f74", "ROWS", 1, 2)
                    time.sleep(0.5)
                    Feishu.update_values("PlcisKhObhzmBothRutc65sJnph",
                                         "823f74", "A2:Z2", values)
            # NOTE(review): pausing once per category is a reconstruction of
            # the lost indentation — confirm the intended scope.
            time.sleep(120)


# Example usage:
if __name__ == "__main__":
    KsFeedVideo.get_data()