# ks_feed_list.py
import json
import os
import time
from datetime import datetime, timedelta
from hashlib import md5
from typing import Optional, Tuple, Union
from urllib.parse import urlencode

import requests

from common import Oss
from common.sql_help import sqlCollect
  10. headers = {
  11. 'Accept-Language': 'zh-cn',
  12. 'Connection': 'keep-alive',
  13. 'Content-Type': 'application/x-www-form-urlencoded',
  14. 'Host': 'creator-app.kuaishou.com',
  15. 'User-Agent': 'kwai-android aegon/3.12.1',
  16. }
  17. class KsFeedVideo:
  18. CATEGORY_IDS = {
  19. 1: "生活",
  20. 2: "才艺",
  21. 3: "时尚",
  22. 4: "宠物",
  23. 5: "读书",
  24. 6: "二次元",
  25. 7: "家居",
  26. 8: "数码",
  27. 9: "搞笑",
  28. 10: "健康",
  29. 11: "旅游",
  30. 12: "美食",
  31. 13: "美妆",
  32. 14: "汽车",
  33. 15: "亲子",
  34. 16: "情感",
  35. 17: "三农",
  36. 18: "摄影",
  37. 19: "舞蹈",
  38. 20: "颜值",
  39. 21: "音乐",
  40. 22: "影视",
  41. 23: "短剧",
  42. 24: "游戏",
  43. 25: "运动",
  44. 26: "资讯",
  45. 27: "人文"
  46. }
  47. current_category_index = 0
  48. @staticmethod
  49. def calculate_sig(data):
  50. src = ''.join([f'{key}={data[key]}' for key in sorted(data.keys())])
  51. salt = '08d8eece8e83'
  52. return md5(f'{src}{salt}'.encode()).hexdigest()
  53. """
  54. 切换品类
  55. """
  56. @classmethod
  57. def switch_category(cls):
  58. if cls.current_category_index >= len(cls.CATEGORY_IDS):
  59. cls.current_category_index = 0
  60. category_id = list(cls.CATEGORY_IDS.keys())[cls.current_category_index]
  61. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/category/confirm/optimize'
  62. data = {
  63. 'isRecommendChange': False,
  64. 'categoryId': category_id,
  65. 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
  66. 'client_key': '214c9979',
  67. }
  68. sig = cls.calculate_sig(data)
  69. data['sig'] = sig
  70. response = requests.post(url=url, headers=headers, data=data)
  71. body = response.content.decode()
  72. cls.current_category_index += 1
  73. return body
  74. """
  75. 获取feed流信息
  76. """
  77. @classmethod
  78. def get_feed_list(cls):
  79. cls.switch_category()
  80. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/feed'
  81. data = {
  82. 'cs': False,
  83. 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
  84. 'client_key': '214c9979',
  85. }
  86. sig = cls.calculate_sig(data)
  87. data['sig'] = sig
  88. response = requests.post(url=url, headers=headers, data=data)
  89. body = response.content.decode()
  90. return body
  91. """
  92. 获取观众画像
  93. """
  94. @classmethod
  95. def analyze_photo(cls, photo_id):
  96. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/photo/analysis'
  97. headers = {
  98. 'Accept-Language': 'zh-cn',
  99. 'Connection': 'keep-alive',
  100. 'Content-Type': 'application/x-www-form-urlencoded',
  101. 'Host': 'creator-app.kuaishou.com',
  102. }
  103. data = {
  104. 'photoId': photo_id,
  105. 'client_key': '214c9979',
  106. }
  107. sig = cls.calculate_sig(data)
  108. data['sig'] = sig
  109. response = requests.post(url=url, headers=headers, data=data)
  110. body = response.content.decode()
  111. json_body = json.loads(body)
  112. user_range = json_body['data']['play']['userRange']
  113. if len(user_range) == 0:
  114. return False, "无画像"
  115. age_range = user_range['ageRange']
  116. value = age_range[5]['value']
  117. value = int(value.strip('%'))
  118. if value >= 50:
  119. return True, value
  120. else:
  121. return False, value
  122. """
  123. 视频时长转换成秒
  124. """
  125. @classmethod
  126. def milliseconds_to_seconds(cls, milliseconds):
  127. seconds = milliseconds / 1000
  128. return int(seconds)
  129. """
  130. 判断当前视频是否在90天内
  131. """
  132. @classmethod
  133. def get_video_data(cls, timestamp_str):
  134. timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
  135. # 获取当前时间
  136. current_time = datetime.now()
  137. difference = current_time - timestamp
  138. if difference <= timedelta(days=90):
  139. return False
  140. else:
  141. return True
  142. """
  143. 生成目录
  144. """
  145. @classmethod
  146. def create_folders(cls):
  147. video_path_url = "/Users/tzld/Desktop/ks_automation/path_video/"
  148. if not os.path.exists(video_path_url):
  149. os.makedirs(video_path_url)
  150. return video_path_url
  151. """
  152. 删除文件
  153. """
  154. @classmethod
  155. def remove_files(cls, video_path_url):
  156. if os.path.exists(video_path_url) and os.path.isdir(video_path_url):
  157. for root, dirs, files in os.walk(video_path_url):
  158. for file in files:
  159. file_path = os.path.join(root, file)
  160. os.remove(file_path)
  161. for dir in dirs:
  162. dir_path = os.path.join(root, dir)
  163. os.rmdir(dir_path)
  164. """
  165. 视频下载
  166. """
  167. @classmethod
  168. def download_video(cls, video_url, path_url, video_id):
  169. for i in range(3):
  170. payload = {}
  171. headers = {}
  172. response = requests.request("GET", video_url, headers=headers, data=payload)
  173. if response.status_code == 200:
  174. # 以二进制写入模式打开文件
  175. video = path_url + str(video_id) + '.mp4'
  176. with open(f"{video}", "wb") as file:
  177. # 将响应内容写入文件
  178. file.write(response.content)
  179. time.sleep(5)
  180. return video
  181. return ''
  182. """
  183. 获取票圈ID
  184. """
  185. @classmethod
  186. def get_id_by_category(cls, category_name):
  187. category_list = [
  188. {"id": 71502003, "category": "生活"},
  189. {"id": 71502004, "category": "才艺"},
  190. {"id": 71502005, "category": "时尚"},
  191. {"id": 71502006, "category": "宠物"},
  192. {"id": 71502007, "category": "读书"},
  193. {"id": 71502008, "category": "二次元"},
  194. {"id": 71502009, "category": "家居"},
  195. {"id": 71502010, "category": "数码"},
  196. {"id": 71502011, "category": "搞笑"},
  197. {"id": 71502012, "category": "健康"},
  198. {"id": 71502013, "category": "旅游"},
  199. {"id": 71502014, "category": "美食"},
  200. {"id": 71502015, "category": "美妆"},
  201. {"id": 71502016, "category": "汽车"},
  202. {"id": 71502018, "category": "亲子"},
  203. {"id": 71502019, "category": "情感"},
  204. {"id": 71502020, "category": "三农"},
  205. {"id": 71502021, "category": "摄影"},
  206. {"id": 71502022, "category": "舞蹈"},
  207. {"id": 71502023, "category": "颜值"},
  208. {"id": 71502024, "category": "音乐"},
  209. {"id": 71502025, "category": "影视"},
  210. {"id": 71502026, "category": "短剧"},
  211. {"id": 71502027, "category": "游戏"},
  212. {"id": 71502028, "category": "运动"},
  213. {"id": 71502029, "category": "资讯"},
  214. {"id": 71502030, "category": "人文"}
  215. ]
  216. for category in category_list:
  217. if category['category'] == category_name:
  218. return category['id']
  219. return None
  220. """
  221. 新生成视频上传到对应账号下
  222. """
  223. @classmethod
  224. def insert_piaoquantv(cls, new_video_path, new_title, n_id):
  225. url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
  226. headers = {
  227. 'User-Agent': 'PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0',
  228. 'cookie': 'JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78',
  229. 'referer': 'http://appspeed.piaoquantv.com',
  230. 'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
  231. 'accept-language': 'zh-CN,zh-Hans;q=0.9',
  232. 'Content-Type': 'application/x-www-form-urlencoded'
  233. }
  234. payload = {
  235. 'deviceToken': '9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408',
  236. 'fileExtensions': 'MP4',
  237. 'loginUid': n_id,
  238. 'networkType': 'Wi-Fi',
  239. 'platform': 'iOS',
  240. 'requestId': 'fb972cbd4f390afcfd3da1869cd7d001',
  241. 'sessionId': '362290597725ce1fa870d7be4f46dcc2',
  242. 'subSessionId': '362290597725ce1fa870d7be4f46dcc2',
  243. 'title': new_title,
  244. 'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
  245. 'uid': n_id,
  246. 'versionCode': '486',
  247. 'versionName': '3.4.12',
  248. 'videoFromScene': '1',
  249. 'videoPath': new_video_path,
  250. 'viewStatus': '1'
  251. }
  252. encoded_payload = urlencode(payload)
  253. response = requests.request("POST", url, headers=headers, data=encoded_payload)
  254. data = response.json()
  255. code = data["code"]
  256. if code == 0:
  257. new_video_id = data["data"]["id"]
  258. return new_video_id
  259. else:
  260. return None
  261. @classmethod
  262. def get_data(cls):
  263. for category_id, category_name in cls.CATEGORY_IDS.items():
  264. feed_data = cls.get_feed_list()
  265. feed_data = json.loads(feed_data)
  266. feeds = feed_data['feeds']
  267. for feed in feeds:
  268. photo_id = feed["photo_id"] # 视频ID
  269. status = sqlCollect.is_used(photo_id)
  270. if status:
  271. user_name = feed["user_name"] # 用户名
  272. user_sex = feed["user_sex"] # 性别 F为女,U为男
  273. time_data = feed["time"] # 发布时间
  274. caption = feed["caption"] # 标题
  275. view_count = feed["view_count"] # 浏览数
  276. like_count = feed["like_count"] # 点赞数
  277. share_count = feed["share_count"] # 分享数
  278. duration = feed["duration"] # 时长/秒
  279. duration = cls.milliseconds_to_seconds(duration)
  280. main_mv_url = feed["main_mv_url"] # 视频链接
  281. thumbnail_url = feed["thumbnail_url"] # 视频封面
  282. user_id = feed["user_id"] # 用户id非用户主页id
  283. time_data = cls.get_video_data(time_data)
  284. if time_data:
  285. continue
  286. video_percent = '%.2f' % (share_count / like_count)
  287. special = float(0.2)
  288. # if float(video_percent) < special or share_count < 2000 or duration < 30 or duration > 6000:
  289. # continue
  290. value, age = cls.analyze_photo(photo_id)
  291. # path_url = cls.create_folders() # 创建目录
  292. # video_path = cls.download_video(main_mv_url, path_url, photo_id)
  293. # if not os.path.isfile(video_path):
  294. # cls.remove_files(path_url)
  295. # continue
  296. oss_object_key = Oss.channel_upload_oss(main_mv_url, photo_id)
  297. time.sleep(2)
  298. oss_object = oss_object_key.get("oss_object_key")
  299. pq_id = cls.get_id_by_category(category_name)
  300. if pq_id:
  301. video_uid = cls.insert_piaoquantv(oss_object_key, caption, pq_id)
  302. if video_uid:
  303. print(video_uid)
  304. return
  305. if oss_object:
  306. pass
  307. if value:
  308. pass
  309. else:
  310. pass
  311. # Example usage:
  312. if __name__ == "__main__":
  313. KsFeedVideo.get_data()