# ks_feed_list.py (13 KB)
  1. import json
  2. import os
  3. from hashlib import md5
  4. import requests
  5. import time
  6. from urllib.parse import urlencode
  7. from datetime import datetime, timedelta
  8. from common import Oss, Feishu
  9. from common.sql_help import sqlCollect
  10. headers = {
  11. 'Accept-Language': 'zh-cn',
  12. 'Connection': 'keep-alive',
  13. 'Content-Type': 'application/x-www-form-urlencoded',
  14. 'Host': 'creator-app.kuaishou.com',
  15. 'User-Agent': 'kwai-android aegon/3.12.1',
  16. }
  17. class KsFeedVideo:
  18. CATEGORY_IDS = {
  19. 1: "生活",
  20. 2: "才艺",
  21. 3: "时尚",
  22. 4: "宠物",
  23. 5: "读书",
  24. 6: "二次元",
  25. 7: "家居",
  26. 8: "数码",
  27. 9: "搞笑",
  28. 10: "健康",
  29. 11: "旅游",
  30. 12: "美食",
  31. 13: "美妆",
  32. 14: "汽车",
  33. 15: "亲子",
  34. 16: "情感",
  35. 17: "三农",
  36. 18: "摄影",
  37. 19: "舞蹈",
  38. 20: "颜值",
  39. 21: "音乐",
  40. 22: "影视",
  41. 23: "短剧",
  42. 24: "游戏",
  43. 25: "运动",
  44. 26: "资讯",
  45. 27: "人文"
  46. }
  47. current_category_index = 0
  48. @staticmethod
  49. def calculate_sig(data):
  50. src = ''.join([f'{key}={data[key]}' for key in sorted(data.keys())])
  51. salt = '08d8eece8e83'
  52. return md5(f'{src}{salt}'.encode()).hexdigest()
  53. """
  54. 切换品类
  55. """
  56. @classmethod
  57. def switch_category(cls):
  58. if cls.current_category_index >= len(cls.CATEGORY_IDS):
  59. cls.current_category_index = 0
  60. category_id = list(cls.CATEGORY_IDS.keys())[cls.current_category_index]
  61. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/category/confirm/optimize'
  62. data = {
  63. 'isRecommendChange': False,
  64. 'categoryId': category_id,
  65. # 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
  66. 'kuaishou.api_st': 'Cg9rdWFpc2hvdS5hcGkuc3QSkAE_VoKJQaM0WajPO6D5L7oDL1T27Alg2mU-kCyJ3UYVcMHrcHWvW25h0I8N99ji9ZU5hLqzw8zaJ9X2U7RbOnoCL6cHJYqdq8UDVpz7sXWOmaNPM5iCYJ7zLa4aXgSAzpHoGhzOqVSr1o3Y7BSJ57WMhJGECPyATpfd6MPBmftxqYXyR_BEvC8x3O_4mboHODAaEgGwS-thQkgSvdABsv26PXMiGyIgTpI18EY0ssCCp3tPqI1swbohPO3jH36-5NV0yKMdJWQoBTAB',
  67. 'client_key': '214c9979',
  68. }
  69. sig = cls.calculate_sig(data)
  70. data['sig'] = sig
  71. response = requests.post(url=url, headers=headers, data=data)
  72. body = response.content.decode()
  73. cls.current_category_index += 1
  74. return body
  75. """
  76. 获取feed流信息
  77. """
  78. @classmethod
  79. def get_feed_list(cls):
  80. cls.switch_category()
  81. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/feed'
  82. data = {
  83. 'cs': False,
  84. 'kuaishou.api_st': 'Cg9rdWFpc2hvdS5hcGkuc3QSkAE_VoKJQaM0WajPO6D5L7oDL1T27Alg2mU-kCyJ3UYVcMHrcHWvW25h0I8N99ji9ZU5hLqzw8zaJ9X2U7RbOnoCL6cHJYqdq8UDVpz7sXWOmaNPM5iCYJ7zLa4aXgSAzpHoGhzOqVSr1o3Y7BSJ57WMhJGECPyATpfd6MPBmftxqYXyR_BEvC8x3O_4mboHODAaEgGwS-thQkgSvdABsv26PXMiGyIgTpI18EY0ssCCp3tPqI1swbohPO3jH36-5NV0yKMdJWQoBTAB',
  85. # 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
  86. 'client_key': '214c9979',
  87. }
  88. sig = cls.calculate_sig(data)
  89. data['sig'] = sig
  90. response = requests.post(url=url, headers=headers, data=data)
  91. body = response.content.decode()
  92. return body
  93. """
  94. 获取观众画像
  95. """
  96. @classmethod
  97. def analyze_photo(cls, photo_id):
  98. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/photo/analysis'
  99. headers = {
  100. 'Accept-Language': 'zh-cn',
  101. 'Connection': 'keep-alive',
  102. 'Content-Type': 'application/x-www-form-urlencoded',
  103. 'Host': 'creator-app.kuaishou.com',
  104. }
  105. data = {
  106. 'photoId': photo_id,
  107. 'client_key': '214c9979',
  108. }
  109. sig = cls.calculate_sig(data)
  110. data['sig'] = sig
  111. response = requests.post(url=url, headers=headers, data=data)
  112. body = response.content.decode()
  113. json_body = json.loads(body)
  114. user_range = json_body['data']['play']['userRange']
  115. if len(user_range) == 0:
  116. return False, "无画像"
  117. age_range = user_range['ageRange']
  118. value = age_range[5]['value']
  119. value = int(value.strip('%'))
  120. if value >= 50:
  121. return False, value
  122. else:
  123. return True, value
  124. """
  125. 视频时长转换成秒
  126. """
  127. @classmethod
  128. def milliseconds_to_seconds(cls, milliseconds):
  129. seconds = milliseconds / 1000
  130. return int(seconds)
  131. """
  132. 判断当前视频是否在90天内
  133. """
  134. @classmethod
  135. def get_video_data(cls, timestamp_str):
  136. timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
  137. # 获取当前时间
  138. current_time = datetime.now()
  139. difference = current_time - timestamp
  140. if difference <= timedelta(days=90):
  141. return False
  142. else:
  143. return True
  144. """
  145. 获取票圈ID
  146. """
  147. @classmethod
  148. def get_id_by_category(cls, category_name):
  149. category_list = [
  150. {"id": 71502003, "category": "生活"},
  151. {"id": 71502004, "category": "才艺"},
  152. {"id": 71502005, "category": "时尚"},
  153. {"id": 71502006, "category": "宠物"},
  154. {"id": 71502007, "category": "读书"},
  155. {"id": 71502008, "category": "二次元"},
  156. {"id": 71502009, "category": "家居"},
  157. {"id": 71502010, "category": "数码"},
  158. {"id": 71502011, "category": "搞笑"},
  159. {"id": 71502012, "category": "健康"},
  160. {"id": 71502013, "category": "旅游"},
  161. {"id": 71502014, "category": "美食"},
  162. {"id": 71502015, "category": "美妆"},
  163. {"id": 71502016, "category": "汽车"},
  164. {"id": 71502018, "category": "亲子"},
  165. {"id": 71502019, "category": "情感"},
  166. {"id": 71502020, "category": "三农"},
  167. {"id": 71502021, "category": "摄影"},
  168. {"id": 71502022, "category": "舞蹈"},
  169. {"id": 71502023, "category": "颜值"},
  170. {"id": 71502024, "category": "音乐"},
  171. {"id": 71502025, "category": "影视"},
  172. {"id": 71502026, "category": "短剧"},
  173. {"id": 71502027, "category": "游戏"},
  174. {"id": 71502028, "category": "运动"},
  175. {"id": 71502029, "category": "资讯"},
  176. {"id": 71502030, "category": "人文"}
  177. ]
  178. for category in category_list:
  179. if category['category'] == category_name:
  180. return category['id']
  181. return None
  182. """
  183. 新生成视频上传到对应账号下
  184. """
  185. @classmethod
  186. def insert_piaoquantv(cls, new_video_path, new_title, n_id, cover):
  187. url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
  188. headers = {
  189. 'User-Agent': 'PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0',
  190. 'cookie': 'JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78',
  191. 'referer': 'http://appspeed.piaoquantv.com',
  192. 'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
  193. 'accept-language': 'zh-CN,zh-Hans;q=0.9',
  194. 'Content-Type': 'application/x-www-form-urlencoded'
  195. }
  196. payload = {
  197. 'coverImgPath': cover,
  198. 'deviceToken': '9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408',
  199. 'fileExtensions': 'MP4',
  200. 'loginUid': n_id,
  201. 'networkType': 'Wi-Fi',
  202. 'platform': 'iOS',
  203. 'requestId': 'fb972cbd4f390afcfd3da1869cd7d001',
  204. 'sessionId': '362290597725ce1fa870d7be4f46dcc2',
  205. 'subSessionId': '362290597725ce1fa870d7be4f46dcc2',
  206. 'title': new_title,
  207. 'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
  208. 'uid': n_id,
  209. 'versionCode': '486',
  210. 'versionName': '3.4.12',
  211. 'videoFromScene': '1',
  212. 'videoPath': new_video_path,
  213. 'viewStatus': '1'
  214. }
  215. encoded_payload = urlencode(payload)
  216. response = requests.request("POST", url, headers=headers, data=encoded_payload)
  217. data = response.json()
  218. code = data["code"]
  219. if code == 0:
  220. new_video_id = data["data"]["id"]
  221. return new_video_id
  222. else:
  223. return None
  224. @classmethod
  225. def get_data(cls):
  226. for category_id, category_name in cls.CATEGORY_IDS.items():
  227. try:
  228. feed_data = cls.get_feed_list()
  229. feed_data = json.loads(feed_data)
  230. feeds = feed_data['feeds']
  231. for feed in feeds:
  232. photo_id = feed["photo_id"] # 视频ID
  233. status = sqlCollect.is_used(photo_id)
  234. if status:
  235. continue
  236. user_name = feed["user_name"] # 用户名
  237. user_sex = feed["user_sex"] # 性别 F为女,U为男
  238. time_data = feed["time"] # 发布时间
  239. caption = feed["caption"] # 标题
  240. view_count = feed["view_count"] # 浏览数
  241. like_count = feed["like_count"] # 点赞数
  242. share_count = feed["share_count"] # 分享数
  243. duration = feed["duration"] # 时长/秒
  244. duration = cls.milliseconds_to_seconds(duration)
  245. main_mv_url = feed["main_mv_url"] # 视频链接
  246. thumbnail_url = feed["thumbnail_url"] # 视频封面
  247. user_id = feed["user_id"] # 用户id非用户主页id
  248. # time_status = cls.get_video_data(time_data)
  249. # if time_status:
  250. # sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count, share_count, duration, main_mv_url, thumbnail_url, user_id, '1', photo_id, category_name, age=None, oss_object=None, video_uid=None)
  251. # continue
  252. value, age = cls.analyze_photo(photo_id)
  253. if value:
  254. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
  255. share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
  256. photo_id, category_name, age, oss_object=None, video_uid=None)
  257. continue
  258. video_percent = '%.2f' % (share_count / view_count)
  259. special = float(0.1)
  260. if float(video_percent) < special or share_count < 500 or duration < 30 or duration > 600:
  261. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count, share_count, duration, main_mv_url, thumbnail_url, user_id, '1', photo_id, category_name, age, oss_object=None, video_uid=None)
  262. continue
  263. oss_object_key = Oss.channel_upload_oss(main_mv_url, photo_id)
  264. time.sleep(2)
  265. oss_object = oss_object_key.get("oss_object_key")
  266. pq_id = cls.get_id_by_category(category_name)
  267. if pq_id:
  268. video_uid = cls.insert_piaoquantv(oss_object, caption, pq_id, thumbnail_url)
  269. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count,
  270. like_count, share_count, duration, main_mv_url, thumbnail_url,
  271. user_id, '0', photo_id, category_name, age, oss_object, video_uid)
  272. current_time = datetime.now()
  273. formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
  274. values = [
  275. [category_name, user_name, photo_id, user_sex, caption, view_count, like_count, share_count, duration,
  276. main_mv_url, thumbnail_url, user_id, age, pq_id, video_uid, time_data, formatted_time]]
  277. Feishu.insert_columns("PlcisKhObhzmBothRutc65sJnph", "8fQxFv", "ROWS", 1, 2)
  278. time.sleep(0.5)
  279. Feishu.update_values("PlcisKhObhzmBothRutc65sJnph", "8fQxFv", "A2:Z2", values)
  280. time.sleep(80)
  281. except Exception as exc:
  282. print(f"异常信息: {exc}")
  283. continue
  284. # Example usage:
  285. if __name__ == "__main__":
  286. KsFeedVideo.get_data()