ks_feed_list.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. import json
  2. import os
  3. from hashlib import md5
  4. import requests
  5. import time
  6. from urllib.parse import urlencode
  7. from datetime import datetime, timedelta
  8. from common import Oss, Feishu
  9. from common.sql_help import sqlCollect
  10. headers = {
  11. 'Accept-Language': 'zh-cn',
  12. 'Connection': 'keep-alive',
  13. 'Content-Type': 'application/x-www-form-urlencoded',
  14. 'Host': 'creator-app.kuaishou.com',
  15. 'User-Agent': 'kwai-android aegon/3.12.1',
  16. }
  17. class KsFeedVideo:
  18. CATEGORY_IDS = {
  19. 1: "生活",
  20. 2: "才艺",
  21. 3: "时尚",
  22. 4: "宠物",
  23. 5: "读书",
  24. 6: "二次元",
  25. 7: "家居",
  26. 8: "数码",
  27. 9: "搞笑",
  28. 10: "健康",
  29. 11: "旅游",
  30. 12: "美食",
  31. 13: "美妆",
  32. 14: "汽车",
  33. 15: "亲子",
  34. 16: "情感",
  35. 17: "三农",
  36. 18: "摄影",
  37. 19: "舞蹈",
  38. 20: "颜值",
  39. 21: "音乐",
  40. 22: "影视",
  41. 23: "短剧",
  42. 24: "游戏",
  43. 25: "运动",
  44. 26: "资讯",
  45. 27: "人文"
  46. }
  47. current_category_index = 0
  48. @staticmethod
  49. def calculate_sig(data):
  50. src = ''.join([f'{key}={data[key]}' for key in sorted(data.keys())])
  51. salt = '08d8eece8e83'
  52. return md5(f'{src}{salt}'.encode()).hexdigest()
  53. """
  54. 切换品类
  55. """
  56. @classmethod
  57. def switch_category(cls):
  58. if cls.current_category_index >= len(cls.CATEGORY_IDS):
  59. cls.current_category_index = 0
  60. category_id = list(cls.CATEGORY_IDS.keys())[cls.current_category_index]
  61. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/category/confirm/optimize'
  62. data = {
  63. 'isRecommendChange': False,
  64. 'categoryId': category_id,
  65. 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
  66. 'client_key': '214c9979',
  67. }
  68. sig = cls.calculate_sig(data)
  69. data['sig'] = sig
  70. response = requests.post(url=url, headers=headers, data=data)
  71. body = response.content.decode()
  72. cls.current_category_index += 1
  73. return body
  74. """
  75. 获取feed流信息
  76. """
  77. @classmethod
  78. def get_feed_list(cls):
  79. cls.switch_category()
  80. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/feed'
  81. data = {
  82. 'cs': False,
  83. 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
  84. 'client_key': '214c9979',
  85. }
  86. sig = cls.calculate_sig(data)
  87. data['sig'] = sig
  88. response = requests.post(url=url, headers=headers, data=data)
  89. body = response.content.decode()
  90. return body
  91. """
  92. 获取观众画像
  93. """
  94. @classmethod
  95. def analyze_photo(cls, photo_id):
  96. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/photo/analysis'
  97. headers = {
  98. 'Accept-Language': 'zh-cn',
  99. 'Connection': 'keep-alive',
  100. 'Content-Type': 'application/x-www-form-urlencoded',
  101. 'Host': 'creator-app.kuaishou.com',
  102. }
  103. data = {
  104. 'photoId': photo_id,
  105. 'client_key': '214c9979',
  106. }
  107. sig = cls.calculate_sig(data)
  108. data['sig'] = sig
  109. response = requests.post(url=url, headers=headers, data=data)
  110. body = response.content.decode()
  111. json_body = json.loads(body)
  112. user_range = json_body['data']['play']['userRange']
  113. if len(user_range) == 0:
  114. return False, "无画像"
  115. age_range = user_range['ageRange']
  116. value = age_range[5]['value']
  117. value = int(value.strip('%'))
  118. if value >= 50:
  119. return False, value
  120. else:
  121. return True, value
  122. """
  123. 视频时长转换成秒
  124. """
  125. @classmethod
  126. def milliseconds_to_seconds(cls, milliseconds):
  127. seconds = milliseconds / 1000
  128. return int(seconds)
  129. """
  130. 判断当前视频是否在90天内
  131. """
  132. @classmethod
  133. def get_video_data(cls, timestamp_str):
  134. timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
  135. # 获取当前时间
  136. current_time = datetime.now()
  137. difference = current_time - timestamp
  138. if difference <= timedelta(days=90):
  139. return False
  140. else:
  141. return True
  142. """
  143. 获取票圈ID
  144. """
  145. @classmethod
  146. def get_id_by_category(cls, category_name):
  147. category_list = [
  148. {"id": 71502003, "category": "生活"},
  149. {"id": 71502004, "category": "才艺"},
  150. {"id": 71502005, "category": "时尚"},
  151. {"id": 71502006, "category": "宠物"},
  152. {"id": 71502007, "category": "读书"},
  153. {"id": 71502008, "category": "二次元"},
  154. {"id": 71502009, "category": "家居"},
  155. {"id": 71502010, "category": "数码"},
  156. {"id": 71502011, "category": "搞笑"},
  157. {"id": 71502012, "category": "健康"},
  158. {"id": 71502013, "category": "旅游"},
  159. {"id": 71502014, "category": "美食"},
  160. {"id": 71502015, "category": "美妆"},
  161. {"id": 71502016, "category": "汽车"},
  162. {"id": 71502018, "category": "亲子"},
  163. {"id": 71502019, "category": "情感"},
  164. {"id": 71502020, "category": "三农"},
  165. {"id": 71502021, "category": "摄影"},
  166. {"id": 71502022, "category": "舞蹈"},
  167. {"id": 71502023, "category": "颜值"},
  168. {"id": 71502024, "category": "音乐"},
  169. {"id": 71502025, "category": "影视"},
  170. {"id": 71502026, "category": "短剧"},
  171. {"id": 71502027, "category": "游戏"},
  172. {"id": 71502028, "category": "运动"},
  173. {"id": 71502029, "category": "资讯"},
  174. {"id": 71502030, "category": "人文"}
  175. ]
  176. for category in category_list:
  177. if category['category'] == category_name:
  178. return category['id']
  179. return None
  180. """
  181. 新生成视频上传到对应账号下
  182. """
  183. @classmethod
  184. def insert_piaoquantv(cls, new_video_path, new_title, n_id, cover):
  185. url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
  186. headers = {
  187. 'User-Agent': 'PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0',
  188. 'cookie': 'JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78',
  189. 'referer': 'http://appspeed.piaoquantv.com',
  190. 'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
  191. 'accept-language': 'zh-CN,zh-Hans;q=0.9',
  192. 'Content-Type': 'application/x-www-form-urlencoded'
  193. }
  194. payload = {
  195. 'coverImgPath': cover,
  196. 'deviceToken': '9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408',
  197. 'fileExtensions': 'MP4',
  198. 'loginUid': n_id,
  199. 'networkType': 'Wi-Fi',
  200. 'platform': 'iOS',
  201. 'requestId': 'fb972cbd4f390afcfd3da1869cd7d001',
  202. 'sessionId': '362290597725ce1fa870d7be4f46dcc2',
  203. 'subSessionId': '362290597725ce1fa870d7be4f46dcc2',
  204. 'title': new_title,
  205. 'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
  206. 'uid': n_id,
  207. 'versionCode': '486',
  208. 'versionName': '3.4.12',
  209. 'videoFromScene': '1',
  210. 'videoPath': new_video_path,
  211. 'viewStatus': '1'
  212. }
  213. encoded_payload = urlencode(payload)
  214. response = requests.request("POST", url, headers=headers, data=encoded_payload)
  215. data = response.json()
  216. code = data["code"]
  217. if code == 0:
  218. new_video_id = data["data"]["id"]
  219. return new_video_id
  220. else:
  221. return None
  222. @classmethod
  223. def get_data(cls):
  224. for category_id, category_name in cls.CATEGORY_IDS.items():
  225. try:
  226. feed_data = cls.get_feed_list()
  227. feed_data = json.loads(feed_data)
  228. feeds = feed_data['feeds']
  229. for feed in feeds:
  230. photo_id = feed["photo_id"] # 视频ID
  231. status = sqlCollect.is_used(photo_id)
  232. if status:
  233. continue
  234. user_name = feed["user_name"] # 用户名
  235. user_sex = feed["user_sex"] # 性别 F为女,U为男
  236. time_data = feed["time"] # 发布时间
  237. caption = feed["caption"] # 标题
  238. view_count = feed["view_count"] # 浏览数
  239. like_count = feed["like_count"] # 点赞数
  240. share_count = feed["share_count"] # 分享数
  241. duration = feed["duration"] # 时长/秒
  242. duration = cls.milliseconds_to_seconds(duration)
  243. main_mv_url = feed["main_mv_url"] # 视频链接
  244. thumbnail_url = feed["thumbnail_url"] # 视频封面
  245. user_id = feed["user_id"] # 用户id非用户主页id
  246. time_status = cls.get_video_data(time_data)
  247. if time_status:
  248. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count, share_count, duration, main_mv_url, thumbnail_url, user_id, '1', photo_id, category_name, age=None, oss_object=None, video_uid=None)
  249. continue
  250. video_percent = '%.2f' % (share_count / like_count)
  251. special = float(0.2)
  252. if float(video_percent) < special or share_count < 2000 or duration < 30 or duration > 6000:
  253. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count, share_count, duration, main_mv_url, thumbnail_url, user_id, '1', photo_id, category_name, age=None, oss_object=None, video_uid=None)
  254. continue
  255. value, age = cls.analyze_photo(photo_id)
  256. if value:
  257. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
  258. share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
  259. photo_id, category_name, age=None, oss_object=None, video_uid=None)
  260. continue
  261. oss_object_key = Oss.channel_upload_oss(main_mv_url, photo_id)
  262. time.sleep(2)
  263. oss_object = oss_object_key.get("oss_object_key")
  264. pq_id = cls.get_id_by_category(category_name)
  265. if pq_id:
  266. video_uid = cls.insert_piaoquantv(oss_object, caption, pq_id, thumbnail_url)
  267. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count,
  268. like_count, share_count, duration, main_mv_url, thumbnail_url,
  269. user_id, '0', photo_id, category_name, age, oss_object, video_uid)
  270. current_time = datetime.now()
  271. formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
  272. values = [
  273. [category_name, user_name, user_sex, caption, view_count, like_count, share_count, duration,
  274. main_mv_url, thumbnail_url, user_id, age, pq_id, video_uid, time_data, formatted_time]]
  275. Feishu.insert_columns("PlcisKhObhzmBothRutc65sJnph", "823f74", "ROWS", 1, 2)
  276. time.sleep(0.5)
  277. Feishu.update_values("PlcisKhObhzmBothRutc65sJnph", "823f74", "A2:Z2", values)
  278. time.sleep(80)
  279. except Exception as exc:
  280. print(f"异常信息: {exc}")
  281. continue
  282. # Example usage:
  283. if __name__ == "__main__":
  284. KsFeedVideo.get_data()