# kuaishouchuangzuozhe.py
import json
import os
import time
from datetime import datetime, timedelta
from hashlib import md5
from urllib.parse import urlencode

import requests

from common import Oss, Feishu, Common, AliyunLogger
from common.sql_help import sqlCollect
  10. headers = {
  11. 'Accept-Language': 'zh-cn',
  12. 'Connection': 'keep-alive',
  13. 'Content-Type': 'application/x-www-form-urlencoded',
  14. 'Host': 'creator-app.kuaishou.com',
  15. 'User-Agent': 'kwai-android aegon/3.12.1',
  16. }
  17. class KsFeedVideo:
  18. CATEGORY_IDS = {
  19. 1: "生活",
  20. 2: "才艺",
  21. # 3: "时尚",
  22. # 4: "宠物",
  23. 5: "读书",
  24. # 6: "二次元",
  25. 7: "家居",
  26. # 8: "数码",
  27. 9: "搞笑",
  28. 10: "健康",
  29. 11: "旅游",
  30. 12: "美食",
  31. # 13: "美妆",
  32. # 14: "汽车",
  33. 15: "亲子",
  34. 16: "情感",
  35. # 17: "三农",
  36. # 18: "摄影",
  37. # 19: "舞蹈",
  38. # 20: "颜值",
  39. # 21: "音乐",
  40. # 22: "影视",
  41. # 23: "短剧",
  42. # 24: "游戏",
  43. 25: "运动",
  44. 26: "资讯",
  45. 27: "人文"
  46. }
  47. current_category_index = 0
  48. @staticmethod
  49. def calculate_sig(data):
  50. src = ''.join([f'{key}={data[key]}' for key in sorted(data.keys())])
  51. salt = '08d8eece8e83'
  52. return md5(f'{src}{salt}'.encode()).hexdigest()
  53. """
  54. 切换品类
  55. """
  56. @classmethod
  57. def switch_category(cls):
  58. if cls.current_category_index >= len(cls.CATEGORY_IDS):
  59. cls.current_category_index = 0
  60. category_id = list(cls.CATEGORY_IDS.keys())[cls.current_category_index]
  61. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/category/confirm/optimize'
  62. data = {
  63. 'isRecommendChange': False,
  64. 'categoryId': category_id,
  65. # 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
  66. 'kuaishou.api_st': 'Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB;region_ticket=RT_FAC86448E713714136C088FFCC4431455D1FA7E05A6D25DAD4E4B8CC011FB6E8294169DD9',
  67. 'client_key': '214c9979',
  68. }
  69. sig = cls.calculate_sig(data)
  70. data['sig'] = sig
  71. response = requests.post(url=url, headers=headers, data=data)
  72. body = response.content.decode()
  73. cls.current_category_index += 1
  74. return body
  75. """
  76. 获取feed流信息
  77. """
  78. @classmethod
  79. def get_feed_list(cls):
  80. cls.switch_category()
  81. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/feed'
  82. data = {
  83. 'cs': False,
  84. 'kuaishou.api_st': 'Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB;region_ticket=RT_FAC86448E713714136C088FFCC4431455D1FA7E05A6D25DAD4E4B8CC011FB6E8294169DD9',
  85. # 'kuaishou.api_st': "Cg9rdWFpc2hvdS5hcGkuc3QSkAGMQoIK2ZpwlQszYISTxSFxzugi58w2U5gpPqa6an0eU6MFcVsXq2rd_K16UTItZ_OzPV-4jmVN5rNXKXW9jL97JV79Y9PqxaR9xOIr1TEyDzpOq2GM-0W1QRW3M8Li_J6NZ5t1hRFCWHBlOESjiBWs7vq4m1bq_ml0dZ6pgEDfpsWNpBaLRzwZwOO1mD4LqO4aEokh6uHql0RmmtbfoBF25r7QOyIgqNv0TBf6mlwS3bjE0K6sl08M1mMPjW1PB9e0Qr494H8oBTAB",
  86. 'client_key': '214c9979',
  87. }
  88. sig = cls.calculate_sig(data)
  89. data['sig'] = sig
  90. response = requests.post(url=url, headers=headers, data=data)
  91. body = response.content.decode()
  92. return body
  93. """
  94. 获取观众画像
  95. """
  96. @classmethod
  97. def analyze_photo(cls, photo_id):
  98. url = 'https://creator-app.kuaishou.com/rest/bamboo/inspiration/n/photo/analysis'
  99. headers = {
  100. 'Accept-Language': 'zh-cn',
  101. 'Connection': 'keep-alive',
  102. 'Content-Type': 'application/x-www-form-urlencoded',
  103. 'Host': 'creator-app.kuaishou.com',
  104. }
  105. data = {
  106. 'photoId': photo_id,
  107. 'client_key': '214c9979',
  108. }
  109. sig = cls.calculate_sig(data)
  110. data['sig'] = sig
  111. response = requests.post(url=url, headers=headers, data=data)
  112. body = response.content.decode()
  113. json_body = json.loads(body)
  114. user_range = json_body['data']['play']['userRange']
  115. if len(user_range) == 0:
  116. return False, "无画像"
  117. age_range = user_range['ageRange']
  118. value = age_range[5]['value']
  119. value = int(value.strip('%'))
  120. if value >= 40:
  121. return False, value
  122. else:
  123. return True, value
  124. """
  125. 视频时长转换成秒
  126. """
  127. @classmethod
  128. def milliseconds_to_seconds(cls, milliseconds):
  129. seconds = milliseconds / 1000
  130. return int(seconds)
  131. """
  132. 判断当前视频是否在90天内
  133. """
  134. @classmethod
  135. def get_video_data(cls, timestamp_str):
  136. timestamp = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
  137. # 获取当前时间
  138. current_time = datetime.now()
  139. difference = current_time - timestamp
  140. if difference <= timedelta(days=90):
  141. return False
  142. else:
  143. return True
  144. """
  145. 获取票圈ID
  146. """
  147. @classmethod
  148. def get_id_by_category(cls, category_name):
  149. category_list = [
  150. {"id": 71502003, "category": "生活"},
  151. {"id": 71502004, "category": "才艺"},
  152. {"id": 71502005, "category": "时尚"},
  153. {"id": 71502006, "category": "宠物"},
  154. {"id": 71502007, "category": "读书"},
  155. {"id": 71502008, "category": "二次元"},
  156. {"id": 71502009, "category": "家居"},
  157. {"id": 71502010, "category": "数码"},
  158. {"id": 71502011, "category": "搞笑"},
  159. {"id": 71502012, "category": "健康"},
  160. {"id": 71502013, "category": "旅游"},
  161. {"id": 71502014, "category": "美食"},
  162. {"id": 71502015, "category": "美妆"},
  163. {"id": 71502016, "category": "汽车"},
  164. {"id": 71502018, "category": "亲子"},
  165. {"id": 71502019, "category": "情感"},
  166. {"id": 71502020, "category": "三农"},
  167. {"id": 71502021, "category": "摄影"},
  168. {"id": 71502022, "category": "舞蹈"},
  169. {"id": 71502023, "category": "颜值"},
  170. {"id": 71502024, "category": "音乐"},
  171. {"id": 71502025, "category": "影视"},
  172. {"id": 71502026, "category": "短剧"},
  173. {"id": 71502027, "category": "游戏"},
  174. {"id": 71502028, "category": "运动"},
  175. {"id": 71502029, "category": "资讯"},
  176. {"id": 71502030, "category": "人文"}
  177. ]
  178. for category in category_list:
  179. if category['category'] == category_name:
  180. return category['id']
  181. return None
  182. """
  183. 新生成视频上传到对应账号下
  184. """
  185. @classmethod
  186. def insert_piaoquantv(cls, new_video_path, new_title, n_id, cover):
  187. url = "https://vlogapi.piaoquantv.com/longvideoapi/crawler/video/send"
  188. headers = {
  189. 'User-Agent': 'PQSpeed/486 CFNetwork/1410.1 Darwin/22.6.0',
  190. 'cookie': 'JSESSIONID=4DEA2B5173BB9A9E82DB772C0ACDBC9F; JSESSIONID=D02C334150025222A0B824A98B539B78',
  191. 'referer': 'http://appspeed.piaoquantv.com',
  192. 'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
  193. 'accept-language': 'zh-CN,zh-Hans;q=0.9',
  194. 'Content-Type': 'application/x-www-form-urlencoded'
  195. }
  196. payload = {
  197. 'coverImgPath': cover,
  198. 'deviceToken': '9ef064f2f7869b3fd67d6141f8a899175dddc91240971172f1f2a662ef891408',
  199. 'fileExtensions': 'MP4',
  200. 'loginUid': n_id,
  201. 'networkType': 'Wi-Fi',
  202. 'platform': 'iOS',
  203. 'requestId': 'fb972cbd4f390afcfd3da1869cd7d001',
  204. 'sessionId': '362290597725ce1fa870d7be4f46dcc2',
  205. 'subSessionId': '362290597725ce1fa870d7be4f46dcc2',
  206. 'title': new_title,
  207. 'token': '524a8bc871dbb0f4d4717895083172ab37c02d2f',
  208. 'uid': n_id,
  209. 'versionCode': '486',
  210. 'versionName': '3.4.12',
  211. 'videoFromScene': '1',
  212. 'videoPath': new_video_path,
  213. 'viewStatus': '1'
  214. }
  215. encoded_payload = urlencode(payload)
  216. response = requests.request("POST", url, headers=headers, data=encoded_payload)
  217. data = response.json()
  218. code = data["code"]
  219. if code == 0:
  220. new_video_id = data["data"]["id"]
  221. return new_video_id
  222. else:
  223. return None
  224. @classmethod
  225. def get_data(cls, channel_id, name):
  226. number = 1
  227. list = []
  228. for category_id, category_name in cls.CATEGORY_IDS.items():
  229. try:
  230. feed_data = cls.get_feed_list()
  231. feed_data = json.loads(feed_data)
  232. feeds = feed_data['feeds']
  233. for feed in feeds:
  234. photo_id = feed["photo_id"] # 视频ID
  235. status = sqlCollect.ks_is_used(photo_id)
  236. user_name = feed["user_name"] # 用户名
  237. user_sex = feed["user_sex"] # 性别 F为女,U为男
  238. time_data = feed["time"] # 发布时间
  239. caption = feed["caption"] # 标题
  240. view_count = feed["view_count"] # 浏览数
  241. like_count = feed["like_count"] # 点赞数
  242. share_count = feed["share_count"] # 分享数
  243. duration = feed["duration"] # 时长/秒
  244. duration = cls.milliseconds_to_seconds(duration)
  245. main_mv_url = feed["main_mv_url"] # 视频链接
  246. thumbnail_url = feed["thumbnail_url"] # 视频封面
  247. user_id = feed["user_id"] # 用户id非用户主页id
  248. log_data = f"user:{user_name},,video_id:{photo_id},,video_url:{main_mv_url},,original_title:{caption},,share_count:{share_count},,view_count:{view_count},,duration:{duration}"
  249. AliyunLogger.logging(channel_id, name, user_name, photo_id, "扫描到一条视频", "2001", log_data)
  250. value, age = cls.analyze_photo(photo_id)
  251. if status:
  252. AliyunLogger.logging(channel_id, name, user_name, photo_id, "该视频已改造过", "2001", log_data)
  253. continue
  254. if value:
  255. AliyunLogger.logging(channel_id, name, user_name, photo_id, f"不符合规则:50+年龄占比小于40%,实际占比{age}", "2003", log_data)
  256. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
  257. share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
  258. photo_id, category_name, age, oss_object=None, video_uid=None)
  259. continue
  260. video_percent = '%.4f' % (share_count / view_count)
  261. special = float(0.0005)
  262. if float(video_percent) < special:
  263. AliyunLogger.logging(channel_id, name, user_name, photo_id, "不符合规则:分享/浏览小于0.0005", "2003", log_data)
  264. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count, share_count, duration, main_mv_url, thumbnail_url, user_id, '1', photo_id, category_name, age, oss_object=None, video_uid=None)
  265. continue
  266. if share_count < 100:
  267. AliyunLogger.logging(channel_id, name, user_name, photo_id, "不符合规则:分享小于100", "2003", log_data)
  268. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
  269. share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
  270. photo_id, category_name, age, oss_object=None, video_uid=None)
  271. continue
  272. if duration < 30 or duration > 720:
  273. AliyunLogger.logging(channel_id, name, user_name, photo_id, "不符合规则:时长不符合规则大于720秒/小于30秒", "2003", log_data)
  274. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count, like_count,
  275. share_count, duration, main_mv_url, thumbnail_url, user_id, '1',
  276. photo_id, category_name, age, oss_object=None, video_uid=None)
  277. continue
  278. sqlCollect.insert_ks_data(user_name, user_sex, time_data, caption, view_count,
  279. like_count, share_count, duration, main_mv_url, thumbnail_url,
  280. user_id, '0', photo_id, category_name, age, oss_object=None, video_uid=None)
  281. all_data = {"video_id": photo_id, "cover": thumbnail_url, "video_url": main_mv_url, "rule": video_percent,
  282. "old_title": caption}
  283. AliyunLogger.logging(channel_id, name, user_name, photo_id, "符合规则等待改造", "2004", log_data)
  284. list.append(all_data)
  285. current_time = datetime.now()
  286. formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
  287. values = [
  288. [category_name, user_name, photo_id, user_sex, caption, view_count, like_count, share_count, duration,
  289. main_mv_url, thumbnail_url, user_id, age, '', '', time_data, formatted_time]]
  290. Feishu.insert_columns("PlcisKhObhzmBothRutc65sJnph", "8fQxFv", "ROWS", 2, 3)
  291. time.sleep(0.5)
  292. Feishu.update_values("PlcisKhObhzmBothRutc65sJnph", "8fQxFv", "A3:Z3", values)
  293. if len(list) == int(number):
  294. Common.logger("ks-czz").info(f"获取快手创作者视频总数:{len(list)}\n")
  295. return list
  296. time.sleep(5)
  297. except Exception as exc:
  298. print(f"异常信息: {exc}")
  299. return list
  300. return list
  301. # Example usage:
  302. if __name__ == "__main__":
  303. KsFeedVideo.get_data(1)