kuaishou_author.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. # -*- coding: utf-8 -*-
  2. # @Time: 2024/01/18
  3. import datetime
  4. import os
  5. import random
  6. import sys
  7. import time
  8. from datetime import datetime
  9. import requests
  10. import json
  11. import urllib3
  12. sys.path.append(os.getcwd())
  13. from common.aliyun_oss_uploading import Oss
  14. from common.common import Common
  15. from common.material import Material
  16. from common.feishu import Feishu
  17. from common.db import MysqlHelper
  18. from requests.adapters import HTTPAdapter
  19. class kuaishouAuthor():
  20. """
  21. oss视频地址 存入数据库
  22. """
  23. @classmethod
  24. def insert_videoUrl(cls, video_id, account_id, oss_object_key, mark):
  25. current_time = datetime.now()
  26. formatted_time = current_time.strftime("%Y-%m-%d %H:%M")
  27. insert_sql = f"""INSERT INTO agc_video_url (video_id, account_id, oss_object_key, time, status, mark) values ("{video_id}", "{account_id}", "{oss_object_key}", "{formatted_time}", 1, "{mark}")"""
  28. MysqlHelper.update_values(
  29. sql=insert_sql,
  30. env="prod",
  31. machine="",
  32. )
  33. """
  34. 获取快手用户主页id
  35. """
  36. @classmethod
  37. def get_kuaishou_videoUserId(cls, mark):
  38. select_user_sql = f"""select user_id, channel from agc_channel_data where mark = '{mark}' and channel = '快手' ORDER BY id DESC;"""
  39. user_list = MysqlHelper.get_values(select_user_sql, "prod")
  40. return user_list
  41. """
  42. 查询该video_id是否在数据库存在
  43. """
  44. @classmethod
  45. def select_videoUrl_id(cls, video_id):
  46. select_user_sql = f"""select video_id from agc_video_url where video_id='{video_id}' ;"""
  47. user_list = MysqlHelper.get_values(select_user_sql, "prod")
  48. if user_list:
  49. return True
  50. else:
  51. return False
  52. """快手读取数据 将数据存储到oss上"""
  53. @classmethod
  54. def get_kuaishou_videoList(cls, data):
  55. try:
  56. mark = data['mark']
  57. token = data['token']
  58. feishu_id = data['feishu_id']
  59. channel_id = data['channel'][0]
  60. channel = data['channel'][1]
  61. Material.insert_user(feishu_id, channel_id, mark, channel)
  62. cookie = Material.get_cookie(feishu_id, token, channel)
  63. # 获取 用户主页id
  64. user_list = cls.get_kuaishou_videoUserId(mark)
  65. if len(user_list) == 0:
  66. return
  67. for i in user_list:
  68. account_id = i[0].replace('(', '').replace(')', '').replace(',', '')
  69. Common.logger("kuaishou").info(f"用户主页ID:{account_id}")
  70. pcursor = ""
  71. count = 0
  72. while True:
  73. if count > 5:
  74. break
  75. time.sleep(random.randint(10, 50))
  76. url = "https://www.kuaishou.com/graphql"
  77. payload = json.dumps({
  78. "operationName": "visionProfilePhotoList",
  79. "variables": {
  80. "userId": account_id,
  81. "pcursor": pcursor,
  82. "page": "profile"
  83. },
  84. "query": "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"
  85. })
  86. headers = {
  87. 'Accept': '*/*',
  88. 'Content-Type': 'application/json',
  89. 'Origin': 'https://www.kuaishou.com',
  90. 'Cookie': cookie,
  91. 'Content-Length': '1260',
  92. 'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
  93. 'Host': 'www.kuaishou.com',
  94. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
  95. 'Referer': f'https://www.kuaishou.com/profile/{account_id}',
  96. 'Accept-Encoding': 'gzip, deflate, br',
  97. 'Connection': 'keep-alive'
  98. }
  99. urllib3.disable_warnings()
  100. s = requests.session()
  101. # max_retries=3 重试3次
  102. s.mount('http://', HTTPAdapter(max_retries=3))
  103. s.mount('https://', HTTPAdapter(max_retries=3))
  104. response = s.post(url=url, headers=headers, data=payload, verify=False,
  105. timeout=10)
  106. response.close()
  107. if response.status_code != 200:
  108. Common.logger("kuaishou").info(
  109. f"接口请求失败,请更换cookie,{response.status_code}")
  110. Feishu.bot('recommend', '快手', f'{mark}:快手cookie失效,请及时更换~', mark)
  111. return
  112. elif "feeds" not in response.json()["data"]["visionProfilePhotoList"]:
  113. Common.logger("kuaishou").info(
  114. f'数据为空{response.json()["data"]["visionProfilePhotoList"]}')
  115. break
  116. elif len(response.json()["data"]["visionProfilePhotoList"]["feeds"]) == 0:
  117. Common.logger("kuaishou").info(
  118. f'数据为空{response.json()["data"]["visionProfilePhotoList"]["feeds"]}')
  119. break
  120. pcursor = response.json()['data']['visionProfilePhotoList']['pcursor']
  121. feeds = response.json()['data']['visionProfilePhotoList']['feeds']
  122. for j in range(len(feeds)):
  123. try:
  124. try:
  125. video_id = feeds[j].get("photo", {}).get("videoResource").get("h264", {}).get("videoId", "")
  126. except KeyError:
  127. video_id = feeds[j].get("photo", {}).get("videoResource").get("hevc", {}).get("videoId", "")
  128. video_url = feeds[j].get('photo', {}).get('photoUrl', "")
  129. id = cls.select_videoUrl_id(video_id)
  130. if id:
  131. if count > 5:
  132. count += 1
  133. Common.logger("kuaishou").info(
  134. f"重复视频不在抓取该用户,用户主页id:{account_id}")
  135. break
  136. continue
  137. channel_name = mark+'/kuaishou'
  138. oss_object_key = Oss.video_sync_upload_oss(video_url, video_id, account_id, channel_name)
  139. status = oss_object_key.get("status")
  140. # 发送 oss
  141. oss_object_key = oss_object_key.get("oss_object_key")
  142. Common.logger("kuaishou").info(f"抖音视频链接oss发送成功,oss地址:{oss_object_key}")
  143. if status == 200:
  144. cls.insert_videoUrl(video_id, account_id, oss_object_key, mark)
  145. Common.logger("kuaishou").info(
  146. f"视频地址插入数据库成功,视频id:{video_id},用户主页id:{account_id},视频储存地址:{oss_object_key}")
  147. except Exception as e:
  148. Common.logger("kuaishou").warning(f"抓取单条视频异常:{e}\n")
  149. continue
  150. except Exception as e:
  151. Common.logger("kuaishou").warning(f"抓取异常:{e}\n")
  152. return
  153. if __name__ == '__main__':
  154. kuaishouAuthor.get_kuaishou_videoList()