demo.py (13 KB)
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/11/23
  4. import datetime
  5. import time
  6. import requests
  7. from main.common import Common
  8. from main.feishu_lib import Feishu
  9. class Demo:
  10. @classmethod
  11. def get_sheet(cls, log_type, crawler, sheetid):
  12. sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
  13. print(sheet)
  14. # a = '2023/01/01 13:13:13'
  15. # Feishu.update_values(log_type, crawler, sheetid, 'F2:F2', [[a]])
  16. @classmethod
  17. def publish_time(cls, publish_time):
  18. today = datetime.date.today()
  19. if '刚刚' in publish_time:
  20. publish_time_stamp = int(time.time())
  21. elif '分钟前' in publish_time:
  22. publish_time_stamp = int(time.time()) - int(publish_time[0])*60
  23. elif '小时前' in publish_time:
  24. publish_time_stamp = int(time.time()) - int(publish_time[0])*3600
  25. elif '昨天' in publish_time:
  26. publish_time_str = (datetime.date.today() + datetime.timedelta(days=-1)).strftime("%Y/%m/%d")
  27. publish_time_stamp = int(time.mktime(time.strptime(publish_time_str, "%Y/%m/%d")))
  28. elif '天前' in publish_time:
  29. publish_time_str = today - datetime.timedelta(days=int(publish_time[0]))
  30. publish_time_stamp = int(time.mktime(publish_time_str.timetuple()))
  31. elif '年' in publish_time:
  32. publish_time_str = publish_time.replace('年', '/').replace('月', '/').replace('日', '')
  33. publish_time_stamp = int(time.mktime(time.strptime(publish_time_str, "%Y/%m/%d")))
  34. else:
  35. publish_time_str = publish_time.replace('月', '/').replace('日', '')
  36. this_year = datetime.datetime.now().year
  37. publish_time_stamp = int(time.mktime(time.strptime(f"{this_year}/{publish_time_str}", "%Y/%m/%d")))
  38. print(f'publish_time_stamp:{publish_time_stamp}')
  39. @classmethod
  40. def get_video_url(cls, log_type, video_id):
  41. url = 'https://haokan.hao123.com/v?'
  42. params = {
  43. 'vid': video_id,
  44. '_format': 'json',
  45. }
  46. headers = {
  47. 'Accept': '*/*',
  48. 'Accept-Encoding': 'gzip, deflate, br',
  49. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
  50. 'Cache-Control': 'no-cache',
  51. 'Connection': 'keep-alive',
  52. 'Content-Type': 'application/x-www-form-urlencoded',
  53. 'Cookie': 'PC_TAB_LOG=video_details_page; COMMON_LID=b0be69dd9fcae328d06935bd40f615cd; Hm_lvt_4aadd610dfd2f5972f1efee2653a2bc5=1669029953; hkpcvideolandquery=%u82CF%u5DDE%u6700%u5927%u7684%u4E8C%u624B%u8F66%u8D85%u5E02%uFF0C%u8F6C%u4E00%u8F6C%u91CC%u8FB9%u8C6A%u8F66%u592A%u591A%u4E86%uFF0C%u4EF7%u683C%u66F4%u8BA9%u6211%u5403%u60CA%uFF01; Hm_lpvt_4aadd610dfd2f5972f1efee2653a2bc5=1669875695; ariaDefaultTheme=undefined; reptileData=%7B%22data%22%3A%22636c55e0319da5169a60acec4a264a35c10862f8abfe2f2cc32c55eb6b0ab4de0efdfa115ea522d6d4d361dea07feae2831d3e2c16ed6b051c611ffe5aded6c9f852501759497b9fbd2132a2160e1e40e5845b41f78121ddcc3288bd077ae4e8%22%2C%22key_id%22%3A%2230%22%2C%22sign%22%3A%22f6752aac%22%7D; RT="z=1&dm=hao123.com&si=uc0q7wnm4w&ss=lb4otu71&sl=j&tt=av0&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=1rdw&cl=7v6c"',
  54. 'Pragma': 'no-cache',
  55. 'Referer': 'https://haokan.hao123.com/v?vid=10623278258033022286&pd=pc&context=',
  56. 'sec-ch-ua': '"Microsoft Edge";v="107", "Chromium";v="107", "Not=A?Brand";v="24"',
  57. 'sec-ch-ua-mobile': '?0',
  58. 'sec-ch-ua-platform': '"macOS"',
  59. 'Sec-Fetch-Dest': 'empty',
  60. 'Sec-Fetch-Mode': 'cors',
  61. 'Sec-Fetch-Site': 'same-origin',
  62. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.62',
  63. }
  64. r = requests.get(url=url, headers=headers, params=params)
  65. if r.status_code != 200:
  66. Common.logger(log_type).info(f'get_video_url_response:{r.text}')
  67. elif r.json()['errno'] != 0 or len(r.json()['data']) == 0:
  68. Common.logger(log_type).info(f'get_video_url_response:{r.json()}')
  69. else:
  70. clarityUrl = r.json()['data']['apiData']['curVideoMeta']['clarityUrl']
  71. video_url = r.json()['data']['apiData']['curVideoMeta']['clarityUrl'][len(clarityUrl) - 1]['url']
  72. return video_url
  73. user_pcursor = ''
  74. user_cnt = []
  75. @classmethod
  76. def get_follow_users(cls):
  77. while True:
  78. url = "https://www.kuaishou.com/graphql"
  79. payload = {
  80. "operationName": "visionProfileUserList",
  81. "variables": {"ftype": 1, "pcursor": str(cls.user_pcursor)},
  82. "query": "query visionProfileUserList($pcursor: String, $ftype: Int) "
  83. "{\n visionProfileUserList(pcursor: $pcursor, ftype: $ftype) "
  84. "{\n result\n fols {\n user_name\n headurl\n "
  85. "user_text\n isFollowing\n user_id\n __typename\n }\n"
  86. " hostName\n pcursor\n __typename\n }\n}\n"
  87. }
  88. headers = {
  89. 'accept': '*/*',
  90. 'Accept-Encoding': 'gzip, deflate, br',
  91. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  92. 'Cache-Control': 'no-cache',
  93. 'Connection': 'keep-alive',
  94. 'Content-Length': '382',
  95. 'content-type': 'application/json',
  96. 'Cookie': 'kpf=PC_WEB; '
  97. 'kpn=KUAISHOU_VISION; '
  98. 'clientid=3; '
  99. 'did=web_260fc6cc87668951b745d0aa536b6c45; '
  100. 'client_key=65890b29; '
  101. 'userId=1921947321; '
  102. 'kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABeKWnZv84JvnYIdwhX6o5jaBjDblgvsn'
  103. '7o8XEAvihFCj1P-69BK0qcjnCMbpUUmfT7pqlsn0KVsuK5789LXjySbqdee8yKhNhM-kttccnRokJM7BbSl7VzViTd'
  104. 'afHKDM3UEh1oJKy5s_iMUHbXo6wHSbMjdFYP6QsSRf7xU9KA4IC-uNX9ZcBj39OuCUCjlQFOXLjJV8mYAcBt1BqrIT'
  105. 'VhhoS6uws2LN-siMyPVYdMaXTUH7FIiCEI0MRi2vzOYp7_RSnHX_7--xNCDYQVUo05gjITkWaGCgFMAE; '
  106. 'kuaishou.server.web_ph=f2b6b9531d9ade009d5dccf1351ae7d4c7d2',
  107. 'Pragma': 'no-cache',
  108. 'Referer': 'https://www.kuaishou.com/profile/3xbp7922t5sgcpq',
  109. 'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
  110. 'sec-ch-ua-mobile': '?0',
  111. 'sec-ch-ua-platform': '"macOS"',
  112. 'Sec-Fetch-Dest': 'empty',
  113. 'Sec-Fetch-Mode': 'cors',
  114. 'Sec-Fetch-Site': 'same-origin',
  115. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
  116. '(KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
  117. }
  118. r = requests.post(url=url, headers=headers, json=payload)
  119. fols = r.json()['data']['visionProfileUserList']['fols']
  120. cls.user_pcursor = r.json()['data']['visionProfileUserList']['pcursor']
  121. if len(fols) == 0:
  122. print(f'到底啦~\n共关注 {len(cls.user_cnt)} 人\n')
  123. cls.user_pcursor = 0
  124. cls.user_cnt = []
  125. return
  126. for i in range(len(fols)):
  127. user_name = fols[i]['user_name']
  128. user_id = fols[i]['user_id']
  129. cls.user_cnt.append(user_id)
  130. print(f'user_name:{user_name}')
  131. print(f'user_id:{user_id}')
  132. print('\n')
  133. @classmethod
  134. def get_video_feeds(cls, log_type, out_id):
  135. url = 'https://www.kuaishou.com/graphql'
  136. headers = {
  137. 'accept': '*/*',
  138. 'Accept-Encoding': 'gzip, deflate, br',
  139. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  140. 'Cache-Control': 'no-cache',
  141. 'content-type': 'application/json',
  142. 'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_260fc6cc87668951b745d0aa536b6c45; client_key=65890b29; userId=1921947321; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABwCuiLLMSOvJtdcmO-XD0RHf-lRaR--1o0MNMhnsdl1xRZOfkHgEnFM1GNwk-pNxGwIXupJiGfL6JC-dsw6pNVRfW9eiCMUiFfVSS8bbVuBhSnh4wlJc6e2wy6FeYXC4V2WUQ7CKlUc_1sESmPlYq1D2JV7eKyNJP9VVnZ1i_y31SnTnAx2DpncW8UNV2qv_bjTxokg4rZRPnBBRCfAAhShoStEyT9S95saEmiR8Dg-bb1DKRIiDJ8NOu7QSy1FkD3GwzpAURUTTYYTuENNqldrStQMfWxygFMAE; kuaishou.server.web_ph=f80aff104d1749fbb5b6760aee76d5b640e2',
  143. 'Pragma': 'no-cache',
  144. 'Referer': 'https://www.kuaishou.com/profile/'+str(out_id),
  145. 'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
  146. 'sec-ch-ua-mobile': '?0',
  147. 'sec-ch-ua-platform': '"macOS"',
  148. 'Sec-Fetch-Dest': 'empty',
  149. 'Sec-Fetch-Mode': 'cors',
  150. 'Sec-Fetch-Site': 'same-origin',
  151. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
  152. }
  153. payload = {
  154. "operationName": "visionProfilePhotoList",
  155. "variables": {
  156. "userId": str(out_id),
  157. "pcursor": "",
  158. "page": "profile"
  159. },
  160. "query": "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"
  161. }
  162. r = requests.post(url=url, headers=headers, json=payload)
  163. feeds = r.json()['data']['visionProfilePhotoList']['feeds']
  164. if len(feeds) == 0:
  165. Common.logger(log_type).info('到底啦~\n')
  166. return
  167. for i in range(len(feeds)):
  168. # 视频标题过滤话题及处理特殊字符
  169. kuaishou_title = feeds[i]['photo']["caption"]
  170. title_split1 = kuaishou_title.split(" #")
  171. if title_split1[0] != "":
  172. title1 = title_split1[0]
  173. else:
  174. title1 = title_split1[-1]
  175. title_split2 = title1.split(" #")
  176. if title_split2[0] != "":
  177. title2 = title_split2[0]
  178. else:
  179. title2 = title_split2[-1]
  180. title_split3 = title2.split("@")
  181. if title_split3[0] != "":
  182. title3 = title_split3[0]
  183. else:
  184. title3 = title_split3[-1]
  185. video_title = title3.strip()\
  186. .replace("\n", "").replace("/", "").replace("快手", "").replace(" ", "").replace(" ", "")\
  187. .replace("&NBSP", "").replace("\r", "").replace("#", "").replace(".", "。").replace("\\", "")\
  188. .replace(":", "").replace("*", "").replace("?", "").replace("?", "").replace('"', "")\
  189. .replace("<", "").replace(">", "").replace("|", "").replace("@", "")[:40]
  190. hevc = feeds[i]['photo']['videoResource']['hevc']
  191. if hevc == '':
  192. video_url = feeds[i]['photo']['videoResource']['h264']['adaptationSet'][0]['representation'][0]['url']
  193. else:
  194. video_url = feeds[i]['photo']['videoResource']['hevc']['adaptationSet'][0]['representation'][0]['url']
  195. print(f'video_title:{video_title}')
  196. print(f'video_url:{video_url}')
  197. print('\n')
  198. if __name__ == '__main__':
  199. Demo.get_sheet('demo', 'haokan', 'kVaSjf')
  200. # Demo.publish_time()
  201. # Demo.get_video_url('demo', '10377041690614321392')
  202. # Demo.get_follow_users('demo')
  203. # Demo.get_video_feeds('demo', '3xfr3gqnxmk92y2')
  204. # print(Feishu.get_values_batch('log_type', 'haokan', '5LksMx')[0][0])
  205. # print(type(Feishu.get_values_batch('log_type', 'haokan', '5LksMx')[0][0]))
  206. # Demo.publish_time(publish_time='刚刚')
  207. # Demo.publish_time(publish_time='1分钟前')
  208. # Demo.publish_time(publish_time='1小时前')
  209. # Demo.publish_time(publish_time='昨天')
  210. # Demo.publish_time(publish_time='3天前')
  211. # Demo.publish_time(publish_time='2022年01月10日')
  212. # Demo.publish_time(publish_time='01月10日')
  213. pass