xigua_follow.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/8/23
  4. import base64
  5. import os
  6. import random
  7. import subprocess
  8. import sys
  9. import time
  10. import requests
  11. import urllib3
  12. sys.path.append(os.getcwd())
  13. from main.common import Common
  14. from main.feishu import Feishu
  15. from main.publish import Publish
  16. from main.get_signature import GetSignature
# Proxy mapping passed as ``proxies=`` to every ``requests.get`` call in this module.
# NOTE(review): presumably meant to bypass any system/environment proxy - confirm.
proxies = {"http": None, "https": None}
  18. class Follow:
  19. # 个人主页视频翻页参数
  20. offset = 0
  21. # 获取用户信息(字典格式). 注意:部分 user_id 字符类型是 int / str
  22. @classmethod
  23. def get_user_info_from_feishu(cls, log_type):
  24. try:
  25. user_sheet = Feishu.get_values_batch(log_type, 'xigua', '5tlTYB')
  26. user_dict = {}
  27. for i in range(1, len(user_sheet)):
  28. user_name = user_sheet[i][0]
  29. user_id = user_sheet[i][1]
  30. our_id = user_sheet[i][3]
  31. if user_name is None or user_id is None or our_id is None:
  32. pass
  33. else:
  34. user_dict[user_name] = str(user_id)+','+str(our_id)
  35. return user_dict
  36. except Exception as e:
  37. Common.logger(log_type).error('get_user_id_from_feishu异常:{}\n', e)
  38. # 下载规则
  39. @staticmethod
  40. def download_rule(duration, width, height):
  41. if int(duration) >= 60:
  42. if int(width) >= 720 or int(height) >= 720:
  43. return True
  44. else:
  45. return False
  46. else:
  47. return False
  48. # 过滤词库
  49. @classmethod
  50. def filter_words(cls, log_type):
  51. try:
  52. filter_words_sheet = Feishu.get_values_batch(log_type, 'xigua', 'KGB4Hc')
  53. filter_words_list = []
  54. for x in filter_words_sheet:
  55. for y in x:
  56. if y is None:
  57. pass
  58. else:
  59. filter_words_list.append(y)
  60. return filter_words_list
  61. except Exception as e:
  62. Common.logger(log_type).error('filter_words异常:{}\n', e)
  63. # PC端:西瓜用户主页视频列表. 注意:参数_signature有效期时长只有一小时
  64. @classmethod
  65. def get_follow_feeds_by_pc(cls, log_type, userid):
  66. try:
  67. url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
  68. headers = {
  69. 'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
  70. 'accept': 'application/json, text/plain, */*',
  71. 'sec-ch-ua-mobile': '?0',
  72. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)'
  73. ' Chrome/103.0.0.0 Safari/537.36',
  74. 'sec-ch-ua-platform': '"macOS"',
  75. 'sec-fetch-site': 'same-origin',
  76. 'sec-fetch-mode': 'cors',
  77. 'sec-fetch-dest': 'document',
  78. 'referer': 'https://www.ixigua.com/home/' + str(userid),
  79. 'accept-encoding': 'gzip, deflate, br',
  80. 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
  81. }
  82. params = {
  83. 'to_user_id': str(userid),
  84. 'offset': str(cls.offset),
  85. 'limit': '30',
  86. 'maxBehotTime': '0',
  87. 'order': 'new',
  88. 'isHome': '0',
  89. 'msToken': '2ZHINOMBPK-qlCKApv37xVCBKkXyPli8mTYNlTSXvr17eZ0Ea8B__Otimkx6q_enDc9m8Kgzi3Re7wpLIMSSE9dofTYdqQgvB7mHQbx_AMnVnf5lsByU',
  90. 'X-Bogus': 'DFSzswVuVvTANe2BSBBMCR/F6qyc',
  91. '_signature': Feishu.get_values_batch(log_type, 'xigua', '6tZHhs')[1][1],
  92. }
  93. cookies = {
  94. '__ac_signature': '_02B4Z6wo00f017vzS8QAAIDCwz2gwwDpX9-7009AAI4Bc4',
  95. 'MONITOR_WEB_ID': 'fd4244aa-2003-4e19-a2a4-715c19310a56',
  96. 'ixigua-a-s': '1',
  97. 'support_webp': 'true',
  98. 'support_avif': 'true',
  99. '_tea_utm_cache_1300': 'undefined',
  100. 'ttcid': '16a3b6b9b80b4a87ae258f5f3f101e6310',
  101. 'msToken': 'G8pL2oH-9Zl1hrLZPyOMSceMaII3ejKda2o-tgO1heYrj7b_fgm9vGlvwyLOA2H8oUShZgAYfxEvIuktT7OuxBuy85N-ousFfqxuAIrfruMEFZUTYp2z',
  102. 'tt_scid': 'a0zhISPImN-dVMMdbeb1Kzhl1x4oJS5Yr81FzH6qYk3jDtj1d2E5gsywN4rwna8ib398',
  103. 'ttwid': '1%7CvorN1HQjbSgBViRkEoZYEbqP_sQVoQqaUqGcFA-bzpA%7C1661324763%7Ce040213e1107973ebb0db64f0e77cfb027375f1fb5854bb40588d692d025af1f',
  104. }
  105. # Common.logger(log_type).info('offset:{}', cls.offset)
  106. urllib3.disable_warnings()
  107. response = requests.get(url=url, headers=headers, params=params, cookies=cookies, proxies=proxies, verify=False)
  108. # Common.logger(log_type).info('response:{}', response.text)
  109. cls.offset += 30
  110. if 'data' not in response.text or response.json()['data'] == '' or response.json()['code'] != 200:
  111. Common.logger(log_type).info('get_follow_feeds: response:{}', response.text)
  112. else:
  113. feeds = response.json()['data']['videoList']
  114. # print(len(feeds))
  115. for i in range(len(feeds)):
  116. # video_title
  117. if 'title' not in feeds[i]:
  118. video_title = 0
  119. else:
  120. video_title = feeds[i]['title'].strip().replace('手游', '')\
  121. .replace('/', '').replace('\/', '').replace('\n', '')
  122. # video_id
  123. if 'video_id' not in feeds[i]:
  124. video_id = 0
  125. else:
  126. video_id = feeds[i]['video_id']
  127. # gid
  128. if 'gid' not in feeds[i]:
  129. gid = 0
  130. else:
  131. gid = feeds[i]['gid']
  132. # play_cnt
  133. if 'video_detail_info' not in feeds[i]:
  134. play_cnt = 0
  135. elif 'video_watch_count' not in feeds[i]['video_detail_info']:
  136. play_cnt = 0
  137. else:
  138. play_cnt = feeds[i]['video_detail_info']['video_watch_count']
  139. # comment_cnt
  140. if 'comment_count' not in feeds[i]:
  141. comment_cnt = 0
  142. else:
  143. comment_cnt = feeds[i]['comment_count']
  144. # like_cnt
  145. if 'digg_count' not in feeds[i]:
  146. like_cnt = 0
  147. else:
  148. like_cnt = feeds[i]['digg_count']
  149. # share_cnt
  150. share_cnt = 0
  151. # video_duration
  152. if 'video_duration' not in feeds[i]:
  153. video_duration = 0
  154. else:
  155. video_duration = feeds[i]['video_duration']
  156. # send_time
  157. if 'publish_time' not in feeds[i]:
  158. send_time = 0
  159. else:
  160. send_time = feeds[i]['publish_time']
  161. # is_top
  162. if 'is_top' not in feeds[i]:
  163. is_top = 0
  164. else:
  165. is_top = feeds[i]['is_top']
  166. # user_name
  167. if 'user_info' not in feeds[i]:
  168. user_name = 0
  169. elif 'name' not in feeds[i]['user_info']:
  170. user_name = 0
  171. else:
  172. user_name = feeds[i]['user_info']['name']
  173. # user_id
  174. if 'user_info' not in feeds[i]:
  175. user_id = 0
  176. elif 'user_id' not in feeds[i]['user_info']:
  177. user_id = 0
  178. else:
  179. user_id = feeds[i]['user_info']['user_id']
  180. # head_url
  181. if 'user_info' not in feeds[i]:
  182. head_url = 0
  183. elif 'avatar_url' not in feeds[i]['user_info']:
  184. head_url = 0
  185. else:
  186. head_url = feeds[i]['user_info']['avatar_url']
  187. # cover_url
  188. if 'video_detail_info' not in feeds[i]:
  189. cover_url = 0
  190. elif 'detail_video_large_image' not in feeds[i]['video_detail_info']:
  191. cover_url = 0
  192. elif 'url' in feeds[i]['video_detail_info']['detail_video_large_image']:
  193. cover_url = feeds[i]['video_detail_info']['detail_video_large_image']['url']
  194. else:
  195. cover_url = feeds[i]['video_detail_info']['detail_video_large_image']['url_list'][0]['url']
  196. video_url_info = cls.get_video_info(log_type, gid)
  197. video_width = video_url_info[2]
  198. video_height = video_url_info[-1]
  199. video_url = video_url_info[0]
  200. audio_url = video_url_info[1]
  201. Common.logger(log_type).info('video_title:{}', video_title)
  202. Common.logger(log_type).info('video_id:{}', video_id)
  203. Common.logger(log_type).info('play_cnt:{}', play_cnt)
  204. # Common.logger(log_type).info('is_top:{}, {}', type(is_top), is_top)
  205. Common.logger(log_type).info('send_time:{}',
  206. time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(send_time)))
  207. if gid == 0 or video_id == 0:
  208. Common.logger(log_type).info('无效视频\n')
  209. elif is_top is True and int(time.time()) - int(send_time) > 3600 * 24 * 10:
  210. Common.logger(log_type).info('置顶视频,且发布时间超过10天:{}\n',
  211. time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(send_time)))
  212. elif int(time.time()) - int(send_time) > 3600 * 24 * 10:
  213. Common.logger(log_type).info('发布时间超过10天:{}\n',
  214. time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(send_time)))
  215. cls.offset = 0
  216. return
  217. elif cls.download_rule(video_duration, video_width, video_height) is False:
  218. Common.logger(log_type).info('不满足抓取规则\n')
  219. elif any(word if word in video_title else False for word in cls.filter_words(log_type)) is True:
  220. Common.logger(log_type).info('标题已中过滤词:{}\n', video_title)
  221. elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'e075e9') for x in y]:
  222. Common.logger(log_type).info('视频已下载\n')
  223. elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'xigua', '3Ul6wZ') for x in y]:
  224. Common.logger(log_type).info('视频已下载\n')
  225. elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs') for x in y]:
  226. Common.logger(log_type).info('视频已存在\n')
  227. else:
  228. Feishu.insert_columns(log_type, 'xigua', 'wjhpDs', 'ROWS', 1, 2)
  229. get_feeds_time = time.time()
  230. values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
  231. '关注榜',
  232. video_title,
  233. str(video_id),
  234. gid,
  235. play_cnt,
  236. comment_cnt,
  237. like_cnt,
  238. share_cnt,
  239. video_duration,
  240. str(video_width) + '*' + str(video_height),
  241. time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(send_time)),
  242. user_name,
  243. user_id,
  244. head_url,
  245. cover_url,
  246. video_url,
  247. audio_url]]
  248. time.sleep(1)
  249. Feishu.update_values(log_type, 'xigua', 'wjhpDs', 'A2:Z2', values)
  250. Common.logger(log_type).info('视频信息写入飞书成功\n')
  251. time.sleep(random.randint(1, 3))
  252. except Exception as e:
  253. Common.logger(log_type).error('get_follow_feeds_by_pc异常:{}\n', e)
  254. # 获取视频详情
  255. @classmethod
  256. def get_video_info(cls, log_type, gid):
  257. try:
  258. url = 'https://www.ixigua.com/api/mixVideo/information?'
  259. headers = {
  260. "accept-encoding": "gzip, deflate, br",
  261. "accept-language": "zh-CN,zh-Hans;q=0.9",
  262. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
  263. "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
  264. "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
  265. }
  266. params = {
  267. 'mixId': gid,
  268. 'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
  269. 'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
  270. 'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
  271. '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
  272. 'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
  273. }
  274. cookies = {
  275. 'ixigua-a-s': '1',
  276. 'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
  277. 'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
  278. 'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
  279. '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
  280. 'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
  281. 'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
  282. '__ac_nonce': '06304878000964fdad287',
  283. '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
  284. 'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
  285. 'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
  286. '_tea_utm_cache_1300': 'undefined',
  287. 'support_avif': 'false',
  288. 'support_webp': 'false',
  289. 'xiguavideopcwebid': '7134967546256016900',
  290. 'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
  291. }
  292. urllib3.disable_warnings()
  293. response = requests.get(url=url, headers=headers, params=params, cookies=cookies, proxies=proxies,
  294. verify=False)
  295. if 'data' not in response.json() or response.json()['data'] == '':
  296. Common.logger(log_type).warning('get_video_info: response: {}', response)
  297. else:
  298. video_info = response.json()['data']['gidInformation']['packerData']['video']
  299. video_url = ''
  300. audio_url = ''
  301. video_width = ''
  302. video_height = ''
  303. # video_url
  304. if 'videoResource' not in video_info:
  305. video_url = 0
  306. audio_url = 0
  307. video_width = 0
  308. video_height = 0
  309. elif 'dash' in video_info['videoResource'] \
  310. and 'dynamic_video' in video_info['videoResource']['dash']\
  311. and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video']:
  312. video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
  313. 'main_url']
  314. audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1][
  315. 'main_url']
  316. video_url = base64.b64decode(video_url).decode('utf8')
  317. audio_url = base64.b64decode(audio_url).decode('utf8')
  318. video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
  319. 'vwidth']
  320. video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1][
  321. 'vheight']
  322. elif 'normal' in video_info['videoResource']:
  323. video_list = video_info['videoResource']['normal']['video_list']
  324. if 'video_4' in video_list.keys():
  325. # Common.logger(log_type).info('{}', video_list['video_4'])
  326. video_url = video_list['video_4']['main_url']
  327. audio_url = video_list['video_4']['main_url']
  328. video_url = base64.b64decode(video_url).decode('utf8')
  329. audio_url = base64.b64decode(audio_url).decode('utf8')
  330. video_width = video_list['video_4']['vwidth']
  331. video_height = video_list['video_4']['vheight']
  332. elif 'video_3' in video_list.keys():
  333. # Common.logger(log_type).info('{}', video_list['video_3'])
  334. video_url = video_list['video_3']['main_url']
  335. audio_url = video_list['video_3']['main_url']
  336. video_url = base64.b64decode(video_url).decode('utf8')
  337. audio_url = base64.b64decode(audio_url).decode('utf8')
  338. video_width = video_list['video_3']['vwidth']
  339. video_height = video_list['video_3']['vheight']
  340. elif 'video_2' in video_list.keys():
  341. # Common.logger(log_type).info('{}', video_list['video_2'])
  342. video_url = video_list['video_2']['main_url']
  343. audio_url = video_list['video_2']['main_url']
  344. video_url = base64.b64decode(video_url).decode('utf8')
  345. audio_url = base64.b64decode(audio_url).decode('utf8')
  346. video_width = video_list['video_2']['vwidth']
  347. video_height = video_list['video_2']['vheight']
  348. elif 'video_1' in video_list.keys():
  349. # Common.logger(log_type).info('{}', video_list['video_1'])
  350. video_url = video_list['video_1']['main_url']
  351. audio_url = video_list['video_1']['main_url']
  352. video_url = base64.b64decode(video_url).decode('utf8')
  353. audio_url = base64.b64decode(audio_url).decode('utf8')
  354. video_width = video_list['video_1']['vwidth']
  355. video_height = video_list['video_1']['vheight']
  356. else:
  357. video_url = 0
  358. audio_url = 0
  359. video_width = 0
  360. video_height = 0
  361. return video_url, audio_url, video_width, video_height
  362. except Exception as e:
  363. Common.logger(log_type).error('get_video_info异常:{}\n', e)
  364. # APP端:西瓜视频用户主页
  365. @classmethod
  366. def get_follow_feeds_by_app(cls, log_type, userid):
  367. while True:
  368. try:
  369. url = "https://api5-normal-quic-lq.ixigua.com/video/app/user/videolist_tab/v3/?"
  370. headers = {
  371. 'Host': 'api5-normal-quic-lq.ixigua.com',
  372. 'Cookie': 'passport_csrf_token=9dc29668504aefd8f810d194c1591b27; passport_csrf_token_default=9dc29668504aefd8f810d194c1591b27; d_ticket=8cc008f231ad00a57481e490f82f4bedebe99; n_mh=Zi1ukqZaOfwMQ8RKEEaBFHPd94g9LJFrf_5jskG0uhY; odin_tt=79986f6d46fe14e0f0cf5c6d831005ef2d2ba797151d32eb7678d9ec14a770349dcc7f5cce1746a00dc493838a94db296ef2135712d40b5de1b4ebb170e7e3bf; sessionid=cd61dd6003146ce5b8d19b1eeb29d5b6; sessionid_ss=cd61dd6003146ce5b8d19b1eeb29d5b6; sid_guard=cd61dd6003146ce5b8d19b1eeb29d5b6%7C1661320113%7C5184000%7CSun%2C+23-Oct-2022+05%3A48%3A33+GMT; sid_tt=cd61dd6003146ce5b8d19b1eeb29d5b6; uid_tt=6544aadbdc13b980ab4906f550c70af5; uid_tt_ss=6544aadbdc13b980ab4906f550c70af5; install_id=541373572069224; ttreq=1$27a2ec895a960525ef828e684768bef579920543; msToken=6hA48Lf7RVYOl0Okgng_KQzBwfUpN2M5tB6opL8N6YB3EX0VsNQNhGH4kT-vRxO3Yjac8E4w7Zk4rkFF5JCRTilK',
  373. 'x-tt-token': '00cd61dd6003146ce5b8d19b1eeb29d5b603e056899dfc41b69bf336d3ce3bfc61b2822bbd85f84cfdfb3bf876b7bb71ea85363bff7cb21186b571d3418b30838538c78e169a0db8500261060669094c3ed23032496d65f19a0fa66fc54cc4eed2c55-1.0.1',
  374. 'request-startime': '683091411.831285',
  375. 'x-vc-bdturing-sdk-version': '2.2.8',
  376. 'x-ss-cookie': 'install_id=541373572069224; msToken=6hA48Lf7RVYOl0Okgng_KQzBwfUpN2M5tB6opL8N6YB3EX0VsNQNhGH4kT-vRxO3Yjac8E4w7Zk4rkFF5JCRTilK; ttreq=1$27a2ec895a960525ef828e684768bef579920543; d_ticket=8cc008f231ad00a57481e490f82f4bedebe99; n_mh=Zi1ukqZaOfwMQ8RKEEaBFHPd94g9LJFrf_5jskG0uhY; odin_tt=79986f6d46fe14e0f0cf5c6d831005ef2d2ba797151d32eb7678d9ec14a770349dcc7f5cce1746a00dc493838a94db296ef2135712d40b5de1b4ebb170e7e3bf; sessionid=cd61dd6003146ce5b8d19b1eeb29d5b6; sessionid_ss=cd61dd6003146ce5b8d19b1eeb29d5b6; sid_guard=cd61dd6003146ce5b8d19b1eeb29d5b6%7C1661320113%7C5184000%7CSun%2C+23-Oct-2022+05%3A48%3A33+GMT; sid_tt=cd61dd6003146ce5b8d19b1eeb29d5b6; uid_tt=6544aadbdc13b980ab4906f550c70af5; uid_tt_ss=6544aadbdc13b980ab4906f550c70af5; passport_csrf_token=9dc29668504aefd8f810d194c1591b27; passport_csrf_token_default=9dc29668504aefd8f810d194c1591b27',
  377. 'tt-request-time': '1661398611831',
  378. 'user-agent': 'Video 6.8.8 rv:6.8.8.12 (iPhone; iOS 14.7.1; zh_CN) Cronet',
  379. 'sdk-version': '2',
  380. 'x-tt-dt': 'AAARLMRFIGV63HLKR2OFYMAN4ECX3S3FF7T6VF3ZUGZVJHJRTAR6TZ6TXKNYXU5US4L72542CDEO4CJAORJUPSELHB52LINBZAWN7DIMVSPRKPKSIJYA2S2ZS7PIYZQBQ3OFWJETR35OAD55FXYP6OY',
  381. 'passport-sdk-version': '5.14.3',
  382. 'x-bd-kmsv': '1',
  383. 'x-ss-dp': '32',
  384. 'x-tt-trace-id': '00-d312f8fb0dae06939d00507998be0020-d312f8fb0dae0693-01',
  385. 'x-argus': 'OoPWDUi7xa1FAheuXaB4U+12sViNA+0vZEq7RpA1HvKF5CreKftmWWAtl1ndNdJNbk4zPogps8WNxsRJWdgZOzLg5CUTwVWrMQ/ptLgYrFTXbKf4P4CpqSRoJEHca/LVYRXUrTxTsi+AS7u/S3BTCrzm6nwvZB43GyiLGyN1W38poinJoMkPltgUNoSkAilVXCTu3iSWFLUYayOF7MwFRnYFxU4vBu+XmYCtl74XVCCARZD6uYf/cjkIH9wRD+uv0HBNlI70mqjaQOTYtlINi2i61yctngEjgwpV6s+4GLWQQYY6KXq+eu9mEppFDLSI9WY=',
  386. 'x-gorgon': '8404e06000002dfc1ace57427120b4f72a226ce677bde6d67b92',
  387. 'x-khronos': '1661398611',
  388. 'x-ladon': '7bRfCQvXSDeU17k7XA6Y7TSO0rsUmxbxtqt+apKfuSx/juZZ'
  389. }
  390. params = {
  391. 'anti_addiction_model': '0',
  392. 'version_code': '10.8.8',
  393. 'app_name': 'video_article',
  394. 'device_id': '3061492313228551',
  395. 'channel': 'App%20Store',
  396. 'resolution': '828*1792',
  397. 'aid': '32',
  398. 'ab_feature': 'z1',
  399. 'ab_version': '668851,4601580,668854,4594840,4601552,4622288,4641673,668858,4601444,668859,4601563,668856,4601562,668855,4601507,668853,4601558,668852,4601533',
  400. 'update_version_code': '108812',
  401. 'cdid': '7425DF80-0324-4CEF-AAEC-6596F45F2C7A',
  402. 'ac': 'WIFI',
  403. 'os_version': '14.7.1',
  404. 'user_version': '6.8.8',
  405. 'ssmix': 'a',
  406. 'ipad_adapter_enable': '0',
  407. 'device_platform': 'iphone',
  408. 'iid': '541373572069224',
  409. 'device_type': 'iPhone%2011',
  410. 'ab_client': 'a1,f2,f7,e1',
  411. 'cdid_ts': '1661312788',
  412. 'offset': str(cls.offset),
  413. 'orderby': 'publishtime',
  414. 'to_user_id': userid,
  415. 'count': '20',
  416. 'language': 'zh-Hans-CN',
  417. 'loc_mode': '0',
  418. 'ab_version_vid_list': '4413540%2C2190089',
  419. 'enable_publish_status': '0',
  420. 'play_param': 'codec_type%3A7%2Cenable_dash%3A1%2Cresolution%3A828%2A1792%2Cis_order_flow%3A-1%2Cis_hdr%3A1',
  421. 'client_extra': '%7B%22last_ad_position%22%3A-1%7D',
  422. }
  423. urllib3.disable_warnings()
  424. response = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
  425. cls.offset += 30
  426. if 'data' not in response.text or response.json()['code'] != 0 or len(response.json()['data']) == 0:
  427. Common.logger(log_type).warning('get_follow_feeds_by_app: response: {}', response.text)
  428. else:
  429. feeds = response.json()['data']
  430. for i in range(len(feeds)):
  431. # video_title
  432. if 'title' in feeds[i]:
  433. video_title = feeds[i]['title'].strip().replace('手游', '')
  434. else:
  435. video_title = 0
  436. # video_id
  437. if 'video_id' in feeds[i]:
  438. video_id = feeds[i]['video_id']
  439. else:
  440. video_id = 0
  441. # gid
  442. if 'gid' in feeds[i]:
  443. gid = feeds[i]['gid']
  444. else:
  445. gid = 0
  446. # play_cnt
  447. if 'video_detail_info' not in feeds[i]:
  448. play_cnt = 0
  449. elif 'video_watch_count' not in feeds[i]['video_detail_info']:
  450. play_cnt = 0
  451. else:
  452. play_cnt = feeds[i]['video_detail_info']['video_watch_count']
  453. # comment_cnt
  454. if 'comment_count' in feeds[i]:
  455. comment_count = feeds[i]['comment_count']
  456. else:
  457. comment_count = 0
  458. # like_cnt
  459. if 'digg_count' in feeds[i]:
  460. like_cnt = feeds[i]['digg_count']
  461. else:
  462. like_cnt = 0
  463. # share_cnt
  464. if 'share_count' in feeds[i]:
  465. share_cnt = feeds[i]['share_count']
  466. else:
  467. share_cnt = 0
  468. # video_duration
  469. if 'video_duration' in feeds[i]:
  470. video_duration = feeds[i]['video_duration']
  471. else:
  472. video_duration = 0
  473. # send_time
  474. if 'publish_time' in feeds[i]:
  475. send_time = feeds[i]['publish_time']
  476. else:
  477. send_time = 0
  478. # user_name
  479. if 'user_info' not in feeds[i]:
  480. user_name = 0
  481. elif 'name' not in feeds[i]['user_info']:
  482. user_name = 0
  483. else:
  484. user_name = feeds[i]['user_info']['name']
  485. # user_id
  486. if 'user_info' not in feeds[i]:
  487. user_id = 0
  488. elif 'user_id' not in feeds[i]['user_info']:
  489. user_id = 0
  490. else:
  491. user_id = feeds[i]['user_info']['user_id']
  492. # head_url
  493. if 'user_info' not in feeds[i]:
  494. head_url = 0
  495. elif 'avatar_url' not in feeds[i]['user_info']:
  496. head_url = 0
  497. else:
  498. head_url = feeds[i]['user_info']['avatar_url']
  499. # cover_url
  500. if 'video_detail_info' not in feeds[i]:
  501. cover_url = 0
  502. elif 'detail_video_large_image' not in feeds[i]['video_detail_info']:
  503. cover_url = 0
  504. elif 'url' not in feeds[i]['video_detail_info']['detail_video_large_image']:
  505. cover_url = 0
  506. else:
  507. cover_url = feeds[i]['video_detail_info']['detail_video_large_image']['url']
  508. url_info = cls.get_video_info(log_type, gid)
  509. video_url = url_info[0]
  510. audio_url = url_info[1]
  511. video_width = url_info[2]
  512. video_height = url_info[3]
  513. Common.logger(log_type).info('video_title:{}', video_title)
  514. Common.logger(log_type).info('video_id:{}', video_id)
  515. Common.logger(log_type).info('play_cnt:{}', play_cnt)
  516. Common.logger(log_type).info('video_duration:{}', video_duration)
  517. Common.logger(log_type).info('video_width_height:{}', str(video_width) + '*' + str(video_height))
  518. Common.logger(log_type).info('send_time:{}',
  519. time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(send_time)))
  520. if gid == 0 or video_url == 0 or audio_url == 0:
  521. Common.logger(log_type).info('无效视频:{}\n', video_title)
  522. elif int(time.time()) - int(send_time) > 3600 * 24 * 10:
  523. Common.logger(log_type).info('发布时间超过10天:{}\n', time.strftime('%Y/%m/%d %H:%M:%S'),
  524. time.localtime(send_time))
  525. cls.offset = 0
  526. return
  527. elif cls.download_rule(video_duration, video_width, video_height) is False:
  528. Common.logger(log_type).info('不满足抓取规则\n')
  529. elif any(word if word in video_title else False for word in cls.filter_words(log_type)) is True:
  530. Common.logger(log_type).info('标题已中过滤词:{}\n', video_title)
  531. elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'e075e9') for x in y]:
  532. Common.logger(log_type).info('视频已下载:{}\n', video_title)
  533. elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'xigua', '3Ul6wZ') for x in y]:
  534. Common.logger(log_type).info('视频已下载\n')
  535. elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs') for x in y]:
  536. Common.logger(log_type).info('视频已存在:{}\n', video_title)
  537. else:
  538. Feishu.insert_columns(log_type, 'xigua', 'wjhpDs', 'ROWS', 1, 2)
  539. get_feeds_time = int(time.time())
  540. values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
  541. '关注榜',
  542. video_title,
  543. str(video_id),
  544. gid,
  545. int(play_cnt),
  546. int(comment_count),
  547. int(like_cnt),
  548. int(share_cnt),
  549. video_duration,
  550. str(video_width) + '*' + str(video_height),
  551. time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(send_time)),
  552. user_name,
  553. str(user_id),
  554. head_url,
  555. cover_url,
  556. video_url,
  557. audio_url]]
  558. time.sleep(1)
  559. Feishu.update_values(log_type, 'xigua', 'wjhpDs', 'A2:Z2', values)
  560. Common.logger(log_type).info('当前视频信息写入飞书成功\n')
  561. time.sleep(random.randint(1, 3))
  562. except Exception as e:
  563. Common.logger(log_type).error('get_follow_feeds_by_app异常:{}\n', e)
  564. # 获取所有用户主页视频
  565. @classmethod
  566. def get_all_person_videos(cls, log_type, env):
  567. try:
  568. user_list = cls.get_user_info_from_feishu(log_type)
  569. if len(user_list) == 0:
  570. Common.logger(log_type).warning('用户ID列表为空\n')
  571. else:
  572. for k, v in user_list.items():
  573. Common.logger(log_type).info('正在获取 {} 主页视频\n', k)
  574. GetSignature.get_signature('follow')
  575. cls.get_follow_feeds_by_pc(log_type, v.split(',')[0])
  576. time.sleep(1)
  577. cls.run_download_publish(log_type, env, v.split(',')[-1])
  578. time.sleep(random.randint(30, 60))
  579. cls.offset = 0
  580. except Exception as e:
  581. Common.logger(log_type).error('get_all_person_videos异常:{}\n', e)
  582. # 合并音视频
  583. @classmethod
  584. def video_compose(cls, log_type, video_title):
  585. video_path = './videos/' + str(video_title) + '/video1.mp4'
  586. audio_path = './videos/' + str(video_title) + '/audio1.mp4'
  587. out_path = './videos/' + str(video_title) + '/video.mp4'
  588. cmd = 'ffmpeg -i ' + video_path + ' -i ' + audio_path + ' -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 ' + out_path
  589. # print(cmd)
  590. subprocess.call(cmd, shell=True)
  591. for file in os.listdir('./videos/' + str(video_title)):
  592. if file.split('.mp4')[0] == 'video1' or file.split('.mp4')[0] == 'audio1':
  593. os.remove('./videos/' + str(video_title) + '/' + file)
  594. Common.logger(log_type).info('合成成功')
  595. # 下载 / 上传
  596. @classmethod
  597. def download_publish(cls, log_type, env, uid):
  598. try:
  599. feeds_sheet = Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs')
  600. for i in range(1, len(feeds_sheet)):
  601. download_video_title = feeds_sheet[i][2]
  602. download_video_id = feeds_sheet[i][3]
  603. download_video_gid = feeds_sheet[i][4]
  604. download_play_cnt = feeds_sheet[i][5]
  605. download_comment_cnt = feeds_sheet[i][6]
  606. download_like_cnt = feeds_sheet[i][7]
  607. download_share_cnt = feeds_sheet[i][8]
  608. download_video_duration = feeds_sheet[i][9]
  609. download_video_width_height = feeds_sheet[i][10]
  610. download_send_time = feeds_sheet[i][11]
  611. download_user_name = feeds_sheet[i][12]
  612. download_user_id = feeds_sheet[i][13]
  613. download_head_url = feeds_sheet[i][14]
  614. download_cover_url = feeds_sheet[i][15]
  615. download_video_url = feeds_sheet[i][16]
  616. download_audio_url = feeds_sheet[i][17]
  617. Common.logger(log_type).info('正在判断第{}行:{}', i + 1, download_video_title)
  618. Common.logger(log_type).info('download_video_id:{}', download_video_id)
  619. Common.logger(log_type).info('download_video_duration:{}', download_video_duration)
  620. Common.logger(log_type).info('download_send_time:{}', download_send_time)
  621. # 过滤空行
  622. if download_video_title is None or download_video_id is None:
  623. Feishu.dimension_range(log_type, 'xigua', 'wjhpDs', 'ROWS', i + 1, i + 1)
  624. Common.logger(log_type).info('空行,删除成功\n')
  625. return
  626. elif str(download_video_id) in [x for y in Feishu.get_values_batch(log_type, 'xigua', 'e075e9') for x in
  627. y]:
  628. Feishu.dimension_range(log_type, 'xigua', 'wjhpDs', 'ROWS', i + 1, i + 1)
  629. Common.logger(log_type).info('视频已下载,删除成功\n')
  630. return
  631. elif str(download_video_id) in [x for y in Feishu.get_values_batch(log_type, 'xigua', '3Ul6wZ') for x in
  632. y]:
  633. Feishu.dimension_range(log_type, 'xigua', 'wjhpDs', 'ROWS', i + 1, i + 1)
  634. Common.logger(log_type).info('视频已下载,删除成功\n')
  635. return
  636. else:
  637. # 下载封面
  638. Common.download_method(log_type=log_type, text='cover', d_name=download_video_title,
  639. d_url=download_cover_url)
  640. # 下载视频
  641. Common.download_method(log_type=log_type, text='video', d_name=download_video_title,
  642. d_url=download_video_url)
  643. # 下载音频
  644. Common.download_method(log_type=log_type, text='audio', d_name=download_video_title,
  645. d_url=download_audio_url)
  646. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  647. with open("./videos/" + download_video_title + "/" + "info.txt",
  648. "a", encoding="UTF-8") as f_a:
  649. f_a.write(str(download_video_id) + "\n" +
  650. str(download_video_title) + "\n" +
  651. str(download_video_duration) + "\n" +
  652. str(download_play_cnt) + "\n" +
  653. str(download_comment_cnt) + "\n" +
  654. str(download_like_cnt) + "\n" +
  655. str(download_share_cnt) + "\n" +
  656. str(download_video_width_height) + "\n" +
  657. str(int(time.mktime(
  658. time.strptime(download_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
  659. str(download_user_name) + "\n" +
  660. str(download_head_url) + "\n" +
  661. str(download_video_url) + "\n" +
  662. str(download_cover_url) + "\n" +
  663. "xigua"+str(int(time.time())))
  664. Common.logger("follow").info("==========视频信息已保存至info.txt==========")
  665. # 合成音视频
  666. cls.video_compose(log_type, download_video_title)
  667. # 上传视频
  668. Common.logger(log_type).info("开始上传视频:{}".format(download_video_title))
  669. our_video_id = Publish.upload_and_publish(log_type, env, uid)
  670. if env == 'dev':
  671. our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
  672. else:
  673. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
  674. Common.logger(log_type).info("视频上传完成:{}\n", download_video_title)
  675. # 视频ID工作表,插入首行
  676. Feishu.insert_columns(log_type, 'xigua', "e075e9", "ROWS", 1, 2)
  677. # 视频ID工作表,首行写入数据
  678. upload_time = int(time.time())
  679. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  680. "关注榜",
  681. download_video_title,
  682. str(download_video_id),
  683. our_video_link,
  684. download_video_gid,
  685. download_play_cnt,
  686. download_comment_cnt,
  687. download_like_cnt,
  688. download_share_cnt,
  689. download_video_duration,
  690. download_video_width_height,
  691. download_send_time,
  692. download_user_name,
  693. download_user_id,
  694. download_head_url,
  695. download_cover_url,
  696. download_video_url,
  697. download_audio_url]]
  698. Common.logger(log_type).info('values:{}\n', values)
  699. time.sleep(1)
  700. Feishu.update_values(log_type, 'xigua', "e075e9", "F2:Z2", values)
  701. Common.logger(log_type).info("视频已保存至云文档:{}", download_video_title)
  702. # 删除行或列,可选 ROWS、COLUMNS
  703. Feishu.dimension_range(log_type, 'xigua', "wjhpDs", "ROWS", i + 1, i + 1)
  704. Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
  705. return
  706. except Exception as e:
  707. Common.logger(log_type).error('download_publish异常:{}\n', e)
  708. # 执行 下载 / 上传
  709. @classmethod
  710. def run_download_publish(cls, log_type, env, uid):
  711. try:
  712. while True:
  713. if len(Feishu.get_values_batch(log_type, 'xigua', 'wjhpDs')) == 1:
  714. Common.logger(log_type).info('下载 / 上传 完成\n')
  715. break
  716. else:
  717. cls.download_publish(log_type, env, uid)
  718. time.sleep(random.randint(1, 3))
  719. except Exception as e:
  720. Common.logger(log_type).error('run_download_publish异常:{}\n', e)
  721. if __name__ == '__main__':
  722. Follow.get_follow_feeds_by_pc('follow', '95420624045')
  723. # Follow.get_follow_feeds_by_app('xigua', '6431477489')
  724. # Follow.get_follow_feeds_by_app('follow', '3865480345435996')
  725. # Follow.get_user_info_from_feishu('follow')
  726. # Follow.filter_words('follow')
  727. # Follow.get_all_person_videos('follow', 'dev')
  728. # Follow.download_publish('follow', 'dev', '6267141')
  729. pass