xigua_author.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013
  1. import json
  2. import os
  3. import random
  4. import sys
  5. import string
  6. import time
  7. import uuid
  8. import base64
  9. import requests
  10. from fake_useragent import FakeUserAgent
  11. from common.mq import MQ
  12. sys.path.append(os.getcwd())
  13. from common import AliyunLogger, PiaoQuanPipeline, tunnel_proxies
  14. def random_signature():
  15. """
  16. 随机生成签名
  17. """
  18. src_digits = string.digits # string_数字
  19. src_uppercase = string.ascii_uppercase # string_大写字母
  20. src_lowercase = string.ascii_lowercase # string_小写字母
  21. digits_num = random.randint(1, 6)
  22. uppercase_num = random.randint(1, 26 - digits_num - 1)
  23. lowercase_num = 26 - (digits_num + uppercase_num)
  24. password = (
  25. random.sample(src_digits, digits_num)
  26. + random.sample(src_uppercase, uppercase_num)
  27. + random.sample(src_lowercase, lowercase_num)
  28. )
  29. random.shuffle(password)
  30. new_password = "AAAAAAAAAA" + "".join(password)[10:-4] + "AAAB"
  31. new_password_start = new_password[0:18]
  32. new_password_end = new_password[-7:]
  33. if new_password[18] == "8":
  34. new_password = new_password_start + "w" + new_password_end
  35. elif new_password[18] == "9":
  36. new_password = new_password_start + "x" + new_password_end
  37. elif new_password[18] == "-":
  38. new_password = new_password_start + "y" + new_password_end
  39. elif new_password[18] == ".":
  40. new_password = new_password_start + "z" + new_password_end
  41. else:
  42. new_password = new_password_start + "y" + new_password_end
  43. return new_password
  44. def get_video_url(video_info):
  45. """
  46. 获取视频的链接
  47. """
  48. video_url_dict = {}
  49. # video_url
  50. if "videoResource" not in video_info:
  51. video_url_dict["video_url"] = ""
  52. video_url_dict["audio_url"] = ""
  53. video_url_dict["video_width"] = 0
  54. video_url_dict["video_height"] = 0
  55. elif "dash_120fps" in video_info["videoResource"]:
  56. if (
  57. "video_list" in video_info["videoResource"]["dash_120fps"]
  58. and "video_4" in video_info["videoResource"]["dash_120fps"]["video_list"]
  59. ):
  60. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  61. "video_4"
  62. ]["backup_url_1"]
  63. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  64. "video_4"
  65. ]["backup_url_1"]
  66. if len(video_url) % 3 == 1:
  67. video_url += "=="
  68. elif len(video_url) % 3 == 2:
  69. video_url += "="
  70. elif len(audio_url) % 3 == 1:
  71. audio_url += "=="
  72. elif len(audio_url) % 3 == 2:
  73. audio_url += "="
  74. video_url = base64.b64decode(video_url).decode("utf8")
  75. audio_url = base64.b64decode(audio_url).decode("utf8")
  76. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  77. "video_4"
  78. ]["vwidth"]
  79. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  80. "video_4"
  81. ]["vheight"]
  82. video_url_dict["video_url"] = video_url
  83. video_url_dict["audio_url"] = audio_url
  84. video_url_dict["video_width"] = video_width
  85. video_url_dict["video_height"] = video_height
  86. elif (
  87. "video_list" in video_info["videoResource"]["dash_120fps"]
  88. and "video_3" in video_info["videoResource"]["dash_120fps"]["video_list"]
  89. ):
  90. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  91. "video_3"
  92. ]["backup_url_1"]
  93. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  94. "video_3"
  95. ]["backup_url_1"]
  96. if len(video_url) % 3 == 1:
  97. video_url += "=="
  98. elif len(video_url) % 3 == 2:
  99. video_url += "="
  100. elif len(audio_url) % 3 == 1:
  101. audio_url += "=="
  102. elif len(audio_url) % 3 == 2:
  103. audio_url += "="
  104. video_url = base64.b64decode(video_url).decode("utf8")
  105. audio_url = base64.b64decode(audio_url).decode("utf8")
  106. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  107. "video_3"
  108. ]["vwidth"]
  109. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  110. "video_3"
  111. ]["vheight"]
  112. video_url_dict["video_url"] = video_url
  113. video_url_dict["audio_url"] = audio_url
  114. video_url_dict["video_width"] = video_width
  115. video_url_dict["video_height"] = video_height
  116. elif (
  117. "video_list" in video_info["videoResource"]["dash_120fps"]
  118. and "video_2" in video_info["videoResource"]["dash_120fps"]["video_list"]
  119. ):
  120. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  121. "video_2"
  122. ]["backup_url_1"]
  123. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  124. "video_2"
  125. ]["backup_url_1"]
  126. if len(video_url) % 3 == 1:
  127. video_url += "=="
  128. elif len(video_url) % 3 == 2:
  129. video_url += "="
  130. elif len(audio_url) % 3 == 1:
  131. audio_url += "=="
  132. elif len(audio_url) % 3 == 2:
  133. audio_url += "="
  134. video_url = base64.b64decode(video_url).decode("utf8")
  135. audio_url = base64.b64decode(audio_url).decode("utf8")
  136. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  137. "video_2"
  138. ]["vwidth"]
  139. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  140. "video_2"
  141. ]["vheight"]
  142. video_url_dict["video_url"] = video_url
  143. video_url_dict["audio_url"] = audio_url
  144. video_url_dict["video_width"] = video_width
  145. video_url_dict["video_height"] = video_height
  146. elif (
  147. "video_list" in video_info["videoResource"]["dash_120fps"]
  148. and "video_1" in video_info["videoResource"]["dash_120fps"]["video_list"]
  149. ):
  150. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  151. "video_1"
  152. ]["backup_url_1"]
  153. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  154. "video_1"
  155. ]["backup_url_1"]
  156. if len(video_url) % 3 == 1:
  157. video_url += "=="
  158. elif len(video_url) % 3 == 2:
  159. video_url += "="
  160. elif len(audio_url) % 3 == 1:
  161. audio_url += "=="
  162. elif len(audio_url) % 3 == 2:
  163. audio_url += "="
  164. video_url = base64.b64decode(video_url).decode("utf8")
  165. audio_url = base64.b64decode(audio_url).decode("utf8")
  166. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  167. "video_1"
  168. ]["vwidth"]
  169. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  170. "video_1"
  171. ]["vheight"]
  172. video_url_dict["video_url"] = video_url
  173. video_url_dict["audio_url"] = audio_url
  174. video_url_dict["video_width"] = video_width
  175. video_url_dict["video_height"] = video_height
  176. elif (
  177. "dynamic_video" in video_info["videoResource"]["dash_120fps"]
  178. and "dynamic_video_list"
  179. in video_info["videoResource"]["dash_120fps"]["dynamic_video"]
  180. and "dynamic_audio_list"
  181. in video_info["videoResource"]["dash_120fps"]["dynamic_video"]
  182. and len(
  183. video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  184. "dynamic_video_list"
  185. ]
  186. )
  187. != 0
  188. and len(
  189. video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  190. "dynamic_audio_list"
  191. ]
  192. )
  193. != 0
  194. ):
  195. video_url = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  196. "dynamic_video_list"
  197. ][-1]["backup_url_1"]
  198. audio_url = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  199. "dynamic_audio_list"
  200. ][-1]["backup_url_1"]
  201. if len(video_url) % 3 == 1:
  202. video_url += "=="
  203. elif len(video_url) % 3 == 2:
  204. video_url += "="
  205. elif len(audio_url) % 3 == 1:
  206. audio_url += "=="
  207. elif len(audio_url) % 3 == 2:
  208. audio_url += "="
  209. video_url = base64.b64decode(video_url).decode("utf8")
  210. audio_url = base64.b64decode(audio_url).decode("utf8")
  211. video_width = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  212. "dynamic_video_list"
  213. ][-1]["vwidth"]
  214. video_height = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  215. "dynamic_video_list"
  216. ][-1]["vheight"]
  217. video_url_dict["video_url"] = video_url
  218. video_url_dict["audio_url"] = audio_url
  219. video_url_dict["video_width"] = video_width
  220. video_url_dict["video_height"] = video_height
  221. else:
  222. video_url_dict["video_url"] = ""
  223. video_url_dict["audio_url"] = ""
  224. video_url_dict["video_width"] = 0
  225. video_url_dict["video_height"] = 0
  226. elif "dash" in video_info["videoResource"]:
  227. if (
  228. "video_list" in video_info["videoResource"]["dash"]
  229. and "video_4" in video_info["videoResource"]["dash"]["video_list"]
  230. ):
  231. video_url = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  232. "backup_url_1"
  233. ]
  234. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  235. "backup_url_1"
  236. ]
  237. if len(video_url) % 3 == 1:
  238. video_url += "=="
  239. elif len(video_url) % 3 == 2:
  240. video_url += "="
  241. elif len(audio_url) % 3 == 1:
  242. audio_url += "=="
  243. elif len(audio_url) % 3 == 2:
  244. audio_url += "="
  245. video_url = base64.b64decode(video_url).decode("utf8")
  246. audio_url = base64.b64decode(audio_url).decode("utf8")
  247. video_width = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  248. "vwidth"
  249. ]
  250. video_height = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  251. "vheight"
  252. ]
  253. video_url_dict["video_url"] = video_url
  254. video_url_dict["audio_url"] = audio_url
  255. video_url_dict["video_width"] = video_width
  256. video_url_dict["video_height"] = video_height
  257. elif (
  258. "video_list" in video_info["videoResource"]["dash"]
  259. and "video_3" in video_info["videoResource"]["dash"]["video_list"]
  260. ):
  261. video_url = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  262. "backup_url_1"
  263. ]
  264. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  265. "backup_url_1"
  266. ]
  267. if len(video_url) % 3 == 1:
  268. video_url += "=="
  269. elif len(video_url) % 3 == 2:
  270. video_url += "="
  271. elif len(audio_url) % 3 == 1:
  272. audio_url += "=="
  273. elif len(audio_url) % 3 == 2:
  274. audio_url += "="
  275. video_url = base64.b64decode(video_url).decode("utf8")
  276. audio_url = base64.b64decode(audio_url).decode("utf8")
  277. video_width = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  278. "vwidth"
  279. ]
  280. video_height = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  281. "vheight"
  282. ]
  283. video_url_dict["video_url"] = video_url
  284. video_url_dict["audio_url"] = audio_url
  285. video_url_dict["video_width"] = video_width
  286. video_url_dict["video_height"] = video_height
  287. elif (
  288. "video_list" in video_info["videoResource"]["dash"]
  289. and "video_2" in video_info["videoResource"]["dash"]["video_list"]
  290. ):
  291. video_url = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  292. "backup_url_1"
  293. ]
  294. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  295. "backup_url_1"
  296. ]
  297. if len(video_url) % 3 == 1:
  298. video_url += "=="
  299. elif len(video_url) % 3 == 2:
  300. video_url += "="
  301. elif len(audio_url) % 3 == 1:
  302. audio_url += "=="
  303. elif len(audio_url) % 3 == 2:
  304. audio_url += "="
  305. video_url = base64.b64decode(video_url).decode("utf8")
  306. audio_url = base64.b64decode(audio_url).decode("utf8")
  307. video_width = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  308. "vwidth"
  309. ]
  310. video_height = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  311. "vheight"
  312. ]
  313. video_url_dict["video_url"] = video_url
  314. video_url_dict["audio_url"] = audio_url
  315. video_url_dict["video_width"] = video_width
  316. video_url_dict["video_height"] = video_height
  317. elif (
  318. "video_list" in video_info["videoResource"]["dash"]
  319. and "video_1" in video_info["videoResource"]["dash"]["video_list"]
  320. ):
  321. video_url = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  322. "backup_url_1"
  323. ]
  324. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  325. "backup_url_1"
  326. ]
  327. if len(video_url) % 3 == 1:
  328. video_url += "=="
  329. elif len(video_url) % 3 == 2:
  330. video_url += "="
  331. elif len(audio_url) % 3 == 1:
  332. audio_url += "=="
  333. elif len(audio_url) % 3 == 2:
  334. audio_url += "="
  335. video_url = base64.b64decode(video_url).decode("utf8")
  336. audio_url = base64.b64decode(audio_url).decode("utf8")
  337. video_width = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  338. "vwidth"
  339. ]
  340. video_height = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  341. "vheight"
  342. ]
  343. video_url_dict["video_url"] = video_url
  344. video_url_dict["audio_url"] = audio_url
  345. video_url_dict["video_width"] = video_width
  346. video_url_dict["video_height"] = video_height
  347. elif (
  348. "dynamic_video" in video_info["videoResource"]["dash"]
  349. and "dynamic_video_list"
  350. in video_info["videoResource"]["dash"]["dynamic_video"]
  351. and "dynamic_audio_list"
  352. in video_info["videoResource"]["dash"]["dynamic_video"]
  353. and len(
  354. video_info["videoResource"]["dash"]["dynamic_video"][
  355. "dynamic_video_list"
  356. ]
  357. )
  358. != 0
  359. and len(
  360. video_info["videoResource"]["dash"]["dynamic_video"][
  361. "dynamic_audio_list"
  362. ]
  363. )
  364. != 0
  365. ):
  366. video_url = video_info["videoResource"]["dash"]["dynamic_video"][
  367. "dynamic_video_list"
  368. ][-1]["backup_url_1"]
  369. audio_url = video_info["videoResource"]["dash"]["dynamic_video"][
  370. "dynamic_audio_list"
  371. ][-1]["backup_url_1"]
  372. if len(video_url) % 3 == 1:
  373. video_url += "=="
  374. elif len(video_url) % 3 == 2:
  375. video_url += "="
  376. elif len(audio_url) % 3 == 1:
  377. audio_url += "=="
  378. elif len(audio_url) % 3 == 2:
  379. audio_url += "="
  380. video_url = base64.b64decode(video_url).decode("utf8")
  381. audio_url = base64.b64decode(audio_url).decode("utf8")
  382. video_width = video_info["videoResource"]["dash"]["dynamic_video"][
  383. "dynamic_video_list"
  384. ][-1]["vwidth"]
  385. video_height = video_info["videoResource"]["dash"]["dynamic_video"][
  386. "dynamic_video_list"
  387. ][-1]["vheight"]
  388. video_url_dict["video_url"] = video_url
  389. video_url_dict["audio_url"] = audio_url
  390. video_url_dict["video_width"] = video_width
  391. video_url_dict["video_height"] = video_height
  392. else:
  393. video_url_dict["video_url"] = ""
  394. video_url_dict["audio_url"] = ""
  395. video_url_dict["video_width"] = 0
  396. video_url_dict["video_height"] = 0
  397. elif "normal" in video_info["videoResource"]:
  398. if (
  399. "video_list" in video_info["videoResource"]["normal"]
  400. and "video_4" in video_info["videoResource"]["normal"]["video_list"]
  401. ):
  402. video_url = video_info["videoResource"]["normal"]["video_list"]["video_4"][
  403. "backup_url_1"
  404. ]
  405. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_4"][
  406. "backup_url_1"
  407. ]
  408. if len(video_url) % 3 == 1:
  409. video_url += "=="
  410. elif len(video_url) % 3 == 2:
  411. video_url += "="
  412. elif len(audio_url) % 3 == 1:
  413. audio_url += "=="
  414. elif len(audio_url) % 3 == 2:
  415. audio_url += "="
  416. video_url = base64.b64decode(video_url).decode("utf8")
  417. audio_url = base64.b64decode(audio_url).decode("utf8")
  418. video_width = video_info["videoResource"]["normal"]["video_list"][
  419. "video_4"
  420. ]["vwidth"]
  421. video_height = video_info["videoResource"]["normal"]["video_list"][
  422. "video_4"
  423. ]["vheight"]
  424. video_url_dict["video_url"] = video_url
  425. video_url_dict["audio_url"] = audio_url
  426. video_url_dict["video_width"] = video_width
  427. video_url_dict["video_height"] = video_height
  428. elif (
  429. "video_list" in video_info["videoResource"]["normal"]
  430. and "video_3" in video_info["videoResource"]["normal"]["video_list"]
  431. ):
  432. video_url = video_info["videoResource"]["normal"]["video_list"]["video_3"][
  433. "backup_url_1"
  434. ]
  435. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_3"][
  436. "backup_url_1"
  437. ]
  438. if len(video_url) % 3 == 1:
  439. video_url += "=="
  440. elif len(video_url) % 3 == 2:
  441. video_url += "="
  442. elif len(audio_url) % 3 == 1:
  443. audio_url += "=="
  444. elif len(audio_url) % 3 == 2:
  445. audio_url += "="
  446. video_url = base64.b64decode(video_url).decode("utf8")
  447. audio_url = base64.b64decode(audio_url).decode("utf8")
  448. video_width = video_info["videoResource"]["normal"]["video_list"][
  449. "video_3"
  450. ]["vwidth"]
  451. video_height = video_info["videoResource"]["normal"]["video_list"][
  452. "video_3"
  453. ]["vheight"]
  454. video_url_dict["video_url"] = video_url
  455. video_url_dict["audio_url"] = audio_url
  456. video_url_dict["video_width"] = video_width
  457. video_url_dict["video_height"] = video_height
  458. elif (
  459. "video_list" in video_info["videoResource"]["normal"]
  460. and "video_2" in video_info["videoResource"]["normal"]["video_list"]
  461. ):
  462. video_url = video_info["videoResource"]["normal"]["video_list"]["video_2"][
  463. "backup_url_1"
  464. ]
  465. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_2"][
  466. "backup_url_1"
  467. ]
  468. if len(video_url) % 3 == 1:
  469. video_url += "=="
  470. elif len(video_url) % 3 == 2:
  471. video_url += "="
  472. elif len(audio_url) % 3 == 1:
  473. audio_url += "=="
  474. elif len(audio_url) % 3 == 2:
  475. audio_url += "="
  476. video_url = base64.b64decode(video_url).decode("utf8")
  477. audio_url = base64.b64decode(audio_url).decode("utf8")
  478. video_width = video_info["videoResource"]["normal"]["video_list"][
  479. "video_2"
  480. ]["vwidth"]
  481. video_height = video_info["videoResource"]["normal"]["video_list"][
  482. "video_2"
  483. ]["vheight"]
  484. video_url_dict["video_url"] = video_url
  485. video_url_dict["audio_url"] = audio_url
  486. video_url_dict["video_width"] = video_width
  487. video_url_dict["video_height"] = video_height
  488. elif (
  489. "video_list" in video_info["videoResource"]["normal"]
  490. and "video_1" in video_info["videoResource"]["normal"]["video_list"]
  491. ):
  492. video_url = video_info["videoResource"]["normal"]["video_list"]["video_1"][
  493. "backup_url_1"
  494. ]
  495. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_1"][
  496. "backup_url_1"
  497. ]
  498. if len(video_url) % 3 == 1:
  499. video_url += "=="
  500. elif len(video_url) % 3 == 2:
  501. video_url += "="
  502. elif len(audio_url) % 3 == 1:
  503. audio_url += "=="
  504. elif len(audio_url) % 3 == 2:
  505. audio_url += "="
  506. video_url = base64.b64decode(video_url).decode("utf8")
  507. audio_url = base64.b64decode(audio_url).decode("utf8")
  508. video_width = video_info["videoResource"]["normal"]["video_list"][
  509. "video_1"
  510. ]["vwidth"]
  511. video_height = video_info["videoResource"]["normal"]["video_list"][
  512. "video_1"
  513. ]["vheight"]
  514. video_url_dict["video_url"] = video_url
  515. video_url_dict["audio_url"] = audio_url
  516. video_url_dict["video_width"] = video_width
  517. video_url_dict["video_height"] = video_height
  518. elif (
  519. "dynamic_video" in video_info["videoResource"]["normal"]
  520. and "dynamic_video_list"
  521. in video_info["videoResource"]["normal"]["dynamic_video"]
  522. and "dynamic_audio_list"
  523. in video_info["videoResource"]["normal"]["dynamic_video"]
  524. and len(
  525. video_info["videoResource"]["normal"]["dynamic_video"][
  526. "dynamic_video_list"
  527. ]
  528. )
  529. != 0
  530. and len(
  531. video_info["videoResource"]["normal"]["dynamic_video"][
  532. "dynamic_audio_list"
  533. ]
  534. )
  535. != 0
  536. ):
  537. video_url = video_info["videoResource"]["normal"]["dynamic_video"][
  538. "dynamic_video_list"
  539. ][-1]["backup_url_1"]
  540. audio_url = video_info["videoResource"]["normal"]["dynamic_video"][
  541. "dynamic_audio_list"
  542. ][-1]["backup_url_1"]
  543. if len(video_url) % 3 == 1:
  544. video_url += "=="
  545. elif len(video_url) % 3 == 2:
  546. video_url += "="
  547. elif len(audio_url) % 3 == 1:
  548. audio_url += "=="
  549. elif len(audio_url) % 3 == 2:
  550. audio_url += "="
  551. video_url = base64.b64decode(video_url).decode("utf8")
  552. audio_url = base64.b64decode(audio_url).decode("utf8")
  553. video_width = video_info["videoResource"]["normal"]["dynamic_video"][
  554. "dynamic_video_list"
  555. ][-1]["vwidth"]
  556. video_height = video_info["videoResource"]["normal"]["dynamic_video"][
  557. "dynamic_video_list"
  558. ][-1]["vheight"]
  559. video_url_dict["video_url"] = video_url
  560. video_url_dict["audio_url"] = audio_url
  561. video_url_dict["video_width"] = video_width
  562. video_url_dict["video_height"] = video_height
  563. else:
  564. video_url_dict["video_url"] = ""
  565. video_url_dict["audio_url"] = ""
  566. video_url_dict["video_width"] = 0
  567. video_url_dict["video_height"] = 0
  568. else:
  569. video_url_dict["video_url"] = ""
  570. video_url_dict["audio_url"] = ""
  571. video_url_dict["video_width"] = 0
  572. video_url_dict["video_height"] = 0
  573. return video_url_dict
  574. def get_comment_cnt(item_id):
  575. """
  576. 获取视频的评论数量
  577. """
  578. url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
  579. params = {
  580. "tab_index": "0",
  581. "count": "10",
  582. "offset": "10",
  583. "group_id": str(item_id),
  584. "item_id": str(item_id),
  585. "aid": "1768",
  586. "msToken": "50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==",
  587. "X-Bogus": "DFSzswVOyGtANVeWtCLMqR/F6q9U",
  588. "_signature": random_signature(),
  589. }
  590. headers = {
  591. "authority": "www.ixigua.com",
  592. "accept": "application/json, text/plain, */*",
  593. "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
  594. "cache-control": "no-cache",
  595. "cookie": "MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; passport_csrf_token=72b2574f3c99f8ba670e42df430218fd; passport_csrf_token_default=72b2574f3c99f8ba670e42df430218fd; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; uid_tt=c13f47d51767f616befe32fb3e9f485a; uid_tt_ss=c13f47d51767f616befe32fb3e9f485a; sid_tt=c7472b508ea631823ba765a60cf8757f; sessionid=c7472b508ea631823ba765a60cf8757f; sessionid_ss=c7472b508ea631823ba765a60cf8757f; sid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; ssid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; SEARCH_CARD_MODE=7168304743566296612_0; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=7Pux7s634-z8DYvCM20y7KigwH5u7Rh6D9C-RROpnT.aGMEcz6Vsxp.oai47wJqa4f86; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1683858689%7Ca5223fe1500578e01e138a0d71d6444692018296c4c24f5885af174a65873c95; ixigua-a-s=3; msToken=50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==; __ac_nonce=0645dcbf0005064517440; __ac_signature=_02B4Z6wo00f01FEGmAwAAIDBKchzCGqn-MBRJpyAAHAjieFC5GEg6gGiwz.I4PRrJl7f0GcixFrExKmgt6QI1i1S-dQyofPEj2ugWTCnmKUdJQv-wYuDofeKNe8VtMtZq2aKewyUGeKU-5Ud21; ixigua-a-s=3",
  596. "pragma": "no-cache",
  597. "referer": f"https://www.ixigua.com/{item_id}?logTag=3c5aa86a8600b9ab8540",
  598. "sec-ch-ua": '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
  599. "sec-ch-ua-mobile": "?0",
  600. "sec-ch-ua-platform": '"macOS"',
  601. "sec-fetch-dest": "empty",
  602. "sec-fetch-mode": "cors",
  603. "sec-fetch-site": "same-origin",
  604. "tt-anti-token": "cBITBHvmYjEygzv-f9c78c1297722cf1f559c74b084e4525ce4900bdcf9e8588f20cc7c2e3234422",
  605. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",
  606. "x-secsdk-csrf-token": "000100000001f8e733cf37f0cd255a51aea9a81ff7bc0c09490cfe41ad827c3c5c18ec809279175e4d9f5553d8a5",
  607. }
  608. response = requests.get(
  609. url=url, headers=headers, params=params, proxies=tunnel_proxies(), timeout=5
  610. )
  611. response.close()
  612. if (
  613. response.status_code != 200
  614. or "total_number" not in response.json()
  615. or response.json() == {}
  616. ):
  617. return 0
  618. return response.json().get("total_number", 0)
  619. class XiGuaAuthor:
  620. """
  621. 西瓜账号爬虫
  622. """
  623. def __init__(self, platform, mode, rule_dict, env, user_list):
  624. self.platform = platform
  625. self.mode = mode
  626. self.rule_dict = rule_dict
  627. self.env = env
  628. self.user_list = user_list
  629. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  630. self.download_count = 0
  631. def rule_maker(self, account):
  632. """
  633. 通过不同的账号生成不同的规则
  634. :param account: 输入的账号信息
  635. {'play_cnt': {'min': 100000, 'max': 0}, 'period': {'min': 5, 'max': 5}}
  636. """
  637. flag = account['link'].split("_")[0]
  638. if flag == "V1":
  639. rule_dict = {
  640. "play_cnt": {"min": 100000, "max": 0},
  641. 'period': {"min": 90, "max": 90},
  642. 'special': 0.02
  643. }
  644. return rule_dict
  645. elif flag == "V2":
  646. rule_dict = {
  647. "play_cnt": {"min": 10000, "max": 0},
  648. 'period': {"min": 90, "max": 90},
  649. 'special': 0.01
  650. }
  651. return rule_dict
  652. elif flag == "V3":
  653. rule_dict = {
  654. "play_cnt": {"min": 5000, "max": 0},
  655. 'period': {"min": 90, "max": 90},
  656. 'special': 0.01
  657. }
  658. return rule_dict
  659. else:
  660. return self.rule_dict
  661. def get_author_list(self):
  662. # 每轮只抓取定量的数据,到达数量后自己退出
  663. # max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
  664. for user_dict in self.user_list:
  665. # if self.download_count <= max_count:
  666. self.get_video_list(user_dict)
  667. # time.sleep(random.randint(1, 15))
  668. # else:
  669. # AliyunLogger.logging(
  670. # code="2000",
  671. # platform=self.platform,
  672. # mode=self.mode,
  673. # env=self.env,
  674. # message="本轮已经抓取足够数量的视频,已经自动退出",
  675. # )
  676. # return
  677. def get_video_list(self, user_dict):
  678. offset = 0
  679. signature = random_signature()
  680. url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
  681. while True:
  682. if user_dict['link'][0] == "V":
  683. to_user_id = str(user_dict["link"][2:].replace("https://www.ixigua.com/home/", ""))
  684. else:
  685. to_user_id = str(
  686. user_dict["link"].replace("https://www.ixigua.com/home/", "")
  687. )
  688. params = {
  689. "to_user_id": to_user_id,
  690. "offset": str(offset),
  691. "limit": "30",
  692. "maxBehotTime": "0",
  693. "order": "new",
  694. "isHome": "0",
  695. # 'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
  696. # 'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
  697. "_signature": signature,
  698. }
  699. headers = {
  700. "referer": f'https://www.ixigua.com/home/{user_dict["link"].replace("https://www.ixigua.com/home/", "")}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
  701. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41",
  702. }
  703. response = requests.get(
  704. url=url,
  705. headers=headers,
  706. params=params,
  707. proxies=tunnel_proxies(),
  708. timeout=5,
  709. )
  710. offset += 30
  711. if "data" not in response.text or response.status_code != 200:
  712. AliyunLogger.logging(
  713. code="2000",
  714. platform=self.platform,
  715. mode=self.mode,
  716. env=self.env,
  717. message=f"get_videoList:{response.text}\n",
  718. )
  719. return
  720. elif not response.json()["data"]["videoList"]:
  721. AliyunLogger.logging(
  722. code="2000",
  723. platform=self.platform,
  724. mode=self.mode,
  725. env=self.env,
  726. message=f"没有更多数据啦~\n",
  727. )
  728. return
  729. else:
  730. feeds = response.json()["data"]["videoList"]
  731. for video_obj in feeds:
  732. try:
  733. AliyunLogger.logging(
  734. code="1001",
  735. platform=self.platform,
  736. mode=self.mode,
  737. env=self.env,
  738. data=video_obj,
  739. message="扫描到一条视频",
  740. )
  741. date_flag = self.process_video_obj(video_obj, user_dict)
  742. if not date_flag:
  743. return
  744. except Exception as e:
  745. AliyunLogger.logging(
  746. code="3000",
  747. platform=self.platform,
  748. mode=self.mode,
  749. env=self.env,
  750. data=video_obj,
  751. message="抓取单条视频异常, 报错原因是: {}".format(e),
  752. )
  753. def process_video_obj(self, video_obj, user_dict):
  754. new_rule = self.rule_maker(user_dict)
  755. trace_id = self.platform + str(uuid.uuid1())
  756. item_id = video_obj.get("item_id", "")
  757. if not item_id:
  758. AliyunLogger.logging(
  759. code="2005",
  760. platform=self.platform,
  761. mode=self.mode,
  762. env=self.env,
  763. message="无效视频",
  764. data=video_obj,
  765. trace_id=trace_id,
  766. )
  767. return
  768. # 获取视频信息
  769. video_dict = self.get_video_info(item_id=item_id, trace_id=trace_id)
  770. video_dict["out_user_id"] = video_dict["user_id"]
  771. video_dict["platform"] = self.platform
  772. video_dict["strategy"] = self.mode
  773. video_dict["out_video_id"] = video_dict["video_id"]
  774. video_dict["width"] = video_dict["video_width"]
  775. video_dict["height"] = video_dict["video_height"]
  776. video_dict["crawler_rule"] = json.dumps(new_rule)
  777. video_dict["user_id"] = user_dict["uid"]
  778. video_dict["publish_time"] = video_dict["publish_time_str"]
  779. video_dict["strategy_type"] = self.mode
  780. video_dict["update_time_stamp"] = int(time.time())
  781. if int(time.time()) - video_dict['publish_time_stamp'] > 3600 * 24 * int(
  782. new_rule.get("period", {}).get("max", 1000)):
  783. if not video_obj['is_top']:
  784. """
  785. 非置顶数据发布时间超过才退出
  786. """
  787. AliyunLogger.logging(
  788. code="2004",
  789. platform=self.platform,
  790. mode=self.mode,
  791. env=self.env,
  792. data=video_dict,
  793. message="发布时间超过{}天".format(
  794. int(new_rule.get("period", {}).get("max", 1000))
  795. ),
  796. )
  797. return False
  798. pipeline = PiaoQuanPipeline(
  799. platform=self.platform,
  800. mode=self.mode,
  801. rule_dict=new_rule,
  802. env=self.env,
  803. item=video_dict,
  804. trace_id=trace_id,
  805. )
  806. title_flag = pipeline.title_flag()
  807. repeat_flag = pipeline.repeat_video()
  808. if title_flag and repeat_flag:
  809. if new_rule.get("special"):
  810. if int(video_dict['play_cnt']) >= int(new_rule.get("play_cnt", {}).get("min", 100000)):
  811. if float(video_dict['like_cnt']) / float(video_dict['play_cnt']) >= new_rule['special']:
  812. self.mq.send_msg(video_dict)
  813. self.download_count += 1
  814. AliyunLogger.logging(
  815. code="1002",
  816. platform=self.platform,
  817. mode=self.mode,
  818. env=self.env,
  819. data=video_dict,
  820. trace_id=trace_id,
  821. message="成功发送 MQ 至 ETL",
  822. )
  823. return True
  824. else:
  825. AliyunLogger.logging(
  826. code="2008",
  827. platform=self.platform,
  828. mode=self.mode,
  829. env=self.env,
  830. message="不满足特殊规则, 点赞量/播放量",
  831. data=video_dict
  832. )
  833. else:
  834. if int(video_dict['play_cnt']) >= int(new_rule.get("play_cnt", {}).get("min", 100000)):
  835. self.mq.send_msg(video_dict)
  836. self.download_count += 1
  837. AliyunLogger.logging(
  838. code="1002",
  839. platform=self.platform,
  840. mode=self.mode,
  841. env=self.env,
  842. data=video_dict,
  843. trace_id=trace_id,
  844. message="成功发送 MQ 至 ETL",
  845. )
  846. return True
  847. else:
  848. AliyunLogger.logging(
  849. code="2008",
  850. platform=self.platform,
  851. mode=self.mode,
  852. env=self.env,
  853. message="不满足特殊规则, 播放量",
  854. data=video_dict
  855. )
  856. return True
  857. def get_video_info(self, item_id, trace_id):
  858. url = "https://www.ixigua.com/api/mixVideo/information?"
  859. headers = {
  860. "accept-encoding": "gzip, deflate",
  861. "accept-language": "zh-CN,zh-Hans;q=0.9",
  862. "user-agent": FakeUserAgent().random,
  863. "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
  864. }
  865. params = {
  866. "mixId": str(item_id),
  867. "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
  868. "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
  869. "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
  870. "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
  871. "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
  872. }
  873. cookies = {
  874. "ixigua-a-s": "1",
  875. "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
  876. "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
  877. "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
  878. "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
  879. "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
  880. "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
  881. "__ac_nonce": "06304878000964fdad287",
  882. "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
  883. "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
  884. "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
  885. "_tea_utm_cache_1300": "undefined",
  886. "support_avif": "false",
  887. "support_webp": "false",
  888. "xiguavideopcwebid": "7134967546256016900",
  889. "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
  890. }
  891. response = requests.get(
  892. url=url,
  893. headers=headers,
  894. params=params,
  895. cookies=cookies,
  896. proxies=tunnel_proxies(),
  897. timeout=5,
  898. )
  899. if (
  900. response.status_code != 200
  901. or "data" not in response.json()
  902. or response.json()["data"] == {}
  903. ):
  904. AliyunLogger.logging(
  905. code="2000",
  906. platform=self.platform,
  907. mode=self.mode,
  908. env=self.env,
  909. message="获取视频信息失败",
  910. trace_id=trace_id,
  911. )
  912. return None
  913. else:
  914. video_info = (
  915. response.json()["data"]
  916. .get("gidInformation", {})
  917. .get("packerData", {})
  918. .get("video", {})
  919. )
  920. if video_info == {}:
  921. return None
  922. video_detail = get_video_url(video_info)
  923. video_dict = {
  924. "video_title": video_info.get("title", ""),
  925. "video_id": video_info.get("videoResource", {}).get("vid", ""),
  926. "gid": str(item_id),
  927. "play_cnt": int(video_info.get("video_watch_count", 0)),
  928. "like_cnt": int(video_info.get("video_like_count", 0)),
  929. "comment_cnt": int(get_comment_cnt(item_id)),
  930. "share_cnt": 0,
  931. "favorite_cnt": 0,
  932. "duration": int(video_info.get("video_duration", 0)),
  933. "video_width": int(video_detail["video_width"]),
  934. "video_height": int(video_detail["video_height"]),
  935. "publish_time_stamp": int(video_info.get("video_publish_time", 0)),
  936. "publish_time_str": time.strftime(
  937. "%Y-%m-%d %H:%M:%S",
  938. time.localtime(int(video_info.get("video_publish_time", 0))),
  939. ),
  940. "user_name": video_info.get("user_info", {}).get("name", ""),
  941. "user_id": str(video_info.get("user_info", {}).get("user_id", "")),
  942. "avatar_url": str(
  943. video_info.get("user_info", {}).get("avatar_url", "")
  944. ),
  945. "cover_url": video_info.get("poster_url", ""),
  946. "audio_url": video_detail["audio_url"],
  947. "video_url": video_detail["video_url"],
  948. "session": f"xigua-search-{int(time.time())}",
  949. }
  950. return video_dict
  951. if __name__ == "__main__":
  952. user_list = [
  953. {
  954. "uid": 6267140,
  955. "source": "xigua",
  956. "link": "https://www.ixigua.com/home/2779177225827568",
  957. "nick_name": "秋晴爱音乐",
  958. "avatar_url": "",
  959. "mode": "author",
  960. },
  961. {
  962. "uid": 6267140,
  963. "source": "xigua",
  964. "link": "https://www.ixigua.com/home/2885546124776780",
  965. "nick_name": "朗诵放歌的老山羊",
  966. "avatar_url": "",
  967. "mode": "author",
  968. },
  969. {
  970. "uid": 6267140,
  971. "source": "xigua",
  972. "link": "https://www.ixigua.com/home/5880938217",
  973. "nick_name": "天原声疗",
  974. "avatar_url": "",
  975. "mode": "author",
  976. },
  977. ]
  978. # rule = {'period': {'min': 30, 'max': 30}, 'duration': {'min': 20, 'max': 0}, 'play_cnt': {'min': 100000, 'max': 0}}
  979. # XGA = XiGuaAuthor(
  980. # platform="xigua",
  981. # mode="author",
  982. # rule_dict=rule,
  983. # env="prod",
  984. # user_list=user_list
  985. # )
  986. # XGA.get_author_list()