xigua_author.py 43 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009
  1. import json
  2. import os
  3. import random
  4. import sys
  5. import string
  6. import time
  7. import uuid
  8. import base64
  9. import requests
  10. from fake_useragent import FakeUserAgent
  11. from common.mq import MQ
  12. sys.path.append(os.getcwd())
  13. from common import AliyunLogger, PiaoQuanPipeline, tunnel_proxies
  14. def random_signature():
  15. """
  16. 随机生成签名
  17. """
  18. src_digits = string.digits # string_数字
  19. src_uppercase = string.ascii_uppercase # string_大写字母
  20. src_lowercase = string.ascii_lowercase # string_小写字母
  21. digits_num = random.randint(1, 6)
  22. uppercase_num = random.randint(1, 26 - digits_num - 1)
  23. lowercase_num = 26 - (digits_num + uppercase_num)
  24. password = (
  25. random.sample(src_digits, digits_num)
  26. + random.sample(src_uppercase, uppercase_num)
  27. + random.sample(src_lowercase, lowercase_num)
  28. )
  29. random.shuffle(password)
  30. new_password = "AAAAAAAAAA" + "".join(password)[10:-4] + "AAAB"
  31. new_password_start = new_password[0:18]
  32. new_password_end = new_password[-7:]
  33. if new_password[18] == "8":
  34. new_password = new_password_start + "w" + new_password_end
  35. elif new_password[18] == "9":
  36. new_password = new_password_start + "x" + new_password_end
  37. elif new_password[18] == "-":
  38. new_password = new_password_start + "y" + new_password_end
  39. elif new_password[18] == ".":
  40. new_password = new_password_start + "z" + new_password_end
  41. else:
  42. new_password = new_password_start + "y" + new_password_end
  43. return new_password
  44. def get_video_url(video_info):
  45. """
  46. 获取视频的链接
  47. """
  48. video_url_dict = {}
  49. # video_url
  50. if "videoResource" not in video_info:
  51. video_url_dict["video_url"] = ""
  52. video_url_dict["audio_url"] = ""
  53. video_url_dict["video_width"] = 0
  54. video_url_dict["video_height"] = 0
  55. elif "dash_120fps" in video_info["videoResource"]:
  56. if (
  57. "video_list" in video_info["videoResource"]["dash_120fps"]
  58. and "video_4" in video_info["videoResource"]["dash_120fps"]["video_list"]
  59. ):
  60. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  61. "video_4"
  62. ]["backup_url_1"]
  63. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  64. "video_4"
  65. ]["backup_url_1"]
  66. if len(video_url) % 3 == 1:
  67. video_url += "=="
  68. elif len(video_url) % 3 == 2:
  69. video_url += "="
  70. elif len(audio_url) % 3 == 1:
  71. audio_url += "=="
  72. elif len(audio_url) % 3 == 2:
  73. audio_url += "="
  74. video_url = base64.b64decode(video_url).decode("utf8")
  75. audio_url = base64.b64decode(audio_url).decode("utf8")
  76. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  77. "video_4"
  78. ]["vwidth"]
  79. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  80. "video_4"
  81. ]["vheight"]
  82. video_url_dict["video_url"] = video_url
  83. video_url_dict["audio_url"] = audio_url
  84. video_url_dict["video_width"] = video_width
  85. video_url_dict["video_height"] = video_height
  86. elif (
  87. "video_list" in video_info["videoResource"]["dash_120fps"]
  88. and "video_3" in video_info["videoResource"]["dash_120fps"]["video_list"]
  89. ):
  90. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  91. "video_3"
  92. ]["backup_url_1"]
  93. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  94. "video_3"
  95. ]["backup_url_1"]
  96. if len(video_url) % 3 == 1:
  97. video_url += "=="
  98. elif len(video_url) % 3 == 2:
  99. video_url += "="
  100. elif len(audio_url) % 3 == 1:
  101. audio_url += "=="
  102. elif len(audio_url) % 3 == 2:
  103. audio_url += "="
  104. video_url = base64.b64decode(video_url).decode("utf8")
  105. audio_url = base64.b64decode(audio_url).decode("utf8")
  106. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  107. "video_3"
  108. ]["vwidth"]
  109. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  110. "video_3"
  111. ]["vheight"]
  112. video_url_dict["video_url"] = video_url
  113. video_url_dict["audio_url"] = audio_url
  114. video_url_dict["video_width"] = video_width
  115. video_url_dict["video_height"] = video_height
  116. elif (
  117. "video_list" in video_info["videoResource"]["dash_120fps"]
  118. and "video_2" in video_info["videoResource"]["dash_120fps"]["video_list"]
  119. ):
  120. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  121. "video_2"
  122. ]["backup_url_1"]
  123. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  124. "video_2"
  125. ]["backup_url_1"]
  126. if len(video_url) % 3 == 1:
  127. video_url += "=="
  128. elif len(video_url) % 3 == 2:
  129. video_url += "="
  130. elif len(audio_url) % 3 == 1:
  131. audio_url += "=="
  132. elif len(audio_url) % 3 == 2:
  133. audio_url += "="
  134. video_url = base64.b64decode(video_url).decode("utf8")
  135. audio_url = base64.b64decode(audio_url).decode("utf8")
  136. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  137. "video_2"
  138. ]["vwidth"]
  139. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  140. "video_2"
  141. ]["vheight"]
  142. video_url_dict["video_url"] = video_url
  143. video_url_dict["audio_url"] = audio_url
  144. video_url_dict["video_width"] = video_width
  145. video_url_dict["video_height"] = video_height
  146. elif (
  147. "video_list" in video_info["videoResource"]["dash_120fps"]
  148. and "video_1" in video_info["videoResource"]["dash_120fps"]["video_list"]
  149. ):
  150. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  151. "video_1"
  152. ]["backup_url_1"]
  153. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  154. "video_1"
  155. ]["backup_url_1"]
  156. if len(video_url) % 3 == 1:
  157. video_url += "=="
  158. elif len(video_url) % 3 == 2:
  159. video_url += "="
  160. elif len(audio_url) % 3 == 1:
  161. audio_url += "=="
  162. elif len(audio_url) % 3 == 2:
  163. audio_url += "="
  164. video_url = base64.b64decode(video_url).decode("utf8")
  165. audio_url = base64.b64decode(audio_url).decode("utf8")
  166. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  167. "video_1"
  168. ]["vwidth"]
  169. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  170. "video_1"
  171. ]["vheight"]
  172. video_url_dict["video_url"] = video_url
  173. video_url_dict["audio_url"] = audio_url
  174. video_url_dict["video_width"] = video_width
  175. video_url_dict["video_height"] = video_height
  176. elif (
  177. "dynamic_video" in video_info["videoResource"]["dash_120fps"]
  178. and "dynamic_video_list"
  179. in video_info["videoResource"]["dash_120fps"]["dynamic_video"]
  180. and "dynamic_audio_list"
  181. in video_info["videoResource"]["dash_120fps"]["dynamic_video"]
  182. and len(
  183. video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  184. "dynamic_video_list"
  185. ]
  186. )
  187. != 0
  188. and len(
  189. video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  190. "dynamic_audio_list"
  191. ]
  192. )
  193. != 0
  194. ):
  195. video_url = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  196. "dynamic_video_list"
  197. ][-1]["backup_url_1"]
  198. audio_url = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  199. "dynamic_audio_list"
  200. ][-1]["backup_url_1"]
  201. if len(video_url) % 3 == 1:
  202. video_url += "=="
  203. elif len(video_url) % 3 == 2:
  204. video_url += "="
  205. elif len(audio_url) % 3 == 1:
  206. audio_url += "=="
  207. elif len(audio_url) % 3 == 2:
  208. audio_url += "="
  209. video_url = base64.b64decode(video_url).decode("utf8")
  210. audio_url = base64.b64decode(audio_url).decode("utf8")
  211. video_width = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  212. "dynamic_video_list"
  213. ][-1]["vwidth"]
  214. video_height = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  215. "dynamic_video_list"
  216. ][-1]["vheight"]
  217. video_url_dict["video_url"] = video_url
  218. video_url_dict["audio_url"] = audio_url
  219. video_url_dict["video_width"] = video_width
  220. video_url_dict["video_height"] = video_height
  221. else:
  222. video_url_dict["video_url"] = ""
  223. video_url_dict["audio_url"] = ""
  224. video_url_dict["video_width"] = 0
  225. video_url_dict["video_height"] = 0
  226. elif "dash" in video_info["videoResource"]:
  227. if (
  228. "video_list" in video_info["videoResource"]["dash"]
  229. and "video_4" in video_info["videoResource"]["dash"]["video_list"]
  230. ):
  231. video_url = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  232. "backup_url_1"
  233. ]
  234. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  235. "backup_url_1"
  236. ]
  237. if len(video_url) % 3 == 1:
  238. video_url += "=="
  239. elif len(video_url) % 3 == 2:
  240. video_url += "="
  241. elif len(audio_url) % 3 == 1:
  242. audio_url += "=="
  243. elif len(audio_url) % 3 == 2:
  244. audio_url += "="
  245. video_url = base64.b64decode(video_url).decode("utf8")
  246. audio_url = base64.b64decode(audio_url).decode("utf8")
  247. video_width = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  248. "vwidth"
  249. ]
  250. video_height = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  251. "vheight"
  252. ]
  253. video_url_dict["video_url"] = video_url
  254. video_url_dict["audio_url"] = audio_url
  255. video_url_dict["video_width"] = video_width
  256. video_url_dict["video_height"] = video_height
  257. elif (
  258. "video_list" in video_info["videoResource"]["dash"]
  259. and "video_3" in video_info["videoResource"]["dash"]["video_list"]
  260. ):
  261. video_url = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  262. "backup_url_1"
  263. ]
  264. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  265. "backup_url_1"
  266. ]
  267. if len(video_url) % 3 == 1:
  268. video_url += "=="
  269. elif len(video_url) % 3 == 2:
  270. video_url += "="
  271. elif len(audio_url) % 3 == 1:
  272. audio_url += "=="
  273. elif len(audio_url) % 3 == 2:
  274. audio_url += "="
  275. video_url = base64.b64decode(video_url).decode("utf8")
  276. audio_url = base64.b64decode(audio_url).decode("utf8")
  277. video_width = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  278. "vwidth"
  279. ]
  280. video_height = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  281. "vheight"
  282. ]
  283. video_url_dict["video_url"] = video_url
  284. video_url_dict["audio_url"] = audio_url
  285. video_url_dict["video_width"] = video_width
  286. video_url_dict["video_height"] = video_height
  287. elif (
  288. "video_list" in video_info["videoResource"]["dash"]
  289. and "video_2" in video_info["videoResource"]["dash"]["video_list"]
  290. ):
  291. video_url = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  292. "backup_url_1"
  293. ]
  294. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  295. "backup_url_1"
  296. ]
  297. if len(video_url) % 3 == 1:
  298. video_url += "=="
  299. elif len(video_url) % 3 == 2:
  300. video_url += "="
  301. elif len(audio_url) % 3 == 1:
  302. audio_url += "=="
  303. elif len(audio_url) % 3 == 2:
  304. audio_url += "="
  305. video_url = base64.b64decode(video_url).decode("utf8")
  306. audio_url = base64.b64decode(audio_url).decode("utf8")
  307. video_width = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  308. "vwidth"
  309. ]
  310. video_height = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  311. "vheight"
  312. ]
  313. video_url_dict["video_url"] = video_url
  314. video_url_dict["audio_url"] = audio_url
  315. video_url_dict["video_width"] = video_width
  316. video_url_dict["video_height"] = video_height
  317. elif (
  318. "video_list" in video_info["videoResource"]["dash"]
  319. and "video_1" in video_info["videoResource"]["dash"]["video_list"]
  320. ):
  321. video_url = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  322. "backup_url_1"
  323. ]
  324. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  325. "backup_url_1"
  326. ]
  327. if len(video_url) % 3 == 1:
  328. video_url += "=="
  329. elif len(video_url) % 3 == 2:
  330. video_url += "="
  331. elif len(audio_url) % 3 == 1:
  332. audio_url += "=="
  333. elif len(audio_url) % 3 == 2:
  334. audio_url += "="
  335. video_url = base64.b64decode(video_url).decode("utf8")
  336. audio_url = base64.b64decode(audio_url).decode("utf8")
  337. video_width = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  338. "vwidth"
  339. ]
  340. video_height = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  341. "vheight"
  342. ]
  343. video_url_dict["video_url"] = video_url
  344. video_url_dict["audio_url"] = audio_url
  345. video_url_dict["video_width"] = video_width
  346. video_url_dict["video_height"] = video_height
  347. elif (
  348. "dynamic_video" in video_info["videoResource"]["dash"]
  349. and "dynamic_video_list"
  350. in video_info["videoResource"]["dash"]["dynamic_video"]
  351. and "dynamic_audio_list"
  352. in video_info["videoResource"]["dash"]["dynamic_video"]
  353. and len(
  354. video_info["videoResource"]["dash"]["dynamic_video"][
  355. "dynamic_video_list"
  356. ]
  357. )
  358. != 0
  359. and len(
  360. video_info["videoResource"]["dash"]["dynamic_video"][
  361. "dynamic_audio_list"
  362. ]
  363. )
  364. != 0
  365. ):
  366. video_url = video_info["videoResource"]["dash"]["dynamic_video"][
  367. "dynamic_video_list"
  368. ][-1]["backup_url_1"]
  369. audio_url = video_info["videoResource"]["dash"]["dynamic_video"][
  370. "dynamic_audio_list"
  371. ][-1]["backup_url_1"]
  372. if len(video_url) % 3 == 1:
  373. video_url += "=="
  374. elif len(video_url) % 3 == 2:
  375. video_url += "="
  376. elif len(audio_url) % 3 == 1:
  377. audio_url += "=="
  378. elif len(audio_url) % 3 == 2:
  379. audio_url += "="
  380. video_url = base64.b64decode(video_url).decode("utf8")
  381. audio_url = base64.b64decode(audio_url).decode("utf8")
  382. video_width = video_info["videoResource"]["dash"]["dynamic_video"][
  383. "dynamic_video_list"
  384. ][-1]["vwidth"]
  385. video_height = video_info["videoResource"]["dash"]["dynamic_video"][
  386. "dynamic_video_list"
  387. ][-1]["vheight"]
  388. video_url_dict["video_url"] = video_url
  389. video_url_dict["audio_url"] = audio_url
  390. video_url_dict["video_width"] = video_width
  391. video_url_dict["video_height"] = video_height
  392. else:
  393. video_url_dict["video_url"] = ""
  394. video_url_dict["audio_url"] = ""
  395. video_url_dict["video_width"] = 0
  396. video_url_dict["video_height"] = 0
  397. elif "normal" in video_info["videoResource"]:
  398. if (
  399. "video_list" in video_info["videoResource"]["normal"]
  400. and "video_4" in video_info["videoResource"]["normal"]["video_list"]
  401. ):
  402. video_url = video_info["videoResource"]["normal"]["video_list"]["video_4"][
  403. "backup_url_1"
  404. ]
  405. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_4"][
  406. "backup_url_1"
  407. ]
  408. if len(video_url) % 3 == 1:
  409. video_url += "=="
  410. elif len(video_url) % 3 == 2:
  411. video_url += "="
  412. elif len(audio_url) % 3 == 1:
  413. audio_url += "=="
  414. elif len(audio_url) % 3 == 2:
  415. audio_url += "="
  416. video_url = base64.b64decode(video_url).decode("utf8")
  417. audio_url = base64.b64decode(audio_url).decode("utf8")
  418. video_width = video_info["videoResource"]["normal"]["video_list"][
  419. "video_4"
  420. ]["vwidth"]
  421. video_height = video_info["videoResource"]["normal"]["video_list"][
  422. "video_4"
  423. ]["vheight"]
  424. video_url_dict["video_url"] = video_url
  425. video_url_dict["audio_url"] = audio_url
  426. video_url_dict["video_width"] = video_width
  427. video_url_dict["video_height"] = video_height
  428. elif (
  429. "video_list" in video_info["videoResource"]["normal"]
  430. and "video_3" in video_info["videoResource"]["normal"]["video_list"]
  431. ):
  432. video_url = video_info["videoResource"]["normal"]["video_list"]["video_3"][
  433. "backup_url_1"
  434. ]
  435. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_3"][
  436. "backup_url_1"
  437. ]
  438. if len(video_url) % 3 == 1:
  439. video_url += "=="
  440. elif len(video_url) % 3 == 2:
  441. video_url += "="
  442. elif len(audio_url) % 3 == 1:
  443. audio_url += "=="
  444. elif len(audio_url) % 3 == 2:
  445. audio_url += "="
  446. video_url = base64.b64decode(video_url).decode("utf8")
  447. audio_url = base64.b64decode(audio_url).decode("utf8")
  448. video_width = video_info["videoResource"]["normal"]["video_list"][
  449. "video_3"
  450. ]["vwidth"]
  451. video_height = video_info["videoResource"]["normal"]["video_list"][
  452. "video_3"
  453. ]["vheight"]
  454. video_url_dict["video_url"] = video_url
  455. video_url_dict["audio_url"] = audio_url
  456. video_url_dict["video_width"] = video_width
  457. video_url_dict["video_height"] = video_height
  458. elif (
  459. "video_list" in video_info["videoResource"]["normal"]
  460. and "video_2" in video_info["videoResource"]["normal"]["video_list"]
  461. ):
  462. video_url = video_info["videoResource"]["normal"]["video_list"]["video_2"][
  463. "backup_url_1"
  464. ]
  465. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_2"][
  466. "backup_url_1"
  467. ]
  468. if len(video_url) % 3 == 1:
  469. video_url += "=="
  470. elif len(video_url) % 3 == 2:
  471. video_url += "="
  472. elif len(audio_url) % 3 == 1:
  473. audio_url += "=="
  474. elif len(audio_url) % 3 == 2:
  475. audio_url += "="
  476. video_url = base64.b64decode(video_url).decode("utf8")
  477. audio_url = base64.b64decode(audio_url).decode("utf8")
  478. video_width = video_info["videoResource"]["normal"]["video_list"][
  479. "video_2"
  480. ]["vwidth"]
  481. video_height = video_info["videoResource"]["normal"]["video_list"][
  482. "video_2"
  483. ]["vheight"]
  484. video_url_dict["video_url"] = video_url
  485. video_url_dict["audio_url"] = audio_url
  486. video_url_dict["video_width"] = video_width
  487. video_url_dict["video_height"] = video_height
  488. elif (
  489. "video_list" in video_info["videoResource"]["normal"]
  490. and "video_1" in video_info["videoResource"]["normal"]["video_list"]
  491. ):
  492. video_url = video_info["videoResource"]["normal"]["video_list"]["video_1"][
  493. "backup_url_1"
  494. ]
  495. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_1"][
  496. "backup_url_1"
  497. ]
  498. if len(video_url) % 3 == 1:
  499. video_url += "=="
  500. elif len(video_url) % 3 == 2:
  501. video_url += "="
  502. elif len(audio_url) % 3 == 1:
  503. audio_url += "=="
  504. elif len(audio_url) % 3 == 2:
  505. audio_url += "="
  506. video_url = base64.b64decode(video_url).decode("utf8")
  507. audio_url = base64.b64decode(audio_url).decode("utf8")
  508. video_width = video_info["videoResource"]["normal"]["video_list"][
  509. "video_1"
  510. ]["vwidth"]
  511. video_height = video_info["videoResource"]["normal"]["video_list"][
  512. "video_1"
  513. ]["vheight"]
  514. video_url_dict["video_url"] = video_url
  515. video_url_dict["audio_url"] = audio_url
  516. video_url_dict["video_width"] = video_width
  517. video_url_dict["video_height"] = video_height
  518. elif (
  519. "dynamic_video" in video_info["videoResource"]["normal"]
  520. and "dynamic_video_list"
  521. in video_info["videoResource"]["normal"]["dynamic_video"]
  522. and "dynamic_audio_list"
  523. in video_info["videoResource"]["normal"]["dynamic_video"]
  524. and len(
  525. video_info["videoResource"]["normal"]["dynamic_video"][
  526. "dynamic_video_list"
  527. ]
  528. )
  529. != 0
  530. and len(
  531. video_info["videoResource"]["normal"]["dynamic_video"][
  532. "dynamic_audio_list"
  533. ]
  534. )
  535. != 0
  536. ):
  537. video_url = video_info["videoResource"]["normal"]["dynamic_video"][
  538. "dynamic_video_list"
  539. ][-1]["backup_url_1"]
  540. audio_url = video_info["videoResource"]["normal"]["dynamic_video"][
  541. "dynamic_audio_list"
  542. ][-1]["backup_url_1"]
  543. if len(video_url) % 3 == 1:
  544. video_url += "=="
  545. elif len(video_url) % 3 == 2:
  546. video_url += "="
  547. elif len(audio_url) % 3 == 1:
  548. audio_url += "=="
  549. elif len(audio_url) % 3 == 2:
  550. audio_url += "="
  551. video_url = base64.b64decode(video_url).decode("utf8")
  552. audio_url = base64.b64decode(audio_url).decode("utf8")
  553. video_width = video_info["videoResource"]["normal"]["dynamic_video"][
  554. "dynamic_video_list"
  555. ][-1]["vwidth"]
  556. video_height = video_info["videoResource"]["normal"]["dynamic_video"][
  557. "dynamic_video_list"
  558. ][-1]["vheight"]
  559. video_url_dict["video_url"] = video_url
  560. video_url_dict["audio_url"] = audio_url
  561. video_url_dict["video_width"] = video_width
  562. video_url_dict["video_height"] = video_height
  563. else:
  564. video_url_dict["video_url"] = ""
  565. video_url_dict["audio_url"] = ""
  566. video_url_dict["video_width"] = 0
  567. video_url_dict["video_height"] = 0
  568. else:
  569. video_url_dict["video_url"] = ""
  570. video_url_dict["audio_url"] = ""
  571. video_url_dict["video_width"] = 0
  572. video_url_dict["video_height"] = 0
  573. return video_url_dict
  574. def get_comment_cnt(item_id):
  575. """
  576. 获取视频的评论数量
  577. """
  578. url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
  579. params = {
  580. "tab_index": "0",
  581. "count": "10",
  582. "offset": "10",
  583. "group_id": str(item_id),
  584. "item_id": str(item_id),
  585. "aid": "1768",
  586. "msToken": "50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==",
  587. "X-Bogus": "DFSzswVOyGtANVeWtCLMqR/F6q9U",
  588. "_signature": random_signature(),
  589. }
  590. headers = {
  591. "authority": "www.ixigua.com",
  592. "accept": "application/json, text/plain, */*",
  593. "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
  594. "cache-control": "no-cache",
  595. "cookie": "MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; passport_csrf_token=72b2574f3c99f8ba670e42df430218fd; passport_csrf_token_default=72b2574f3c99f8ba670e42df430218fd; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; uid_tt=c13f47d51767f616befe32fb3e9f485a; uid_tt_ss=c13f47d51767f616befe32fb3e9f485a; sid_tt=c7472b508ea631823ba765a60cf8757f; sessionid=c7472b508ea631823ba765a60cf8757f; sessionid_ss=c7472b508ea631823ba765a60cf8757f; sid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; ssid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; SEARCH_CARD_MODE=7168304743566296612_0; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=7Pux7s634-z8DYvCM20y7KigwH5u7Rh6D9C-RROpnT.aGMEcz6Vsxp.oai47wJqa4f86; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1683858689%7Ca5223fe1500578e01e138a0d71d6444692018296c4c24f5885af174a65873c95; ixigua-a-s=3; msToken=50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==; __ac_nonce=0645dcbf0005064517440; __ac_signature=_02B4Z6wo00f01FEGmAwAAIDBKchzCGqn-MBRJpyAAHAjieFC5GEg6gGiwz.I4PRrJl7f0GcixFrExKmgt6QI1i1S-dQyofPEj2ugWTCnmKUdJQv-wYuDofeKNe8VtMtZq2aKewyUGeKU-5Ud21; ixigua-a-s=3",
  596. "pragma": "no-cache",
  597. "referer": f"https://www.ixigua.com/{item_id}?logTag=3c5aa86a8600b9ab8540",
  598. "sec-ch-ua": '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
  599. "sec-ch-ua-mobile": "?0",
  600. "sec-ch-ua-platform": '"macOS"',
  601. "sec-fetch-dest": "empty",
  602. "sec-fetch-mode": "cors",
  603. "sec-fetch-site": "same-origin",
  604. "tt-anti-token": "cBITBHvmYjEygzv-f9c78c1297722cf1f559c74b084e4525ce4900bdcf9e8588f20cc7c2e3234422",
  605. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",
  606. "x-secsdk-csrf-token": "000100000001f8e733cf37f0cd255a51aea9a81ff7bc0c09490cfe41ad827c3c5c18ec809279175e4d9f5553d8a5",
  607. }
  608. response = requests.get(
  609. url=url, headers=headers, params=params, proxies=tunnel_proxies(), timeout=5
  610. )
  611. response.close()
  612. if (
  613. response.status_code != 200
  614. or "total_number" not in response.json()
  615. or response.json() == {}
  616. ):
  617. return 0
  618. return response.json().get("total_number", 0)
  619. class XiGuaAuthor:
  620. """
  621. 西瓜账号爬虫
  622. """
  623. def __init__(self, platform, mode, rule_dict, env, user_list):
  624. self.platform = platform
  625. self.mode = mode
  626. self.rule_dict = rule_dict
  627. self.env = env
  628. self.user_list = user_list
  629. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  630. self.download_count = 0
  631. def rule_maker(self, account):
  632. """
  633. 通过不同的账号生成不同的规则
  634. :param account: 输入的账号信息
  635. {'play_cnt': {'min': 100000, 'max': 0}, 'period': {'min': 5, 'max': 5}}
  636. """
  637. flag = account.split("_")[-1]
  638. if flag == "V1":
  639. rule_dict = {
  640. "play_cnt": {"min": 50000, "max": 0},
  641. 'period': {"min": 15, "max": 15},
  642. 'special': 0.01
  643. }
  644. return rule_dict
  645. elif flag == "V2":
  646. rule_dict = {
  647. "play_cnt": {"min": 10000, "max": 0},
  648. 'period': {"min": 7, "max": 7},
  649. 'special': 0.01
  650. }
  651. return rule_dict
  652. elif flag == "V3":
  653. rule_dict = {
  654. "play_cnt": {"min": 5000, "max": 0},
  655. 'period': {"min": 3, "max": 3},
  656. 'special': 0.01
  657. }
  658. return rule_dict
  659. else:
  660. return self.rule_dict
  661. def get_author_list(self):
  662. # 每轮只抓取定量的数据,到达数量后自己退出
  663. # max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
  664. for user_dict in self.user_list:
  665. # if self.download_count <= max_count:
  666. self.get_video_list(user_dict)
  667. # time.sleep(random.randint(1, 15))
  668. # else:
  669. # AliyunLogger.logging(
  670. # code="2000",
  671. # platform=self.platform,
  672. # mode=self.mode,
  673. # env=self.env,
  674. # message="本轮已经抓取足够数量的视频,已经自动退出",
  675. # )
  676. # return
  677. def get_video_list(self, user_dict):
  678. offset = 0
  679. signature = random_signature()
  680. url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
  681. while True:
  682. params = {
  683. "to_user_id": str(
  684. user_dict["link"].replace("https://www.ixigua.com/home/", "")
  685. ),
  686. "offset": str(offset),
  687. "limit": "30",
  688. "maxBehotTime": "0",
  689. "order": "new",
  690. "isHome": "0",
  691. # 'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
  692. # 'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
  693. "_signature": signature,
  694. }
  695. headers = {
  696. "referer": f'https://www.ixigua.com/home/{user_dict["link"].replace("https://www.ixigua.com/home/", "")}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
  697. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41",
  698. }
  699. response = requests.get(
  700. url=url,
  701. headers=headers,
  702. params=params,
  703. proxies=tunnel_proxies(),
  704. timeout=5,
  705. )
  706. offset += 30
  707. if "data" not in response.text or response.status_code != 200:
  708. AliyunLogger.logging(
  709. code="2000",
  710. platform=self.platform,
  711. mode=self.mode,
  712. env=self.env,
  713. message=f"get_videoList:{response.text}\n",
  714. )
  715. return
  716. elif not response.json()["data"]["videoList"]:
  717. AliyunLogger.logging(
  718. code="2000",
  719. platform=self.platform,
  720. mode=self.mode,
  721. env=self.env,
  722. message=f"没有更多数据啦~\n",
  723. )
  724. return
  725. else:
  726. feeds = response.json()["data"]["videoList"]
  727. for video_obj in feeds:
  728. try:
  729. AliyunLogger.logging(
  730. code="1001",
  731. platform=self.platform,
  732. mode=self.mode,
  733. env=self.env,
  734. data=video_obj,
  735. message="扫描到一条视频",
  736. )
  737. date_flag = self.process_video_obj(video_obj, user_dict)
  738. if not date_flag:
  739. return
  740. except Exception as e:
  741. AliyunLogger.logging(
  742. code="3000",
  743. platform=self.platform,
  744. mode=self.mode,
  745. env=self.env,
  746. data=video_obj,
  747. message="抓取单条视频异常, 报错原因是: {}".format(e),
  748. )
  749. def process_video_obj(self, video_obj, user_dict):
  750. new_rule = self.rule_maker(user_dict)
  751. trace_id = self.platform + str(uuid.uuid1())
  752. item_id = video_obj.get("item_id", "")
  753. if not item_id:
  754. AliyunLogger.logging(
  755. code="2005",
  756. platform=self.platform,
  757. mode=self.mode,
  758. env=self.env,
  759. message="无效视频",
  760. data=video_obj,
  761. trace_id=trace_id,
  762. )
  763. return
  764. # 获取视频信息
  765. video_dict = self.get_video_info(item_id=item_id, trace_id=trace_id)
  766. video_dict["out_user_id"] = video_dict["user_id"]
  767. video_dict["platform"] = self.platform
  768. video_dict["strategy"] = self.mode
  769. video_dict["out_video_id"] = video_dict["video_id"]
  770. video_dict["width"] = video_dict["video_width"]
  771. video_dict["height"] = video_dict["video_height"]
  772. video_dict["crawler_rule"] = json.dumps(new_rule)
  773. video_dict["user_id"] = user_dict["uid"]
  774. video_dict["publish_time"] = video_dict["publish_time_str"]
  775. video_dict["strategy_type"] = self.mode
  776. video_dict["update_time_stamp"] = int(time.time())
  777. if int(time.time()) - video_dict['publish_time_stamp'] > 3600 * 24 * int(
  778. new_rule.get("period", {}).get("max", 1000)):
  779. if not video_obj['is_top']:
  780. """
  781. 非置顶数据发布时间超过才退出
  782. """
  783. AliyunLogger.logging(
  784. code="2004",
  785. platform=self.platform,
  786. mode=self.mode,
  787. env=self.env,
  788. data=video_dict,
  789. message="发布时间超过{}天".format(
  790. int(new_rule.get("period", {}).get("max", 1000))
  791. ),
  792. )
  793. return False
  794. pipeline = PiaoQuanPipeline(
  795. platform=self.platform,
  796. mode=self.mode,
  797. rule_dict=new_rule,
  798. env=self.env,
  799. item=video_dict,
  800. trace_id=trace_id,
  801. )
  802. title_flag = pipeline.title_flag()
  803. repeat_flag = pipeline.repeat_video()
  804. if title_flag and repeat_flag:
  805. if new_rule.get("special"):
  806. if int(video_dict['play_cnt']) >= int(new_rule.get("play_cnt", {}).get("min", 100000)):
  807. if float(video_dict['like_cnt']) / float(video_dict['play_cnt']) >= new_rule['special']:
  808. self.mq.send_msg(video_dict)
  809. self.download_count += 1
  810. AliyunLogger.logging(
  811. code="1002",
  812. platform=self.platform,
  813. mode=self.mode,
  814. env=self.env,
  815. data=video_dict,
  816. trace_id=trace_id,
  817. message="成功发送 MQ 至 ETL",
  818. )
  819. return True
  820. else:
  821. AliyunLogger.logging(
  822. code="2008",
  823. platform=self.platform,
  824. mode=self.mode,
  825. env=self.env,
  826. message="不满足特殊规则, 点赞量/播放量",
  827. data=video_dict
  828. )
  829. else:
  830. if int(video_dict['play_cnt']) >= int(new_rule.get("play_cnt", {}).get("min", 100000)):
  831. self.mq.send_msg(video_dict)
  832. self.download_count += 1
  833. AliyunLogger.logging(
  834. code="1002",
  835. platform=self.platform,
  836. mode=self.mode,
  837. env=self.env,
  838. data=video_dict,
  839. trace_id=trace_id,
  840. message="成功发送 MQ 至 ETL",
  841. )
  842. return True
  843. else:
  844. AliyunLogger.logging(
  845. code="2008",
  846. platform=self.platform,
  847. mode=self.mode,
  848. env=self.env,
  849. message="不满足特殊规则, 播放量",
  850. data=video_dict
  851. )
  852. return True
  853. def get_video_info(self, item_id, trace_id):
  854. url = "https://www.ixigua.com/api/mixVideo/information?"
  855. headers = {
  856. "accept-encoding": "gzip, deflate",
  857. "accept-language": "zh-CN,zh-Hans;q=0.9",
  858. "user-agent": FakeUserAgent().random,
  859. "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
  860. }
  861. params = {
  862. "mixId": str(item_id),
  863. "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
  864. "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
  865. "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
  866. "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
  867. "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
  868. }
  869. cookies = {
  870. "ixigua-a-s": "1",
  871. "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
  872. "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
  873. "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
  874. "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
  875. "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
  876. "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
  877. "__ac_nonce": "06304878000964fdad287",
  878. "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
  879. "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
  880. "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
  881. "_tea_utm_cache_1300": "undefined",
  882. "support_avif": "false",
  883. "support_webp": "false",
  884. "xiguavideopcwebid": "7134967546256016900",
  885. "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
  886. }
  887. response = requests.get(
  888. url=url,
  889. headers=headers,
  890. params=params,
  891. cookies=cookies,
  892. proxies=tunnel_proxies(),
  893. timeout=5,
  894. )
  895. if (
  896. response.status_code != 200
  897. or "data" not in response.json()
  898. or response.json()["data"] == {}
  899. ):
  900. AliyunLogger.logging(
  901. code="2000",
  902. platform=self.platform,
  903. mode=self.mode,
  904. env=self.env,
  905. message="获取视频信息失败",
  906. trace_id=trace_id,
  907. )
  908. return None
  909. else:
  910. video_info = (
  911. response.json()["data"]
  912. .get("gidInformation", {})
  913. .get("packerData", {})
  914. .get("video", {})
  915. )
  916. if video_info == {}:
  917. return None
  918. video_detail = get_video_url(video_info)
  919. video_dict = {
  920. "video_title": video_info.get("title", ""),
  921. "video_id": video_info.get("videoResource", {}).get("vid", ""),
  922. "gid": str(item_id),
  923. "play_cnt": int(video_info.get("video_watch_count", 0)),
  924. "like_cnt": int(video_info.get("video_like_count", 0)),
  925. "comment_cnt": int(get_comment_cnt(item_id)),
  926. "share_cnt": 0,
  927. "favorite_cnt": 0,
  928. "duration": int(video_info.get("video_duration", 0)),
  929. "video_width": int(video_detail["video_width"]),
  930. "video_height": int(video_detail["video_height"]),
  931. "publish_time_stamp": int(video_info.get("video_publish_time", 0)),
  932. "publish_time_str": time.strftime(
  933. "%Y-%m-%d %H:%M:%S",
  934. time.localtime(int(video_info.get("video_publish_time", 0))),
  935. ),
  936. "user_name": video_info.get("user_info", {}).get("name", ""),
  937. "user_id": str(video_info.get("user_info", {}).get("user_id", "")),
  938. "avatar_url": str(
  939. video_info.get("user_info", {}).get("avatar_url", "")
  940. ),
  941. "cover_url": video_info.get("poster_url", ""),
  942. "audio_url": video_detail["audio_url"],
  943. "video_url": video_detail["video_url"],
  944. "session": f"xigua-search-{int(time.time())}",
  945. }
  946. return video_dict
  947. if __name__ == "__main__":
  948. user_list = [
  949. {
  950. "uid": 6267140,
  951. "source": "xigua",
  952. "link": "https://www.ixigua.com/home/2779177225827568",
  953. "nick_name": "秋晴爱音乐",
  954. "avatar_url": "",
  955. "mode": "author",
  956. },
  957. {
  958. "uid": 6267140,
  959. "source": "xigua",
  960. "link": "https://www.ixigua.com/home/2885546124776780",
  961. "nick_name": "朗诵放歌的老山羊",
  962. "avatar_url": "",
  963. "mode": "author",
  964. },
  965. {
  966. "uid": 6267140,
  967. "source": "xigua",
  968. "link": "https://www.ixigua.com/home/5880938217",
  969. "nick_name": "天原声疗",
  970. "avatar_url": "",
  971. "mode": "author",
  972. },
  973. ]
  974. # rule = {'period': {'min': 30, 'max': 30}, 'duration': {'min': 20, 'max': 0}, 'play_cnt': {'min': 100000, 'max': 0}}
  975. # XGA = XiGuaAuthor(
  976. # platform="xigua",
  977. # mode="author",
  978. # rule_dict=rule,
  979. # env="prod",
  980. # user_list=user_list
  981. # )
  982. # XGA.get_author_list()