# xigua_author.py
  1. import json
  2. import os
  3. import re
  4. import random
  5. import sys
  6. import string
  7. import time
  8. import uuid
  9. import base64
  10. import requests
  11. from fake_useragent import FakeUserAgent
  12. from common.mq import MQ
  13. sys.path.append(os.getcwd())
  14. from common import AliyunLogger, PiaoQuanPipeline, tunnel_proxies
  15. from common.limit import AuthorLimit
  16. def extract_info_by_re(text):
  17. """
  18. 通过正则表达式获取文本中的信息
  19. :param text:
  20. :return:
  21. """
  22. # 标题
  23. title_match = re.search(r'<title[^>]*>(.*?)</title>', text)
  24. if title_match:
  25. title_content = title_match.group(1)
  26. title_content = title_content.split(" - ")[0]
  27. title_content = bytes(title_content, "latin1").decode()
  28. else:
  29. title_content = ""
  30. # video_url
  31. main_url = re.search(r'("main_url":")(.*?)"', text)[0]
  32. main_url = main_url.split(":")[1]
  33. decoded_data = base64.b64decode(main_url)
  34. try:
  35. # 尝试使用utf-8解码
  36. video_url = decoded_data.decode()
  37. except UnicodeDecodeError:
  38. # 如果utf-8解码失败,尝试使用其他编码方式
  39. video_url = decoded_data.decode('latin-1')
  40. # video_id
  41. video_id = re.search(r'"vid":"(.*?)"', text).group(1)
  42. # like_count
  43. like_count = re.search(r'"video_like_count":"(.*?)"', text).group(1)
  44. # cover_url
  45. cover_url = re.search(r'"avatar_url":"(.*?)"', text).group(1)
  46. # video_play
  47. video_watch_count = re.search(r'"video_watch_count":"(.*?)"', text).group(1)
  48. # "video_publish_time"
  49. publish_time = re.search(r'"video_publish_time":"(.*?)"', text).group(1)
  50. # video_duration
  51. duration = re.search(r'("video_duration":)(.*?)"', text).group(2).replace(",", "")
  52. return {
  53. "title": title_content,
  54. "url": video_url,
  55. "video_id": video_id,
  56. "like_count": like_count,
  57. "cover_url": cover_url,
  58. "play_count": video_watch_count,
  59. "publish_time": publish_time,
  60. "duration": duration
  61. }
  62. def random_signature():
  63. """
  64. 随机生成签名
  65. """
  66. src_digits = string.digits # string_数字
  67. src_uppercase = string.ascii_uppercase # string_大写字母
  68. src_lowercase = string.ascii_lowercase # string_小写字母
  69. digits_num = random.randint(1, 6)
  70. uppercase_num = random.randint(1, 26 - digits_num - 1)
  71. lowercase_num = 26 - (digits_num + uppercase_num)
  72. password = (
  73. random.sample(src_digits, digits_num)
  74. + random.sample(src_uppercase, uppercase_num)
  75. + random.sample(src_lowercase, lowercase_num)
  76. )
  77. random.shuffle(password)
  78. new_password = "AAAAAAAAAA" + "".join(password)[10:-4] + "AAAB"
  79. new_password_start = new_password[0:18]
  80. new_password_end = new_password[-7:]
  81. if new_password[18] == "8":
  82. new_password = new_password_start + "w" + new_password_end
  83. elif new_password[18] == "9":
  84. new_password = new_password_start + "x" + new_password_end
  85. elif new_password[18] == "-":
  86. new_password = new_password_start + "y" + new_password_end
  87. elif new_password[18] == ".":
  88. new_password = new_password_start + "z" + new_password_end
  89. else:
  90. new_password = new_password_start + "y" + new_password_end
  91. return new_password
  92. def get_video_url(video_info):
  93. """
  94. 获取视频的链接
  95. """
  96. video_url_dict = {}
  97. # video_url
  98. if "videoResource" not in video_info:
  99. video_url_dict["video_url"] = ""
  100. video_url_dict["audio_url"] = ""
  101. video_url_dict["video_width"] = 0
  102. video_url_dict["video_height"] = 0
  103. elif "dash_120fps" in video_info["videoResource"]:
  104. if (
  105. "video_list" in video_info["videoResource"]["dash_120fps"]
  106. and "video_4" in video_info["videoResource"]["dash_120fps"]["video_list"]
  107. ):
  108. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  109. "video_4"
  110. ]["backup_url_1"]
  111. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  112. "video_4"
  113. ]["backup_url_1"]
  114. if len(video_url) % 3 == 1:
  115. video_url += "=="
  116. elif len(video_url) % 3 == 2:
  117. video_url += "="
  118. elif len(audio_url) % 3 == 1:
  119. audio_url += "=="
  120. elif len(audio_url) % 3 == 2:
  121. audio_url += "="
  122. video_url = base64.b64decode(video_url).decode("utf8")
  123. audio_url = base64.b64decode(audio_url).decode("utf8")
  124. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  125. "video_4"
  126. ]["vwidth"]
  127. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  128. "video_4"
  129. ]["vheight"]
  130. video_url_dict["video_url"] = video_url
  131. video_url_dict["audio_url"] = audio_url
  132. video_url_dict["video_width"] = video_width
  133. video_url_dict["video_height"] = video_height
  134. elif (
  135. "video_list" in video_info["videoResource"]["dash_120fps"]
  136. and "video_3" in video_info["videoResource"]["dash_120fps"]["video_list"]
  137. ):
  138. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  139. "video_3"
  140. ]["backup_url_1"]
  141. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  142. "video_3"
  143. ]["backup_url_1"]
  144. if len(video_url) % 3 == 1:
  145. video_url += "=="
  146. elif len(video_url) % 3 == 2:
  147. video_url += "="
  148. elif len(audio_url) % 3 == 1:
  149. audio_url += "=="
  150. elif len(audio_url) % 3 == 2:
  151. audio_url += "="
  152. video_url = base64.b64decode(video_url).decode("utf8")
  153. audio_url = base64.b64decode(audio_url).decode("utf8")
  154. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  155. "video_3"
  156. ]["vwidth"]
  157. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  158. "video_3"
  159. ]["vheight"]
  160. video_url_dict["video_url"] = video_url
  161. video_url_dict["audio_url"] = audio_url
  162. video_url_dict["video_width"] = video_width
  163. video_url_dict["video_height"] = video_height
  164. elif (
  165. "video_list" in video_info["videoResource"]["dash_120fps"]
  166. and "video_2" in video_info["videoResource"]["dash_120fps"]["video_list"]
  167. ):
  168. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  169. "video_2"
  170. ]["backup_url_1"]
  171. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  172. "video_2"
  173. ]["backup_url_1"]
  174. if len(video_url) % 3 == 1:
  175. video_url += "=="
  176. elif len(video_url) % 3 == 2:
  177. video_url += "="
  178. elif len(audio_url) % 3 == 1:
  179. audio_url += "=="
  180. elif len(audio_url) % 3 == 2:
  181. audio_url += "="
  182. video_url = base64.b64decode(video_url).decode("utf8")
  183. audio_url = base64.b64decode(audio_url).decode("utf8")
  184. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  185. "video_2"
  186. ]["vwidth"]
  187. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  188. "video_2"
  189. ]["vheight"]
  190. video_url_dict["video_url"] = video_url
  191. video_url_dict["audio_url"] = audio_url
  192. video_url_dict["video_width"] = video_width
  193. video_url_dict["video_height"] = video_height
  194. elif (
  195. "video_list" in video_info["videoResource"]["dash_120fps"]
  196. and "video_1" in video_info["videoResource"]["dash_120fps"]["video_list"]
  197. ):
  198. video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  199. "video_1"
  200. ]["backup_url_1"]
  201. audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
  202. "video_1"
  203. ]["backup_url_1"]
  204. if len(video_url) % 3 == 1:
  205. video_url += "=="
  206. elif len(video_url) % 3 == 2:
  207. video_url += "="
  208. elif len(audio_url) % 3 == 1:
  209. audio_url += "=="
  210. elif len(audio_url) % 3 == 2:
  211. audio_url += "="
  212. video_url = base64.b64decode(video_url).decode("utf8")
  213. audio_url = base64.b64decode(audio_url).decode("utf8")
  214. video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
  215. "video_1"
  216. ]["vwidth"]
  217. video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
  218. "video_1"
  219. ]["vheight"]
  220. video_url_dict["video_url"] = video_url
  221. video_url_dict["audio_url"] = audio_url
  222. video_url_dict["video_width"] = video_width
  223. video_url_dict["video_height"] = video_height
  224. elif (
  225. "dynamic_video" in video_info["videoResource"]["dash_120fps"]
  226. and "dynamic_video_list"
  227. in video_info["videoResource"]["dash_120fps"]["dynamic_video"]
  228. and "dynamic_audio_list"
  229. in video_info["videoResource"]["dash_120fps"]["dynamic_video"]
  230. and len(
  231. video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  232. "dynamic_video_list"
  233. ]
  234. )
  235. != 0
  236. and len(
  237. video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  238. "dynamic_audio_list"
  239. ]
  240. )
  241. != 0
  242. ):
  243. video_url = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  244. "dynamic_video_list"
  245. ][-1]["backup_url_1"]
  246. audio_url = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  247. "dynamic_audio_list"
  248. ][-1]["backup_url_1"]
  249. if len(video_url) % 3 == 1:
  250. video_url += "=="
  251. elif len(video_url) % 3 == 2:
  252. video_url += "="
  253. elif len(audio_url) % 3 == 1:
  254. audio_url += "=="
  255. elif len(audio_url) % 3 == 2:
  256. audio_url += "="
  257. video_url = base64.b64decode(video_url).decode("utf8")
  258. audio_url = base64.b64decode(audio_url).decode("utf8")
  259. video_width = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  260. "dynamic_video_list"
  261. ][-1]["vwidth"]
  262. video_height = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
  263. "dynamic_video_list"
  264. ][-1]["vheight"]
  265. video_url_dict["video_url"] = video_url
  266. video_url_dict["audio_url"] = audio_url
  267. video_url_dict["video_width"] = video_width
  268. video_url_dict["video_height"] = video_height
  269. else:
  270. video_url_dict["video_url"] = ""
  271. video_url_dict["audio_url"] = ""
  272. video_url_dict["video_width"] = 0
  273. video_url_dict["video_height"] = 0
  274. elif "dash" in video_info["videoResource"]:
  275. if (
  276. "video_list" in video_info["videoResource"]["dash"]
  277. and "video_4" in video_info["videoResource"]["dash"]["video_list"]
  278. ):
  279. video_url = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  280. "backup_url_1"
  281. ]
  282. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  283. "backup_url_1"
  284. ]
  285. if len(video_url) % 3 == 1:
  286. video_url += "=="
  287. elif len(video_url) % 3 == 2:
  288. video_url += "="
  289. elif len(audio_url) % 3 == 1:
  290. audio_url += "=="
  291. elif len(audio_url) % 3 == 2:
  292. audio_url += "="
  293. video_url = base64.b64decode(video_url).decode("utf8")
  294. audio_url = base64.b64decode(audio_url).decode("utf8")
  295. video_width = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  296. "vwidth"
  297. ]
  298. video_height = video_info["videoResource"]["dash"]["video_list"]["video_4"][
  299. "vheight"
  300. ]
  301. video_url_dict["video_url"] = video_url
  302. video_url_dict["audio_url"] = audio_url
  303. video_url_dict["video_width"] = video_width
  304. video_url_dict["video_height"] = video_height
  305. elif (
  306. "video_list" in video_info["videoResource"]["dash"]
  307. and "video_3" in video_info["videoResource"]["dash"]["video_list"]
  308. ):
  309. video_url = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  310. "backup_url_1"
  311. ]
  312. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  313. "backup_url_1"
  314. ]
  315. if len(video_url) % 3 == 1:
  316. video_url += "=="
  317. elif len(video_url) % 3 == 2:
  318. video_url += "="
  319. elif len(audio_url) % 3 == 1:
  320. audio_url += "=="
  321. elif len(audio_url) % 3 == 2:
  322. audio_url += "="
  323. video_url = base64.b64decode(video_url).decode("utf8")
  324. audio_url = base64.b64decode(audio_url).decode("utf8")
  325. video_width = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  326. "vwidth"
  327. ]
  328. video_height = video_info["videoResource"]["dash"]["video_list"]["video_3"][
  329. "vheight"
  330. ]
  331. video_url_dict["video_url"] = video_url
  332. video_url_dict["audio_url"] = audio_url
  333. video_url_dict["video_width"] = video_width
  334. video_url_dict["video_height"] = video_height
  335. elif (
  336. "video_list" in video_info["videoResource"]["dash"]
  337. and "video_2" in video_info["videoResource"]["dash"]["video_list"]
  338. ):
  339. video_url = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  340. "backup_url_1"
  341. ]
  342. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  343. "backup_url_1"
  344. ]
  345. if len(video_url) % 3 == 1:
  346. video_url += "=="
  347. elif len(video_url) % 3 == 2:
  348. video_url += "="
  349. elif len(audio_url) % 3 == 1:
  350. audio_url += "=="
  351. elif len(audio_url) % 3 == 2:
  352. audio_url += "="
  353. video_url = base64.b64decode(video_url).decode("utf8")
  354. audio_url = base64.b64decode(audio_url).decode("utf8")
  355. video_width = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  356. "vwidth"
  357. ]
  358. video_height = video_info["videoResource"]["dash"]["video_list"]["video_2"][
  359. "vheight"
  360. ]
  361. video_url_dict["video_url"] = video_url
  362. video_url_dict["audio_url"] = audio_url
  363. video_url_dict["video_width"] = video_width
  364. video_url_dict["video_height"] = video_height
  365. elif (
  366. "video_list" in video_info["videoResource"]["dash"]
  367. and "video_1" in video_info["videoResource"]["dash"]["video_list"]
  368. ):
  369. video_url = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  370. "backup_url_1"
  371. ]
  372. audio_url = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  373. "backup_url_1"
  374. ]
  375. if len(video_url) % 3 == 1:
  376. video_url += "=="
  377. elif len(video_url) % 3 == 2:
  378. video_url += "="
  379. elif len(audio_url) % 3 == 1:
  380. audio_url += "=="
  381. elif len(audio_url) % 3 == 2:
  382. audio_url += "="
  383. video_url = base64.b64decode(video_url).decode("utf8")
  384. audio_url = base64.b64decode(audio_url).decode("utf8")
  385. video_width = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  386. "vwidth"
  387. ]
  388. video_height = video_info["videoResource"]["dash"]["video_list"]["video_1"][
  389. "vheight"
  390. ]
  391. video_url_dict["video_url"] = video_url
  392. video_url_dict["audio_url"] = audio_url
  393. video_url_dict["video_width"] = video_width
  394. video_url_dict["video_height"] = video_height
  395. elif (
  396. "dynamic_video" in video_info["videoResource"]["dash"]
  397. and "dynamic_video_list"
  398. in video_info["videoResource"]["dash"]["dynamic_video"]
  399. and "dynamic_audio_list"
  400. in video_info["videoResource"]["dash"]["dynamic_video"]
  401. and len(
  402. video_info["videoResource"]["dash"]["dynamic_video"][
  403. "dynamic_video_list"
  404. ]
  405. )
  406. != 0
  407. and len(
  408. video_info["videoResource"]["dash"]["dynamic_video"][
  409. "dynamic_audio_list"
  410. ]
  411. )
  412. != 0
  413. ):
  414. video_url = video_info["videoResource"]["dash"]["dynamic_video"][
  415. "dynamic_video_list"
  416. ][-1]["backup_url_1"]
  417. audio_url = video_info["videoResource"]["dash"]["dynamic_video"][
  418. "dynamic_audio_list"
  419. ][-1]["backup_url_1"]
  420. if len(video_url) % 3 == 1:
  421. video_url += "=="
  422. elif len(video_url) % 3 == 2:
  423. video_url += "="
  424. elif len(audio_url) % 3 == 1:
  425. audio_url += "=="
  426. elif len(audio_url) % 3 == 2:
  427. audio_url += "="
  428. video_url = base64.b64decode(video_url).decode("utf8")
  429. audio_url = base64.b64decode(audio_url).decode("utf8")
  430. video_width = video_info["videoResource"]["dash"]["dynamic_video"][
  431. "dynamic_video_list"
  432. ][-1]["vwidth"]
  433. video_height = video_info["videoResource"]["dash"]["dynamic_video"][
  434. "dynamic_video_list"
  435. ][-1]["vheight"]
  436. video_url_dict["video_url"] = video_url
  437. video_url_dict["audio_url"] = audio_url
  438. video_url_dict["video_width"] = video_width
  439. video_url_dict["video_height"] = video_height
  440. else:
  441. video_url_dict["video_url"] = ""
  442. video_url_dict["audio_url"] = ""
  443. video_url_dict["video_width"] = 0
  444. video_url_dict["video_height"] = 0
  445. elif "normal" in video_info["videoResource"]:
  446. if (
  447. "video_list" in video_info["videoResource"]["normal"]
  448. and "video_4" in video_info["videoResource"]["normal"]["video_list"]
  449. ):
  450. video_url = video_info["videoResource"]["normal"]["video_list"]["video_4"][
  451. "backup_url_1"
  452. ]
  453. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_4"][
  454. "backup_url_1"
  455. ]
  456. if len(video_url) % 3 == 1:
  457. video_url += "=="
  458. elif len(video_url) % 3 == 2:
  459. video_url += "="
  460. elif len(audio_url) % 3 == 1:
  461. audio_url += "=="
  462. elif len(audio_url) % 3 == 2:
  463. audio_url += "="
  464. video_url = base64.b64decode(video_url).decode("utf8")
  465. audio_url = base64.b64decode(audio_url).decode("utf8")
  466. video_width = video_info["videoResource"]["normal"]["video_list"][
  467. "video_4"
  468. ]["vwidth"]
  469. video_height = video_info["videoResource"]["normal"]["video_list"][
  470. "video_4"
  471. ]["vheight"]
  472. video_url_dict["video_url"] = video_url
  473. video_url_dict["audio_url"] = audio_url
  474. video_url_dict["video_width"] = video_width
  475. video_url_dict["video_height"] = video_height
  476. elif (
  477. "video_list" in video_info["videoResource"]["normal"]
  478. and "video_3" in video_info["videoResource"]["normal"]["video_list"]
  479. ):
  480. video_url = video_info["videoResource"]["normal"]["video_list"]["video_3"][
  481. "backup_url_1"
  482. ]
  483. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_3"][
  484. "backup_url_1"
  485. ]
  486. if len(video_url) % 3 == 1:
  487. video_url += "=="
  488. elif len(video_url) % 3 == 2:
  489. video_url += "="
  490. elif len(audio_url) % 3 == 1:
  491. audio_url += "=="
  492. elif len(audio_url) % 3 == 2:
  493. audio_url += "="
  494. video_url = base64.b64decode(video_url).decode("utf8")
  495. audio_url = base64.b64decode(audio_url).decode("utf8")
  496. video_width = video_info["videoResource"]["normal"]["video_list"][
  497. "video_3"
  498. ]["vwidth"]
  499. video_height = video_info["videoResource"]["normal"]["video_list"][
  500. "video_3"
  501. ]["vheight"]
  502. video_url_dict["video_url"] = video_url
  503. video_url_dict["audio_url"] = audio_url
  504. video_url_dict["video_width"] = video_width
  505. video_url_dict["video_height"] = video_height
  506. elif (
  507. "video_list" in video_info["videoResource"]["normal"]
  508. and "video_2" in video_info["videoResource"]["normal"]["video_list"]
  509. ):
  510. video_url = video_info["videoResource"]["normal"]["video_list"]["video_2"][
  511. "backup_url_1"
  512. ]
  513. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_2"][
  514. "backup_url_1"
  515. ]
  516. if len(video_url) % 3 == 1:
  517. video_url += "=="
  518. elif len(video_url) % 3 == 2:
  519. video_url += "="
  520. elif len(audio_url) % 3 == 1:
  521. audio_url += "=="
  522. elif len(audio_url) % 3 == 2:
  523. audio_url += "="
  524. video_url = base64.b64decode(video_url).decode("utf8")
  525. audio_url = base64.b64decode(audio_url).decode("utf8")
  526. video_width = video_info["videoResource"]["normal"]["video_list"][
  527. "video_2"
  528. ]["vwidth"]
  529. video_height = video_info["videoResource"]["normal"]["video_list"][
  530. "video_2"
  531. ]["vheight"]
  532. video_url_dict["video_url"] = video_url
  533. video_url_dict["audio_url"] = audio_url
  534. video_url_dict["video_width"] = video_width
  535. video_url_dict["video_height"] = video_height
  536. elif (
  537. "video_list" in video_info["videoResource"]["normal"]
  538. and "video_1" in video_info["videoResource"]["normal"]["video_list"]
  539. ):
  540. video_url = video_info["videoResource"]["normal"]["video_list"]["video_1"][
  541. "backup_url_1"
  542. ]
  543. audio_url = video_info["videoResource"]["normal"]["video_list"]["video_1"][
  544. "backup_url_1"
  545. ]
  546. if len(video_url) % 3 == 1:
  547. video_url += "=="
  548. elif len(video_url) % 3 == 2:
  549. video_url += "="
  550. elif len(audio_url) % 3 == 1:
  551. audio_url += "=="
  552. elif len(audio_url) % 3 == 2:
  553. audio_url += "="
  554. video_url = base64.b64decode(video_url).decode("utf8")
  555. audio_url = base64.b64decode(audio_url).decode("utf8")
  556. video_width = video_info["videoResource"]["normal"]["video_list"][
  557. "video_1"
  558. ]["vwidth"]
  559. video_height = video_info["videoResource"]["normal"]["video_list"][
  560. "video_1"
  561. ]["vheight"]
  562. video_url_dict["video_url"] = video_url
  563. video_url_dict["audio_url"] = audio_url
  564. video_url_dict["video_width"] = video_width
  565. video_url_dict["video_height"] = video_height
  566. elif (
  567. "dynamic_video" in video_info["videoResource"]["normal"]
  568. and "dynamic_video_list"
  569. in video_info["videoResource"]["normal"]["dynamic_video"]
  570. and "dynamic_audio_list"
  571. in video_info["videoResource"]["normal"]["dynamic_video"]
  572. and len(
  573. video_info["videoResource"]["normal"]["dynamic_video"][
  574. "dynamic_video_list"
  575. ]
  576. )
  577. != 0
  578. and len(
  579. video_info["videoResource"]["normal"]["dynamic_video"][
  580. "dynamic_audio_list"
  581. ]
  582. )
  583. != 0
  584. ):
  585. video_url = video_info["videoResource"]["normal"]["dynamic_video"][
  586. "dynamic_video_list"
  587. ][-1]["backup_url_1"]
  588. audio_url = video_info["videoResource"]["normal"]["dynamic_video"][
  589. "dynamic_audio_list"
  590. ][-1]["backup_url_1"]
  591. if len(video_url) % 3 == 1:
  592. video_url += "=="
  593. elif len(video_url) % 3 == 2:
  594. video_url += "="
  595. elif len(audio_url) % 3 == 1:
  596. audio_url += "=="
  597. elif len(audio_url) % 3 == 2:
  598. audio_url += "="
  599. video_url = base64.b64decode(video_url).decode("utf8")
  600. audio_url = base64.b64decode(audio_url).decode("utf8")
  601. video_width = video_info["videoResource"]["normal"]["dynamic_video"][
  602. "dynamic_video_list"
  603. ][-1]["vwidth"]
  604. video_height = video_info["videoResource"]["normal"]["dynamic_video"][
  605. "dynamic_video_list"
  606. ][-1]["vheight"]
  607. video_url_dict["video_url"] = video_url
  608. video_url_dict["audio_url"] = audio_url
  609. video_url_dict["video_width"] = video_width
  610. video_url_dict["video_height"] = video_height
  611. else:
  612. video_url_dict["video_url"] = ""
  613. video_url_dict["audio_url"] = ""
  614. video_url_dict["video_width"] = 0
  615. video_url_dict["video_height"] = 0
  616. else:
  617. video_url_dict["video_url"] = ""
  618. video_url_dict["audio_url"] = ""
  619. video_url_dict["video_width"] = 0
  620. video_url_dict["video_height"] = 0
  621. return video_url_dict
  622. def get_comment_cnt(item_id):
  623. """
  624. 获取视频的评论数量
  625. """
  626. url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
  627. params = {
  628. "tab_index": "0",
  629. "count": "10",
  630. "offset": "10",
  631. "group_id": str(item_id),
  632. "item_id": str(item_id),
  633. "aid": "1768",
  634. "msToken": "50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==",
  635. "X-Bogus": "DFSzswVOyGtANVeWtCLMqR/F6q9U",
  636. "_signature": random_signature(),
  637. }
  638. headers = {
  639. "authority": "www.ixigua.com",
  640. "accept": "application/json, text/plain, */*",
  641. "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
  642. "cache-control": "no-cache",
  643. "cookie": "MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; passport_csrf_token=72b2574f3c99f8ba670e42df430218fd; passport_csrf_token_default=72b2574f3c99f8ba670e42df430218fd; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; uid_tt=c13f47d51767f616befe32fb3e9f485a; uid_tt_ss=c13f47d51767f616befe32fb3e9f485a; sid_tt=c7472b508ea631823ba765a60cf8757f; sessionid=c7472b508ea631823ba765a60cf8757f; sessionid_ss=c7472b508ea631823ba765a60cf8757f; sid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; ssid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; SEARCH_CARD_MODE=7168304743566296612_0; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=7Pux7s634-z8DYvCM20y7KigwH5u7Rh6D9C-RROpnT.aGMEcz6Vsxp.oai47wJqa4f86; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1683858689%7Ca5223fe1500578e01e138a0d71d6444692018296c4c24f5885af174a65873c95; ixigua-a-s=3; msToken=50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==; __ac_nonce=0645dcbf0005064517440; __ac_signature=_02B4Z6wo00f01FEGmAwAAIDBKchzCGqn-MBRJpyAAHAjieFC5GEg6gGiwz.I4PRrJl7f0GcixFrExKmgt6QI1i1S-dQyofPEj2ugWTCnmKUdJQv-wYuDofeKNe8VtMtZq2aKewyUGeKU-5Ud21; ixigua-a-s=3",
  644. "pragma": "no-cache",
  645. "referer": f"https://www.ixigua.com/{item_id}?logTag=3c5aa86a8600b9ab8540",
  646. "sec-ch-ua": '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
  647. "sec-ch-ua-mobile": "?0",
  648. "sec-ch-ua-platform": '"macOS"',
  649. "sec-fetch-dest": "empty",
  650. "sec-fetch-mode": "cors",
  651. "sec-fetch-site": "same-origin",
  652. "tt-anti-token": "cBITBHvmYjEygzv-f9c78c1297722cf1f559c74b084e4525ce4900bdcf9e8588f20cc7c2e3234422",
  653. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",
  654. "x-secsdk-csrf-token": "000100000001f8e733cf37f0cd255a51aea9a81ff7bc0c09490cfe41ad827c3c5c18ec809279175e4d9f5553d8a5",
  655. }
  656. response = requests.get(
  657. url=url, headers=headers, params=params, proxies=tunnel_proxies(), timeout=5
  658. )
  659. response.close()
  660. if (
  661. response.status_code != 200
  662. or "total_number" not in response.json()
  663. or response.json() == {}
  664. ):
  665. return 0
  666. return response.json().get("total_number", 0)
  667. class XiGuaAuthor:
  668. """
  669. 西瓜账号爬虫
  670. """
  671. def __init__(self, platform, mode, rule_dict, env, user_list):
  672. self.platform = platform
  673. self.mode = mode
  674. self.rule_dict = rule_dict
  675. self.env = env
  676. self.user_list = user_list
  677. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  678. self.download_count = 0
  679. self.limiter = AuthorLimit(platform=self.platform, mode=self.mode)
  680. def rule_maker(self, account):
  681. """
  682. 通过不同的账号生成不同的规则
  683. :param account: 输入的账号信息
  684. {'play_cnt': {'min': 100000, 'max': 0}, 'period': {'min': 5, 'max': 5}}
  685. """
  686. temp = account['link'].split("_")
  687. if len(temp) == 1:
  688. return self.rule_dict
  689. else:
  690. flag = temp[-2]
  691. match flag:
  692. case "V1":
  693. rule_dict = {
  694. "play_cnt": {"min": 100000, "max": 0},
  695. 'period': {"min": 90, "max": 90},
  696. 'special': 0.02
  697. }
  698. return rule_dict
  699. case "V2":
  700. rule_dict = {
  701. "play_cnt": {"min": 10000, "max": 0},
  702. 'period': {"min": 90, "max": 90},
  703. 'special': 0.01
  704. }
  705. return rule_dict
  706. case "V3":
  707. rule_dict = {
  708. "play_cnt": {"min": 5000, "max": 0},
  709. 'period': {"min": 90, "max": 90},
  710. 'special': 0.01
  711. }
  712. return rule_dict
  713. def get_author_list(self):
  714. """
  715. 每轮只抓取定量的数据,到达数量后自己退出
  716. 获取账号列表以及账号信息
  717. """
  718. # max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
  719. for user_dict in self.user_list:
  720. # if self.download_count <= max_count:
  721. try:
  722. flag = user_dict["link"][0]
  723. match flag:
  724. case "V":
  725. self.get_video_list(user_dict)
  726. case "X":
  727. self.get_tiny_video_list(user_dict)
  728. case "h":
  729. self.get_video_list(user_dict)
  730. case "D":
  731. self.get_video_list(user_dict)
  732. case "B":
  733. self.get_video_list(user_dict)
  734. self.get_tiny_video_list(user_dict)
  735. except Exception as e:
  736. AliyunLogger.logging(
  737. code="3001",
  738. account=user_dict["uid"],
  739. platform=self.platform,
  740. mode=self.mode,
  741. env=self.env,
  742. message="扫描账号时出现bug, 报错是 {}".format(e)
  743. )
  744. # time.sleep(random.randint(1, 15))
  745. # else:
  746. # AliyunLogger.logging(
  747. # code="2000",
  748. # platform=self.platform,
  749. # mode=self.mode,
  750. # env=self.env,
  751. # message="本轮已经抓取足够数量的视频,已经自动退出",
  752. # )
  753. # return
  754. def get_video_list(self, user_dict):
  755. """
  756. 获取某个账号的视频列表
  757. 账号分为 3 类
  758. """
  759. offset = 0
  760. signature = random_signature()
  761. link = user_dict['link'].split("_")[-1]
  762. url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
  763. while True:
  764. to_user_id = str(link.replace("https://www.ixigua.com/home/", ""))
  765. params = {
  766. "to_user_id": to_user_id,
  767. "offset": str(offset),
  768. "limit": "30",
  769. "maxBehotTime": "0",
  770. "order": "new",
  771. "isHome": "0",
  772. "_signature": signature,
  773. }
  774. headers = {
  775. "referer": f'https://www.ixigua.com/home/{link.replace("https://www.ixigua.com/home/", "")}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
  776. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41",
  777. }
  778. response = requests.get(
  779. url=url,
  780. headers=headers,
  781. params=params,
  782. proxies=tunnel_proxies(),
  783. timeout=5,
  784. )
  785. offset += 30
  786. if "data" not in response.text or response.status_code != 200:
  787. AliyunLogger.logging(
  788. code="3000",
  789. platform=self.platform,
  790. mode=self.mode,
  791. env=self.env,
  792. message=f"get_videoList:{response.text}\n",
  793. )
  794. return
  795. elif not response.json()["data"]["videoList"]:
  796. AliyunLogger.logging(
  797. code="3000",
  798. platform=self.platform,
  799. mode=self.mode,
  800. env=self.env,
  801. message=f"没有更多数据啦~\n",
  802. )
  803. return
  804. else:
  805. feeds = response.json()["data"]["videoList"]
  806. for video_obj in feeds:
  807. try:
  808. AliyunLogger.logging(
  809. code="1001",
  810. account=user_dict['uid'],
  811. platform=self.platform,
  812. mode=self.mode,
  813. env=self.env,
  814. data=video_obj,
  815. message="扫描到一条视频",
  816. )
  817. date_flag = self.process_video_obj(video_obj, user_dict, "l")
  818. if not date_flag:
  819. return
  820. except Exception as e:
  821. AliyunLogger.logging(
  822. code="3000",
  823. platform=self.platform,
  824. mode=self.mode,
  825. env=self.env,
  826. data=video_obj,
  827. message="抓取单条视频异常, 报错原因是: {}".format(e),
  828. )
  829. def get_tiny_video_list(self, user_dict):
  830. """
  831. 获取小视频
  832. """
  833. url = "https://www.ixigua.com/api/videov2/hotsoon/video"
  834. max_behot_time = "0"
  835. link = user_dict['link'].split("_")[-1]
  836. to_user_id = str(link.replace("https://www.ixigua.com/home/", ""))
  837. while True:
  838. params = {
  839. "to_user_id": to_user_id,
  840. "max_behot_time": max_behot_time,
  841. "_signature": random_signature()
  842. }
  843. headers = {
  844. "referer": "https://www.ixigua.com/{}?&".format(to_user_id),
  845. "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41",
  846. }
  847. response = requests.get(
  848. url=url,
  849. headers=headers,
  850. params=params,
  851. proxies=tunnel_proxies(),
  852. timeout=5,
  853. )
  854. if "data" not in response.text or response.status_code != 200:
  855. AliyunLogger.logging(
  856. code="2000",
  857. platform=self.platform,
  858. mode=self.mode,
  859. env=self.env,
  860. message=f"get_videoList:{response.text}\n",
  861. )
  862. return
  863. elif not response.json()["data"]["data"]:
  864. AliyunLogger.logging(
  865. code="2000",
  866. platform=self.platform,
  867. mode=self.mode,
  868. env=self.env,
  869. message=f"没有更多数据啦~\n",
  870. )
  871. return
  872. else:
  873. video_list = response.json()['data']['data']
  874. max_behot_time = video_list[-1]["max_behot_time"]
  875. for video_obj in video_list:
  876. try:
  877. AliyunLogger.logging(
  878. code="1001",
  879. account=user_dict['uid'],
  880. platform=self.platform,
  881. mode=self.mode,
  882. env=self.env,
  883. data=video_obj,
  884. message="扫描到一条小视频",
  885. )
  886. date_flag = self.process_video_obj(video_obj, user_dict, "s")
  887. if not date_flag:
  888. return
  889. except Exception as e:
  890. AliyunLogger.logging(
  891. code="3000",
  892. platform=self.platform,
  893. mode=self.mode,
  894. env=self.env,
  895. data=video_obj,
  896. message="抓取单条视频异常, 报错原因是: {}".format(e),
  897. )
  898. def process_video_obj(self, video_obj, user_dict, f):
  899. """
  900. process video_obj and extract video_url
  901. """
  902. new_rule = self.rule_maker(user_dict)
  903. trace_id = self.platform + str(uuid.uuid1())
  904. if f == "s":
  905. item_id = video_obj.get("id_str", "")
  906. else:
  907. item_id = video_obj.get("item_id", "")
  908. if not item_id:
  909. AliyunLogger.logging(
  910. code="2005",
  911. account=user_dict['uid'],
  912. platform=self.platform,
  913. mode=self.mode,
  914. env=self.env,
  915. message="无效视频",
  916. data=video_obj,
  917. trace_id=trace_id,
  918. )
  919. return
  920. # 获取视频信息
  921. video_dict = self.get_video_info(item_id=item_id, trace_id=trace_id)
  922. video_dict["out_user_id"] = video_dict["user_id"]
  923. video_dict["platform"] = self.platform
  924. video_dict["strategy"] = self.mode
  925. video_dict["out_video_id"] = video_dict["video_id"]
  926. video_dict["width"] = video_dict["video_width"]
  927. video_dict["height"] = video_dict["video_height"]
  928. video_dict["crawler_rule"] = json.dumps(new_rule)
  929. video_dict["user_id"] = user_dict["uid"]
  930. video_dict["publish_time"] = video_dict["publish_time_str"]
  931. video_dict["strategy_type"] = self.mode
  932. video_dict["update_time_stamp"] = int(time.time())
  933. if int(time.time()) - video_dict['publish_time_stamp'] > 3600 * 24 * int(
  934. new_rule.get("period", {}).get("max", 1000)):
  935. if not video_obj['is_top']:
  936. """
  937. 非置顶数据发布时间超过才退出
  938. """
  939. AliyunLogger.logging(
  940. code="2004",
  941. account=user_dict['uid'],
  942. platform=self.platform,
  943. mode=self.mode,
  944. env=self.env,
  945. data=video_dict,
  946. message="发布时间超过{}天".format(
  947. int(new_rule.get("period", {}).get("max", 1000))
  948. ),
  949. )
  950. return False
  951. pipeline = PiaoQuanPipeline(
  952. platform=self.platform,
  953. mode=self.mode,
  954. rule_dict=new_rule,
  955. env=self.env,
  956. item=video_dict,
  957. trace_id=trace_id,
  958. )
  959. limit_flag = self.limiter.author_limitation(user_id=video_dict['user_id'])
  960. if limit_flag:
  961. title_flag = pipeline.title_flag()
  962. repeat_flag = pipeline.repeat_video()
  963. if title_flag and repeat_flag:
  964. if new_rule.get("special"):
  965. if int(video_dict['play_cnt']) >= int(new_rule.get("play_cnt", {}).get("min", 100000)):
  966. if float(video_dict['like_cnt']) / float(video_dict['play_cnt']) >= new_rule['special']:
  967. self.mq.send_msg(video_dict)
  968. self.download_count += 1
  969. AliyunLogger.logging(
  970. code="1002",
  971. account=user_dict['uid'],
  972. platform=self.platform,
  973. mode=self.mode,
  974. env=self.env,
  975. data=video_dict,
  976. trace_id=trace_id,
  977. message="成功发送 MQ 至 ETL",
  978. )
  979. return True
  980. else:
  981. AliyunLogger.logging(
  982. code="2008",
  983. account=user_dict['uid'],
  984. platform=self.platform,
  985. mode=self.mode,
  986. env=self.env,
  987. message="不满足特殊规则, 点赞量/播放量",
  988. data=video_dict
  989. )
  990. else:
  991. if int(video_dict['play_cnt']) >= int(new_rule.get("play_cnt", {}).get("min", 100000)):
  992. self.mq.send_msg(video_dict)
  993. self.download_count += 1
  994. AliyunLogger.logging(
  995. code="1002",
  996. account=user_dict['uid'],
  997. platform=self.platform,
  998. mode=self.mode,
  999. env=self.env,
  1000. data=video_dict,
  1001. trace_id=trace_id,
  1002. message="成功发送 MQ 至 ETL",
  1003. )
  1004. return True
  1005. else:
  1006. AliyunLogger.logging(
  1007. code="2008",
  1008. account=user_dict['uid'],
  1009. platform=self.platform,
  1010. mode=self.mode,
  1011. env=self.env,
  1012. message="不满足特殊规则, 播放量",
  1013. data=video_dict
  1014. )
  1015. return True
  1016. def get_video_info(self, item_id, trace_id):
  1017. """
  1018. 获取视频信息
  1019. """
  1020. url = "https://www.ixigua.com/{}".format(item_id)
  1021. headers = {
  1022. "accept-encoding": "gzip, deflate",
  1023. "accept-language": "zh-CN,zh-Hans;q=0.9",
  1024. "user-agent": FakeUserAgent().random,
  1025. "referer": "https://www.ixigua.com/{}/".format(item_id),
  1026. }
  1027. response = requests.get(
  1028. url=url,
  1029. headers=headers,
  1030. proxies=tunnel_proxies(),
  1031. timeout=5,
  1032. )
  1033. if (
  1034. response.status_code != 200
  1035. or "data" not in response.json()
  1036. or response.json()["data"] == {}
  1037. ):
  1038. AliyunLogger.logging(
  1039. code="2000",
  1040. platform=self.platform,
  1041. mode=self.mode,
  1042. env=self.env,
  1043. message="获取视频信息失败",
  1044. trace_id=trace_id,
  1045. )
  1046. return None
  1047. else:
  1048. video_info = extract_info_by_re(response.text)
  1049. video_dict = {
  1050. "video_title": video_info.get("title", ""),
  1051. "video_id": video_info.get("video_id"),
  1052. "gid": str(item_id),
  1053. "play_cnt": int(video_info.get("play_count", 0)),
  1054. "like_cnt": int(video_info.get("like_count", 0)),
  1055. "comment_cnt": 0,
  1056. "share_cnt": 0,
  1057. "favorite_cnt": 0,
  1058. "duration": int(video_info.get("duration", 0)),
  1059. "video_width": 0,
  1060. "video_height": 0,
  1061. "publish_time_stamp": int(video_info.get("publish_time", 0)),
  1062. "publish_time_str": time.strftime(
  1063. "%Y-%m-%d %H:%M:%S",
  1064. time.localtime(int(video_info.get("publish_time", 0))),
  1065. ),
  1066. "avatar_url": str(
  1067. video_info.get("user_info", {}).get("avatar_url", "")
  1068. ),
  1069. "cover_url": video_info.get("cover_url", ""),
  1070. "video_url": video_info.get("url"),
  1071. "session": f"xigua-search-{int(time.time())}",
  1072. }
  1073. return video_dict
  1074. if __name__ == "__main__":
  1075. user_list = [
  1076. {
  1077. "uid": 6267140,
  1078. "source": "xigua",
  1079. "link": "https://www.ixigua.com/home/2779177225827568",
  1080. "nick_name": "秋晴爱音乐",
  1081. "avatar_url": "",
  1082. "mode": "author",
  1083. },
  1084. {
  1085. "uid": 6267140,
  1086. "source": "xigua",
  1087. "link": "https://www.ixigua.com/home/2885546124776780",
  1088. "nick_name": "朗诵放歌的老山羊",
  1089. "avatar_url": "",
  1090. "mode": "author",
  1091. },
  1092. {
  1093. "uid": 6267140,
  1094. "source": "xigua",
  1095. "link": "https://www.ixigua.com/home/5880938217",
  1096. "nick_name": "天原声疗",
  1097. "avatar_url": "",
  1098. "mode": "author",
  1099. },
  1100. ]
  1101. # rule = {'period': {'min': 30, 'max': 30}, 'duration': {'min': 20, 'max': 0}, 'play_cnt': {'min': 100000, 'max': 0}}
  1102. # XGA = XiGuaAuthor(
  1103. # platform="xigua",
  1104. # mode="author",
  1105. # rule_dict=rule,
  1106. # env="prod",
  1107. # user_list=user_list
  1108. # )
  1109. # XGA.get_author_list()