# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/17
import base64
import json
import os
import random
import shutil
import string
import sys
import time
import requests
import urllib3
from urllib.parse import quote
from requests.adapters import HTTPAdapter
sys.path.append(os.getcwd())
from common.db import MysqlHelper
from common.users import Users
from common.common import Common
from common.feishu import Feishu
from common.publish import Publish
from common.userAgent import get_random_user_agent, get_random_header

class XiguaSearch:
    platform = "西瓜视频"
    tag = "西瓜视频爬虫,搜索爬虫策略"

    @classmethod
    def get_rule(cls, log_type, crawler):
        try:
            while True:
                rule_sheet = Feishu.get_values_batch(log_type, crawler, "shxOl7")
                if rule_sheet is None:
                    Common.logger(log_type, crawler).warning("rule_sheet is None! 10秒后重新获取")
                    time.sleep(10)
                    continue
                rule_dict = {
                    "play_cnt": int(rule_sheet[1][2]),
                    "min_duration": int(rule_sheet[2][2]),
                    "max_duration": int(rule_sheet[3][2]),
                    "publish_time": int(rule_sheet[4][2]),
                }
                return rule_dict
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")
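
    # get_rule() reads rows 2-5, column C of the Feishu sheet "shxOl7" and returns a dict
    # of the shape below. The numbers are illustrative only; the real values come from the sheet:
    # {"play_cnt": 10000, "min_duration": 30, "max_duration": 1800, "publish_time": 7}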
    # Download rule check
    @classmethod
    def download_rule(cls, video_info_dict, rule_dict):
        if video_info_dict['play_cnt'] >= rule_dict['play_cnt'] \
                and video_info_dict['comment_cnt'] >= rule_dict['comment_cnt'] \
                and video_info_dict['like_cnt'] >= rule_dict['like_cnt'] \
                and video_info_dict['duration'] >= rule_dict['duration'] \
                and (video_info_dict['video_width'] >= rule_dict['video_width']
                     or video_info_dict['video_height'] >= rule_dict['video_height']):
            return True
        return False
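
    # Note: download_rule() references rule keys (comment_cnt, like_cnt, duration, video_width,
    # video_height) that get_rule() above does not return; the search strategy in get_videolist()
    # applies its own inline checks instead of calling this method.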
    # Filter-word list
    @classmethod
    def filter_words(cls, log_type, crawler):
        try:
            while True:
                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
                if filter_words_sheet is None:
                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
                    time.sleep(10)
                    continue
                filter_words_list = []
                for x in filter_words_sheet:
                    for y in x:
                        if y is not None:
                            filter_words_list.append(y)
                return filter_words_list
        except Exception as e:
            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
    # Fetch user info (as dicts). Note: some user_id values come back as int, others as str.
    @classmethod
    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
        try:
            while True:
                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
                if user_sheet is None:
                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
                    time.sleep(10)
                    continue
                our_user_list = []
                for i in range(1, len(user_sheet)):
                    our_uid = user_sheet[i][6]
                    search_word = user_sheet[i][4]
                    tag1 = user_sheet[i][8]
                    tag2 = user_sheet[i][9]
                    tag3 = user_sheet[i][10]
                    tag4 = user_sheet[i][11]
                    Common.logger(log_type, crawler).info(f"正在更新 {search_word} 关键词信息\n")
                    if our_uid is None:
                        default_user = Users.get_default_user()
                        # Info used to create the our_uid (piaoquan user id)
                        user_dict = {
                            'nickName': default_user['nickName'],
                            'avatarUrl': default_user['avatarUrl'],
                            'tagName': f'{tag1},{tag2},{tag3},{tag4}',
                        }
                        our_uid = Users.create_uid(log_type, crawler, user_dict, env)
                        Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
                        if env == 'prod':
                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
                        else:
                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
                                             [[our_uid, our_user_link]])
                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
                    our_user_dict = {
                        'out_uid': '',
                        'search_word': search_word,
                        'our_uid': our_uid,
                        'our_user_link': f'https://admin.piaoquantv.com/ums/user/{our_uid}/post',
                    }
                    our_user_list.append(our_user_dict)
                return our_user_list
        except Exception as e:
            Common.logger(log_type, crawler).error(f'get_user_id_from_feishu异常:{e}\n')
    @classmethod
    def random_signature(cls):
        src_digits = string.digits  # digits
        src_uppercase = string.ascii_uppercase  # uppercase letters
        src_lowercase = string.ascii_lowercase  # lowercase letters
        digits_num = random.randint(1, 6)
        uppercase_num = random.randint(1, 26 - digits_num - 1)
        lowercase_num = 26 - (digits_num + uppercase_num)
        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + \
            random.sample(src_lowercase, lowercase_num)
        random.shuffle(password)
        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
        new_password_start = new_password[0:18]
        new_password_end = new_password[-7:]
        if new_password[18] == '8':
            new_password = new_password_start + 'w' + new_password_end
        elif new_password[18] == '9':
            new_password = new_password_start + 'x' + new_password_end
        elif new_password[18] == '-':
            new_password = new_password_start + 'y' + new_password_end
        elif new_password[18] == '.':
            new_password = new_password_start + 'z' + new_password_end
        else:
            new_password = new_password_start + 'y' + new_password_end
        return new_password
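
    # random_signature() builds a 26-character placeholder _signature value: a fixed
    # 'AAAAAAAAAA' prefix, 12 random alphanumeric characters, and an 'AAAB' suffix, with
    # the character at index 18 always rewritten to one of 'w'/'x'/'y'/'z'.
    # Illustrative output (not a real value): 'AAAAAAAAAAq3Rt7KZmwpVdAAAB'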
    # Fetch video/audio play URLs and resolution for a given gid
    @classmethod
    def get_video_url(cls, log_type, crawler, gid):
        try:
            url = 'https://www.ixigua.com/api/mixVideo/information?'
            headers = {
                "accept-encoding": "gzip, deflate",
                "accept-language": "zh-CN,zh-Hans;q=0.9",
                "user-agent": get_random_user_agent('pc'),
                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
            }
            params = {
                'mixId': gid,
                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
            }
            cookies = {
                'ixigua-a-s': '1',
                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
                '__ac_nonce': '06304878000964fdad287',
                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
                '_tea_utm_cache_1300': 'undefined',
                'support_avif': 'false',
                'support_webp': 'false',
                'xiguavideopcwebid': '7134967546256016900',
                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
            }
            urllib3.disable_warnings()
            s = requests.session()
            # retry each request up to 3 times
            s.mount('http://', HTTPAdapter(max_retries=3))
            s.mount('https://', HTTPAdapter(max_retries=3))
            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
                             proxies=Common.tunnel_proxies(), timeout=5)
            response.close()
            if 'data' not in response.json() or response.json()['data'] == '':
                Common.logger(log_type, crawler).warning('get_video_url: response: {}', response)
                return
            video_info = response.json()['data']['gidInformation']['packerData']['video']

            def pad_and_decode(b64_url):
                # backup_url_1 is base64-encoded; pad it before decoding
                # (the original padding rule based on len % 3 is kept, applied to each URL independently)
                if len(b64_url) % 3 == 1:
                    b64_url += '=='
                elif len(b64_url) % 3 == 2:
                    b64_url += '='
                return base64.b64decode(b64_url).decode('utf8')

            video_url_dict = {"video_url": '', "audio_url": '', "video_width": 0, "video_height": 0}
            if 'videoResource' not in video_info:
                return video_url_dict
            # Use the first resource type present, then the highest quality available:
            # video_4 > video_3 > video_2 > video_1, falling back to dynamic_video.
            for resource_key in ('dash_120fps', 'dash', 'normal'):
                if resource_key not in video_info['videoResource']:
                    continue
                resource = video_info['videoResource'][resource_key]
                video_list = resource.get('video_list', {})
                for quality in ('video_4', 'video_3', 'video_2', 'video_1'):
                    if quality in video_list:
                        item = video_list[quality]
                        # video and audio share the same backup_url_1 for these entries
                        video_url_dict["video_url"] = pad_and_decode(item['backup_url_1'])
                        video_url_dict["audio_url"] = pad_and_decode(item['backup_url_1'])
                        video_url_dict["video_width"] = item['vwidth']
                        video_url_dict["video_height"] = item['vheight']
                        return video_url_dict
                dynamic_video = resource.get('dynamic_video', {})
                if dynamic_video.get('dynamic_video_list') and dynamic_video.get('dynamic_audio_list'):
                    video_item = dynamic_video['dynamic_video_list'][-1]
                    audio_item = dynamic_video['dynamic_audio_list'][-1]
                    video_url_dict["video_url"] = pad_and_decode(video_item['backup_url_1'])
                    video_url_dict["audio_url"] = pad_and_decode(audio_item['backup_url_1'])
                    video_url_dict["video_width"] = video_item['vwidth']
                    video_url_dict["video_height"] = video_item['vheight']
                # only the first resource type present is considered, matching the original branch order
                return video_url_dict
            return video_url_dict
        except Exception as e:
            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')
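
    # get_video_url() always returns a dict with the keys video_url, audio_url, video_width
    # and video_height; empty strings / 0 mean no usable resource was found. It returns None
    # only when the API response carries no 'data' field or the request itself fails.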
    @classmethod
    def get_video_info(cls, log_type, crawler, item_id):
        d_url = "http://a6.pstatp.com/article/full/11/1/{video_id}/{video_id}/1/0/?iid=3636030325&device_id=5787057242" \
                "&ac=wifi&channel=wandoujia&aid=13&app_name=news_article&version_code=532&version_name=5.3.2&device_platform" \
                "=android&ab_client=a1%2Cc2%2Ce1%2Cf2%2Cg2%2Cb3%2Cf4&abflag=3&ssmix=a&device_type=SM705" \
                "&device_brand=smartisan&os_api=19&os_version=4.4.2&uuid=864593021012562&openudid=e23a5ff037ef2d1a" \
                "&manifest_version_code=532&resolution=1080*1920&dpi=480&update_version_code=5320".format(
                    video_id=item_id)
        res = requests.get(url=d_url, headers=get_random_header('pc'), proxies=Common.tunnel_proxies())
        data = json.loads(res.text)['data']
        item_counter = data['h5_extra']['itemCell']['itemCounter']
        user_info = data['user_info']
        detail_info = data['video_detail_info']
        video_dict = {'video_title': data['title'],
                      'video_id': detail_info['video_id'],
                      'gid': data['group_id'],
                      'play_cnt': item_counter['videoWatchCount'],
                      'comment_cnt': item_counter['commentCount'],
                      'like_cnt': item_counter['diggCount'],
                      'share_cnt': item_counter['shareCount'],
                      'duration': data['video_duration'],
                      'publish_time_stamp': data['publish_time'],
                      'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S",
                                                        time.localtime(data['publish_time'])),
                      'user_name': user_info['name'],
                      'user_id': user_info['user_id'],
                      'avatar_url': user_info['avatar_url'],
                      'cover_url': data['large_image']['url'].replace('\u0026', '&'),
                      }
        return video_dict
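
    # Note: get_video_info() returns only the metadata fields above; video_url, audio_url,
    # video_width, video_height and session are filled in later by get_videolist() from
    # get_video_url() and random_signature().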
    @classmethod
    def is_ruled(cls, log_type, crawler, video_dict, rule_dict):
        old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
        if video_dict['publish_time_stamp'] <= old_time:
            return False
        elif video_dict['play_cnt'] <= rule_dict['play_cnt']:
            return False
        elif video_dict['duration'] < rule_dict['min_duration'] or video_dict['duration'] > rule_dict['max_duration']:
            return False
        else:
            return True
    @classmethod
    def get_videolist(cls, log_type, crawler, strategy, our_uid, search_word, oss_endpoint, env, machine):
        total_count = 1
        offset = 0
        while True:
            signature = cls.random_signature()
            # NOTE: signature is generated but the format string below has no placeholder for it;
            # it is only stored into video_dict['session'] further down.
            url = "https://www.ixigua.com/api/searchv2/complex/{}/{}?order_type=publish_time&click_position=new".format(
                quote(search_word), offset, signature)
            headers = {
                'referer': 'https://www.ixigua.com/search/{}/?logTag=594535e3690f17a88cdb&tab_name=search'.format(
                    quote(search_word)),
                'cookie': 'ttwid=1%7Cx_4RDmVTqp6BQ5Xy5AnuCZCQdDyDxv-fnMVWzj19VU0%7C1679382377%7C4e25692dc4b9d5dca56d690001d168b21ed028a9ac075808ab9262238cb405ee;',
                'user-agent': get_random_user_agent('pc'),
            }
            try:
                res = requests.request("GET", url, headers=headers, proxies=Common.tunnel_proxies())
                search_list = res.json()['data']['data']
            except Exception as e:
                Common.logger(log_type, crawler).error(f'关键词:{search_word},搜索接口请求异常:{e}\n')
                continue
            if not search_list:
                Common.logger(log_type, crawler).error(f'关键词:{search_word},没有获取到视频列表:offset{offset}')
                return
            for video_info in search_list:
                v_type = video_info['type']
                rule_dict = cls.get_rule(log_type, crawler)
                publish_time = video_info['data']['publish_time']
                old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
                if publish_time <= old_time:
                    Common.logger(log_type, crawler).error(f'关键词:{search_word},抓取完毕,退出抓取\n')
                    return
                if v_type == 'video':
                    item_id = video_info['data']['group_id']
                    if video_info['data']['publish_time'] <= old_time:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
                        continue
                    elif video_info['data']['video_watch_count'] <= rule_dict['play_cnt']:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
                        continue
                    elif video_info['data']['video_time'] < rule_dict['min_duration'] or video_info['data'][
                            'video_time'] > rule_dict['max_duration']:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
                        continue
                    try:
                        video_dict = cls.get_video_info(log_type, crawler, item_id)
                        video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
                        video_dict['video_width'] = video_url_dict["video_width"]
                        video_dict['video_height'] = video_url_dict["video_height"]
                        video_dict['audio_url'] = video_url_dict["audio_url"]
                        video_dict['video_url'] = video_url_dict["video_url"]
                        video_dict['session'] = signature
                    except Exception as e:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},获取详情失败,原因:{e}')
                        continue
                    if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
                        Common.logger(log_type, crawler).info(f'关键词:{search_word},gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
                        continue
                    for k, v in video_dict.items():
                        Common.logger(log_type, crawler).info(f"{k}:{v}")
                    try:
                        # print(
                        #     f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
                        cls.download_publish(
                            search_word=search_word,
                            log_type=log_type,
                            crawler=crawler,
                            video_dict=video_dict,
                            rule_dict=rule_dict,
                            strategy=strategy,
                            our_uid=our_uid,
                            oss_endpoint=oss_endpoint,
                            env=env,
                            machine=machine
                        )
                    except Exception as e:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},下载失败,原因:{e}')
                        continue
                    total_count += 1
                    if total_count >= 30:
                        return
                # elif v_type == 'pseries':
                #     try:
                #         item_id = video_info['data']['group_id']
                #         p_url = "https://www.ixigua.com/api/videov2/pseries_more_v2?pSeriesId={}&rank=0&tailCount=30&aid=1768&msToken=wHEafKFLx0k3hihOPbhXYNsfMBxWiq2AB0K5R-34kEFixyq3ATi_DuXbL4Q47J9C2uK2zgWItMa1g2yc4FyDxM4dMijmSdwF4c4T8sSmOkoOI0wGzeEcPw==&X-Bogus=DFSzswVOzdUANG3ItaVHYr7TlqCv&_signature=_02B4Z6wo00001vB6l3QAAIDBZKzMeTihTmbwepPAANgh1Ai3JgFFo4e6anoezmBEpHfEMEYlWISGhXI-QKfev4N-2bwgXsHOuNGLnOsGqMbANIjFPh7Yj6OakQWrkbACenlv0P-arswtB6Zn45".format(
                #             item_id)
                #         p_headers = {
                #             'referer': 'https://www.ixigua.com/{}?series_flow=1&logTag=cfec9d927da968feff89'.format(
                #                 item_id),
                #             'user-agent': get_random_user_agent('pc'),
                #         }
                #         p_res = requests.request("GET", p_url, headers=p_headers,
                #                                  proxies=Common.tunnel_proxies()).json()
                #     except Exception as e:
                #         Common.logger(log_type, crawler).error(f'合集:{item_id},没有获取到合集详情,原因:{e}')
                #         continue
                #     for video in p_res['data']:
                #         item_id = video['item_id']
                #         try:
                #             video_dict = cls.get_video_info(log_type, crawler, item_id)
                #             video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
                #             video_dict['video_width'] = video_url_dict["video_width"]
                #             video_dict['video_height'] = video_url_dict["video_height"]
                #             video_dict['audio_url'] = video_url_dict["audio_url"]
                #             video_dict['video_url'] = video_url_dict["video_url"]
                #             video_dict['session'] = signature
                #         except Exception as e:
                #             Common.logger(log_type, crawler).error(f'视频:{item_id},没有获取到视频详情,原因:{e}')
                #             continue
                #         if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
                #             Common.logger(log_type, crawler).info(
                #                 f'gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
                #             continue
                #         if not cls.is_ruled(log_type, crawler, video_dict, rule_dict):
                #             Common.logger(log_type, crawler).error(f'视频:{item_id},不符合抓取规则\n')
                #             continue
                #         for k, v in video_dict.items():
                #             Common.logger(log_type, crawler).info(f"{k}:{v}")
                #         try:
                #             # print(
                #             #     f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
                #             cls.download_publish(
                #                 search_word=search_word,
                #                 log_type=log_type,
                #                 crawler=crawler,
                #                 video_dict=video_dict,
                #                 rule_dict=rule_dict,
                #                 strategy=strategy,
                #                 our_uid=our_uid,
                #                 oss_endpoint=oss_endpoint,
                #                 env=env,
                #                 machine=machine
                #             )
                #             total_count += 1
                #             if total_count >= 30:
                #                 return
                #             else:
                #                 break
                #         except Exception as e:
                #             Common.logger(log_type, crawler).error(f'视频:{item_id},download_publish异常:{e}\n')
            offset += 10
    @classmethod
    def repeat_video(cls, log_type, crawler, video_id, env, machine):
        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
        return len(repeat_video)
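
    # repeat_video() returns the number of crawler_video rows that already contain this
    # out_video_id for the platform; callers treat any value != 0 as "already downloaded".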
    # Download / upload
    @classmethod
    def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
                         env, machine):
        # Download the video stream
        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
                               title=video_dict['video_title'], url=video_dict['video_url'])
        # Download the audio stream
        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
                               title=video_dict['video_title'], url=video_dict['audio_url'])
        # Merge audio and video
        Common.video_compose(log_type=log_type, crawler=crawler,
                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
        ffmpeg_dict = Common.ffmpeg(log_type, crawler,
                                    f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
        if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
            Common.logger(log_type, crawler).warning(f"下载的视频无效,已删除\n")
            # Remove the video folder
            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
            return
        # Download the cover image
        Common.download_method(log_type=log_type, crawler=crawler, text='cover',
                               title=video_dict['video_title'], url=video_dict['cover_url'])
        # Save video info to txt
        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
        # Upload the video
        Common.logger(log_type, crawler).info("开始上传视频...")
        our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                  crawler=crawler,
                                                  strategy=strategy,
                                                  our_uid=our_uid,
                                                  env=env,
                                                  oss_endpoint=oss_endpoint)
        if env == 'dev':
            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
        else:
            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
        Common.logger(log_type, crawler).info("视频上传完成")
        if our_video_id is None:
            # Remove the video folder
            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
            return
        # Write the video record to Feishu
        Feishu.insert_columns(log_type, 'xigua', "BUNvGC", "ROWS", 1, 2)
        upload_time = int(time.time())
        values = [[
            search_word,
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
            "关键词搜索",
            video_dict['video_title'],
            str(video_dict['video_id']),
            our_video_link,
            video_dict['gid'],
            video_dict['play_cnt'],
            video_dict['comment_cnt'],
            video_dict['like_cnt'],
            video_dict['share_cnt'],
            video_dict['duration'],
            str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
            video_dict['publish_time_str'],
            video_dict['user_name'],
            video_dict['user_id'],
            video_dict['avatar_url'],
            video_dict['cover_url'],
            video_dict['video_url'],
            video_dict['audio_url']]]
        time.sleep(1)
        Feishu.update_values(log_type, 'xigua', "BUNvGC", "E2:Z2", values)
        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
        # Save the video record to the database
        insert_sql = f""" insert into crawler_video(video_id,
                                                    user_id,
                                                    out_user_id,
                                                    platform,
                                                    strategy,
                                                    out_video_id,
                                                    video_title,
                                                    cover_url,
                                                    video_url,
                                                    duration,
                                                    publish_time,
                                                    play_cnt,
                                                    crawler_rule,
                                                    width,
                                                    height)
                                                    values({our_video_id},
                                                    {our_uid},
                                                    "{video_dict['user_id']}",
                                                    "{cls.platform}",
                                                    "定向爬虫策略",
                                                    "{video_dict['video_id']}",
                                                    "{video_dict['video_title']}",
                                                    "{video_dict['cover_url']}",
                                                    "{video_dict['video_url']}",
                                                    {int(video_dict['duration'])},
                                                    "{video_dict['publish_time_str']}",
                                                    {int(video_dict['play_cnt'])},
                                                    '{json.dumps(rule_dict)}',
                                                    {int(video_dict['video_width'])},
                                                    {int(video_dict['video_height'])}) """
        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
        MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
    @classmethod
    def get_search_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
        try:
            user_list = cls.get_user_list(log_type=log_type, crawler=crawler, sheetid="SSPNPW", env=env,
                                          machine=machine)
            for user in user_list:
                search_word = user["search_word"]
                our_uid = user["our_uid"]
                Common.logger(log_type, crawler).info(f"开始抓取 {search_word} 关键词搜索视频\n")
                cls.get_videolist(log_type=log_type,
                                  crawler=crawler,
                                  strategy=strategy,
                                  our_uid=our_uid,
                                  search_word=search_word,
                                  oss_endpoint=oss_endpoint,
                                  env=env,
                                  machine=machine)
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_search_videos:{e}\n")

if __name__ == '__main__':
    # print(Follow.get_signature("follow", "xigua", "95420624045", "local"))
    # XiguaSearch.get_search_videos('search', 'xigua', 'xigua_search', 'inner', 'prod', 'aliyun')
    # Follow.get_videolist(log_type="follow",
    #                      crawler="xigua",
    #                      strategy="定向爬虫策略",
    #                      our_uid="6267141",
    #                      out_uid="95420624045",
    #                      oss_endpoint="out",
    #                      env="dev",
    #                      machine="local")
    # print(Follow.random_signature())
    # rule = Follow.get_rule("follow", "xigua")
    # print(type(rule))
    # print(type(json.dumps(rule)))
    # print(json.dumps(rule))
    pass
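    # Illustrative local run (parameter values are examples only; see the commented-out
    # production call above for the values used there):
    # XiguaSearch.get_search_videos(log_type='search', crawler='xigua', strategy='搜索爬虫策略',
    #                               oss_endpoint='out', env='dev', machine='local')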