# xigua_search.py

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/17
import base64
import json
import os
import random
import shutil
import string
import sys
import time
import requests
import urllib3
from urllib.parse import quote
from requests.adapters import HTTPAdapter

sys.path.append(os.getcwd())
from common.db import MysqlHelper
from common.users import Users
from common.common import Common
from common.feishu import Feishu
from common.publish import Publish
from common.userAgent import get_random_user_agent, get_random_header


class XiguaSearch:
    platform = "西瓜视频"
    tag = "西瓜视频爬虫,搜索爬虫策略"

    @classmethod
    def get_rule(cls, log_type, crawler):
        try:
            while True:
                rule_sheet = Feishu.get_values_batch(log_type, crawler, "shxOl7")
                if rule_sheet is None:
                    Common.logger(log_type, crawler).warning("rule_sheet is None! 10秒后重新获取")
                    time.sleep(10)
                    continue
                rule_dict = {
                    "play_cnt": int(rule_sheet[1][2]),
                    "min_duration": int(rule_sheet[2][2]),
                    "max_duration": int(rule_sheet[3][2]),
                    "publish_time": int(rule_sheet[4][2]),
                }
                return rule_dict
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_rule:{e}\n")

    # Download rules
    @classmethod
    def download_rule(cls, video_info_dict, rule_dict):
        if video_info_dict['play_cnt'] >= rule_dict['play_cnt'] \
                and video_info_dict['comment_cnt'] >= rule_dict['comment_cnt'] \
                and video_info_dict['like_cnt'] >= rule_dict['like_cnt'] \
                and video_info_dict['duration'] >= rule_dict['duration'] \
                and (video_info_dict['video_width'] >= rule_dict['video_width']
                     or video_info_dict['video_height'] >= rule_dict['video_height']):
            return True
        else:
            return False
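    # NOTE: download_rule expects play_cnt / comment_cnt / like_cnt / duration / video_width / video_height
    # keys in rule_dict, while get_rule above only reads play_cnt, min_duration, max_duration and publish_time
    # from the sheet; the search flow below applies its own threshold checks in get_videolist / is_ruled instead.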

    # Filter word list
    @classmethod
    def filter_words(cls, log_type, crawler):
        try:
            while True:
                filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
                if filter_words_sheet is None:
                    Common.logger(log_type, crawler).warning(f"filter_words_sheet:{filter_words_sheet} 10秒钟后重试")
                    time.sleep(10)
                    continue
                filter_words_list = []
                for x in filter_words_sheet:
                    for y in x:
                        if y is not None:
                            filter_words_list.append(y)
                return filter_words_list
        except Exception as e:
            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')

    # Get user info as a list of dicts. Note: some user_id values are int, others str
    @classmethod
    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
        try:
            while True:
                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
                if user_sheet is None:
                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
                    time.sleep(10)
                    continue
                our_user_list = []
                for i in range(1, len(user_sheet)):
                    our_uid = user_sheet[i][6]
                    search_word = user_sheet[i][4]
                    tag1 = user_sheet[i][8]
                    tag2 = user_sheet[i][9]
                    tag3 = user_sheet[i][10]
                    tag4 = user_sheet[i][11]
                    tag5 = user_sheet[i][12]
                    tag6 = user_sheet[i][13]
                    tag7 = user_sheet[i][14]
                    Common.logger(log_type, crawler).info(f"正在更新 {search_word} 关键词信息\n")
                    if our_uid is None:
                        default_user = Users.get_default_user()
                        # Info used to create the our_uid
                        user_dict = {
                            'nickName': default_user['nickName'],
                            'avatarUrl': default_user['avatarUrl'],
                            'tagName': f'{tag1},{tag2},{tag3},{tag4},{tag5},{tag6},{tag7}',
                        }
                        our_uid = Users.create_uid(log_type, crawler, user_dict, env)
                        Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
                        if env == 'prod':
                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
                        else:
                            our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
                        Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
                                             [[our_uid, our_user_link]])
                        Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
                    our_user_dict = {
                        'out_uid': '',
                        'search_word': search_word,
                        'our_uid': our_uid,
                        'our_user_link': f'https://admin.piaoquantv.com/ums/user/{our_uid}/post',
                    }
                    our_user_list.append(our_user_dict)
                return our_user_list
        except Exception as e:
            Common.logger(log_type, crawler).error(f'get_user_id_from_feishu异常:{e}\n')

    @classmethod
    def random_signature(cls):
        src_digits = string.digits              # digits
        src_uppercase = string.ascii_uppercase  # uppercase letters
        src_lowercase = string.ascii_lowercase  # lowercase letters
        digits_num = random.randint(1, 6)
        uppercase_num = random.randint(1, 26 - digits_num - 1)
        lowercase_num = 26 - (digits_num + uppercase_num)
        password = random.sample(src_digits, digits_num) + random.sample(src_uppercase, uppercase_num) + random.sample(
            src_lowercase, lowercase_num)
        random.shuffle(password)
        new_password = 'AAAAAAAAAA' + ''.join(password)[10:-4] + 'AAAB'
        new_password_start = new_password[0:18]
        new_password_end = new_password[-7:]
        if new_password[18] == '8':
            new_password = new_password_start + 'w' + new_password_end
        elif new_password[18] == '9':
            new_password = new_password_start + 'x' + new_password_end
        elif new_password[18] == '-':
            new_password = new_password_start + 'y' + new_password_end
        elif new_password[18] == '.':
            new_password = new_password_start + 'z' + new_password_end
        else:
            new_password = new_password_start + 'y' + new_password_end
        return new_password

    # Get video details (video / audio URL, resolution)
    @classmethod
    def get_video_url(cls, log_type, crawler, gid):
        try:
            url = 'https://www.ixigua.com/api/mixVideo/information?'
            headers = {
                "accept-encoding": "gzip, deflate",
                "accept-language": "zh-CN,zh-Hans;q=0.9",
                "user-agent": get_random_user_agent('pc'),
                "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
            }
            params = {
                'mixId': gid,
                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
                           'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
                'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
                '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
                              'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
            }
            cookies = {
                'ixigua-a-s': '1',
                'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
                           'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
                'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
                         '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
                'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
                'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
                '__ac_nonce': '06304878000964fdad287',
                '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
                                  'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
                'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
                '_tea_utm_cache_1300': 'undefined',
                'support_avif': 'false',
                'support_webp': 'false',
                'xiguavideopcwebid': '7134967546256016900',
                'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
            }
            urllib3.disable_warnings()
            s = requests.session()
            # max_retries=3: retry each request up to 3 times
            s.mount('http://', HTTPAdapter(max_retries=3))
            s.mount('https://', HTTPAdapter(max_retries=3))
            response = s.get(url=url, headers=headers, params=params, cookies=cookies, verify=False,
                             proxies=Common.tunnel_proxies(), timeout=5)
            response.close()
            if 'data' not in response.json() or response.json()['data'] == '':
                Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
            else:
                video_info = response.json()['data']['gidInformation']['packerData']['video']
                video_url_dict = {
                    "video_url": '',
                    "audio_url": '',
                    "video_width": 0,
                    "video_height": 0,
                }

                def decode_url(video_b64, audio_b64):
                    # backup_url_1 is base64-encoded; pad with '=' before decoding
                    if len(video_b64) % 3 == 1:
                        video_b64 += '=='
                    elif len(video_b64) % 3 == 2:
                        video_b64 += '='
                    elif len(audio_b64) % 3 == 1:
                        audio_b64 += '=='
                    elif len(audio_b64) % 3 == 2:
                        audio_b64 += '='
                    return base64.b64decode(video_b64).decode('utf8'), base64.b64decode(audio_b64).decode('utf8')

                # Prefer dash_120fps, then dash, then normal; within the chosen resource take the
                # highest available quality (video_4 ... video_1), falling back to dynamic_video.
                for resource_key in ('dash_120fps', 'dash', 'normal'):
                    if resource_key not in video_info.get('videoResource', {}):
                        continue
                    resource = video_info['videoResource'][resource_key]
                    video_list = resource.get('video_list', {})
                    for quality in ('video_4', 'video_3', 'video_2', 'video_1'):
                        if quality in video_list:
                            item = video_list[quality]
                            video_url, audio_url = decode_url(item['backup_url_1'], item['backup_url_1'])
                            video_url_dict["video_url"] = video_url
                            video_url_dict["audio_url"] = audio_url
                            video_url_dict["video_width"] = item['vwidth']
                            video_url_dict["video_height"] = item['vheight']
                            break
                    else:
                        dynamic_video = resource.get('dynamic_video', {})
                        if dynamic_video.get('dynamic_video_list') and dynamic_video.get('dynamic_audio_list'):
                            video_item = dynamic_video['dynamic_video_list'][-1]
                            audio_item = dynamic_video['dynamic_audio_list'][-1]
                            video_url, audio_url = decode_url(video_item['backup_url_1'], audio_item['backup_url_1'])
                            video_url_dict["video_url"] = video_url
                            video_url_dict["audio_url"] = audio_url
                            video_url_dict["video_width"] = video_item['vwidth']
                            video_url_dict["video_height"] = video_item['vheight']
                    break
                return video_url_dict
        except Exception as e:
            Common.logger(log_type, crawler).error(f'get_video_url:{e}\n')

    @classmethod
    def get_video_info(cls, log_type, crawler, item_id):
        d_url = "http://a6.pstatp.com/article/full/11/1/{video_id}/{video_id}/1/0/?iid=3636030325&device_id=5787057242" \
                "&ac=wifi&channel=wandoujia&aid=13&app_name=news_article&version_code=532&version_name=5.3.2&device_platform" \
                "=android&ab_client=a1%2Cc2%2Ce1%2Cf2%2Cg2%2Cb3%2Cf4&abflag=3&ssmix=a&device_type=SM705" \
                "&device_brand=smartisan&os_api=19&os_version=4.4.2&uuid=864593021012562&openudid=e23a5ff037ef2d1a" \
                "&manifest_version_code=532&resolution=1080*1920&dpi=480&update_version_code=5320".format(
                    video_id=item_id)
        res = requests.get(url=d_url, headers=get_random_header('pc'), proxies=Common.tunnel_proxies())
        data = json.loads(res.text)['data']
        item_counter = data['h5_extra']['itemCell']['itemCounter']
        user_info = data['user_info']
        detail_info = data['video_detail_info']
        video_dict = {'video_title': data['title'],
                      'video_id': detail_info['video_id'],
                      'gid': data['group_id'],
                      'play_cnt': item_counter['videoWatchCount'],
                      'comment_cnt': item_counter['commentCount'],
                      'like_cnt': item_counter['diggCount'],
                      'share_cnt': item_counter['shareCount'],
                      'duration': data['video_duration'],
                      'publish_time_stamp': data['publish_time'],
                      'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S",
                                                        time.localtime(data['publish_time'])),
                      'user_name': user_info['name'],
                      'user_id': user_info['user_id'],
                      'avatar_url': user_info['avatar_url'],
                      'cover_url': data['large_image']['url'].replace('\u0026', '&'),
                      }
        return video_dict

    @classmethod
    def is_ruled(cls, log_type, crawler, video_dict, rule_dict):
        old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
        if video_dict['publish_time_stamp'] <= old_time:
            return False
        elif video_dict['play_cnt'] <= rule_dict['play_cnt']:
            return False
        elif video_dict['duration'] < rule_dict['min_duration'] or video_dict['duration'] > rule_dict['max_duration']:
            return False
        else:
            return True

    @classmethod
    def get_videolist(cls, log_type, crawler, strategy, our_uid, search_word, oss_endpoint, env, machine):
        total_count = 1
        offset = 0
        while True:
            signature = cls.random_signature()
            url = "https://www.ixigua.com/api/searchv2/complex/{}/{}?order_type=publish_time&click_position=new".format(
                quote(search_word), offset, signature)
            headers = {
                'referer': 'https://www.ixigua.com/search/{}/?logTag=594535e3690f17a88cdb&tab_name=search'.format(
                    quote(search_word)),
                'cookie': 'ttwid=1%7Cx_4RDmVTqp6BQ5Xy5AnuCZCQdDyDxv-fnMVWzj19VU0%7C1679382377%7C4e25692dc4b9d5dca56d690001d168b21ed028a9ac075808ab9262238cb405ee;',
                'user-agent': get_random_user_agent('pc'),
            }
            try:
                res = requests.request("GET", url, headers=headers, proxies=Common.tunnel_proxies())
                search_list = res.json()['data']['data']
            except Exception as e:
                continue
            if not search_list:
                Common.logger(log_type, crawler).error(f'关键词:{search_word},没有获取到视频列表:offset{offset}')
                return
            for video_info in search_list:
                v_type = video_info['type']
                rule_dict = cls.get_rule(log_type, crawler)
                publish_time = video_info['data']['publish_time']
                old_time = int(time.time()) - (3600 * 24 * rule_dict['publish_time'])
                if publish_time <= old_time:
                    Common.logger(log_type, crawler).error(f'关键词:{search_word},抓取完毕,退出抓取\n')
                    return
                if v_type == 'video':
                    item_id = video_info['data']['group_id']
                    if video_info['data']['publish_time'] <= old_time:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
                        continue
                    elif video_info['data']['video_watch_count'] <= rule_dict['play_cnt']:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
                        continue
                    elif video_info['data']['video_time'] < rule_dict['min_duration'] or video_info['data']['video_time'] > rule_dict['max_duration']:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},不符合抓取规则\n')
                        continue
                    try:
                        video_dict = cls.get_video_info(log_type, crawler, item_id)
                        video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
                        video_dict['video_width'] = video_url_dict["video_width"]
                        video_dict['video_height'] = video_url_dict["video_height"]
                        video_dict['audio_url'] = video_url_dict["audio_url"]
                        video_dict['video_url'] = video_url_dict["video_url"]
                        video_dict['session'] = signature
                    except Exception as e:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},获取详情失败,原因:{e}')
                        continue
                    if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
                        Common.logger(log_type, crawler).info(f'关键词:{search_word},gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
                        continue
                    for k, v in video_dict.items():
                        Common.logger(log_type, crawler).info(f"{k}:{v}")
                    try:
                        # print(
                        #     f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
                        cls.download_publish(
                            search_word=search_word,
                            log_type=log_type,
                            crawler=crawler,
                            video_dict=video_dict,
                            rule_dict=rule_dict,
                            strategy=strategy,
                            our_uid=our_uid,
                            oss_endpoint=oss_endpoint,
                            env=env,
                            machine=machine
                        )
                    except Exception as e:
                        Common.logger(log_type, crawler).error(f'关键词:{search_word},视频:{item_id},下载失败,原因:{e}')
                        continue
                    total_count += 1
                    Common.logger(log_type, crawler).info(f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
                    if total_count >= 30:
                        return
                # elif v_type == 'pseries':
                #     try:
                #         item_id = video_info['data']['group_id']
                #         p_url = "https://www.ixigua.com/api/videov2/pseries_more_v2?pSeriesId={}&rank=0&tailCount=30&aid=1768&msToken=wHEafKFLx0k3hihOPbhXYNsfMBxWiq2AB0K5R-34kEFixyq3ATi_DuXbL4Q47J9C2uK2zgWItMa1g2yc4FyDxM4dMijmSdwF4c4T8sSmOkoOI0wGzeEcPw==&X-Bogus=DFSzswVOzdUANG3ItaVHYr7TlqCv&_signature=_02B4Z6wo00001vB6l3QAAIDBZKzMeTihTmbwepPAANgh1Ai3JgFFo4e6anoezmBEpHfEMEYlWISGhXI-QKfev4N-2bwgXsHOuNGLnOsGqMbANIjFPh7Yj6OakQWrkbACenlv0P-arswtB6Zn45".format(
                #             item_id)
                #         p_headers = {
                #             'referer': 'https://www.ixigua.com/{}?series_flow=1&logTag=cfec9d927da968feff89'.format(
                #                 item_id),
                #             'user-agent': get_random_user_agent('pc'),
                #         }
                #         p_res = requests.request("GET", p_url, headers=p_headers,
                #                                  proxies=Common.tunnel_proxies()).json()
                #     except Exception as e:
                #         Common.logger(log_type, crawler).error(f'合集:{item_id},没有获取到合集详情,原因:{e}')
                #         continue
                #     for video in p_res['data']:
                #         item_id = video['item_id']
                #         try:
                #             video_dict = cls.get_video_info(log_type, crawler, item_id)
                #             video_url_dict = cls.get_video_url(log_type, crawler, video_dict['gid'])
                #             video_dict['video_width'] = video_url_dict["video_width"]
                #             video_dict['video_height'] = video_url_dict["video_height"]
                #             video_dict['audio_url'] = video_url_dict["audio_url"]
                #             video_dict['video_url'] = video_url_dict["video_url"]
                #             video_dict['session'] = signature
                #         except Exception as e:
                #             Common.logger(log_type, crawler).error(f'视频:{item_id},没有获取到视频详情,原因:{e}')
                #             continue
                #         if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
                #             Common.logger(log_type, crawler).info(
                #                 f'gid:{video_dict["gid"]},视频已下载,无需重复下载\n')
                #             continue
                #         if not cls.is_ruled(log_type, crawler, video_dict, rule_dict):
                #             Common.logger(log_type, crawler).error(f'视频:{item_id},不符合抓取规则\n')
                #             continue
                #         for k, v in video_dict.items():
                #             Common.logger(log_type, crawler).info(f"{k}:{v}")
                #         try:
                #             # print(
                #             #     f'search_word:{search_word},title:{video_dict["video_title"]},gid:{video_dict["gid"]},offset:{offset}, total:{total_count}')
                #             cls.download_publish(
                #                 search_word=search_word,
                #                 log_type=log_type,
                #                 crawler=crawler,
                #                 video_dict=video_dict,
                #                 rule_dict=rule_dict,
                #                 strategy=strategy,
                #                 our_uid=our_uid,
                #                 oss_endpoint=oss_endpoint,
                #                 env=env,
                #                 machine=machine
                #             )
                #             total_count += 1
                #             if total_count >= 30:
                #                 return
                #             else:
                #                 break
                #         except Exception as e:
                #             Common.logger(log_type, crawler).error(f'视频:{item_id},download_publish异常:{e}\n')
            offset += 10

    @classmethod
    def repeat_video(cls, log_type, crawler, video_id, env, machine):
        sql = f""" select * from crawler_video where platform="{cls.platform}" and out_video_id="{video_id}"; """
        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
        return len(repeat_video)
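    # repeat_video returns the number of crawler_video rows already holding this out_video_id;
    # callers treat any non-zero count as "already crawled" and skip the video.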

    # Download / upload
    @classmethod
    def download_publish(cls, log_type, crawler, search_word, strategy, video_dict, rule_dict, our_uid, oss_endpoint,
                         env, machine):
        # Download the video
        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_video',
                               title=video_dict['video_title'], url=video_dict['video_url'])
        # Download the audio
        Common.download_method(log_type=log_type, crawler=crawler, text='xigua_audio',
                               title=video_dict['video_title'], url=video_dict['audio_url'])
        # Merge audio and video
        Common.video_compose(log_type=log_type, crawler=crawler,
                             video_dir=f"./{crawler}/videos/{video_dict['video_title']}")
        ffmpeg_dict = Common.ffmpeg(log_type, crawler,
                                    f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
        if ffmpeg_dict is None or ffmpeg_dict['size'] == 0:
            Common.logger(log_type, crawler).warning(f"下载的视频无效,已删除\n")
            # Delete the video folder
            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
            return
        # Download the cover image
        Common.download_method(log_type=log_type, crawler=crawler, text='cover',
                               title=video_dict['video_title'], url=video_dict['cover_url'])
        # Save video info to txt
        Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
        # Upload the video
        Common.logger(log_type, crawler).info("开始上传视频...")
        our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                  crawler=crawler,
                                                  strategy=strategy,
                                                  our_uid=our_uid,
                                                  env=env,
                                                  oss_endpoint=oss_endpoint)
        if env == 'dev':
            our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
        else:
            our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
        Common.logger(log_type, crawler).info("视频上传完成")
        if our_video_id is None:
            # Delete the video folder
            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
            return
        # Write video info to Feishu
        Feishu.insert_columns(log_type, 'xigua', "BUNvGC", "ROWS", 1, 2)
        upload_time = int(time.time())
        values = [[
            search_word,
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
            "关键词搜索",
            video_dict['video_title'],
            str(video_dict['video_id']),
            our_video_link,
            video_dict['gid'],
            video_dict['play_cnt'],
            video_dict['comment_cnt'],
            video_dict['like_cnt'],
            video_dict['share_cnt'],
            video_dict['duration'],
            str(video_dict['video_width']) + '*' + str(video_dict['video_height']),
            video_dict['publish_time_str'],
            video_dict['user_name'],
            video_dict['user_id'],
            video_dict['avatar_url'],
            video_dict['cover_url'],
            video_dict['video_url'],
            video_dict['audio_url']]]
        time.sleep(1)
        Feishu.update_values(log_type, 'xigua', "BUNvGC", "E2:Z2", values)
        Common.logger(log_type, crawler).info(f"视频已保存至云文档\n")
        # Save video info to the database
        insert_sql = f""" insert into crawler_video(video_id,
                                                    user_id,
                                                    out_user_id,
                                                    platform,
                                                    strategy,
                                                    out_video_id,
                                                    video_title,
                                                    cover_url,
                                                    video_url,
                                                    duration,
                                                    publish_time,
                                                    play_cnt,
                                                    crawler_rule,
                                                    width,
                                                    height)
                                                    values({our_video_id},
                                                    {our_uid},
                                                    "{video_dict['user_id']}",
                                                    "{cls.platform}",
                                                    "定向爬虫策略",
                                                    "{video_dict['video_id']}",
                                                    "{video_dict['video_title']}",
                                                    "{video_dict['cover_url']}",
                                                    "{video_dict['video_url']}",
                                                    {int(video_dict['duration'])},
                                                    "{video_dict['publish_time_str']}",
                                                    {int(video_dict['play_cnt'])},
                                                    '{json.dumps(rule_dict)}',
                                                    {int(video_dict['video_width'])},
                                                    {int(video_dict['video_height'])}) """
        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
        MysqlHelper.update_values(log_type, crawler, insert_sql, env, machine)
        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')

    @classmethod
    def get_search_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
        try:
            user_list = cls.get_user_list(log_type=log_type, crawler=crawler, sheetid="SSPNPW", env=env,
                                          machine=machine)
            for user in user_list:
                search_word = user["search_word"]
                our_uid = user["our_uid"]
                Common.logger(log_type, crawler).info(f"开始抓取 {search_word} 用户主页视频\n")
                cls.get_videolist(log_type=log_type,
                                  crawler=crawler,
                                  strategy=strategy,
                                  our_uid=our_uid,
                                  search_word=search_word,
                                  oss_endpoint=oss_endpoint,
                                  env=env,
                                  machine=machine)
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_search_videos:{e}\n")


if __name__ == '__main__':
    # print(Follow.get_signature("follow", "xigua", "95420624045", "local"))
    # XiguaSearch.get_search_videos('search', 'xigua', 'xigua_search', 'inner', 'prod', 'aliyun')
    # Follow.get_videolist(log_type="follow",
    #                      crawler="xigua",
    #                      strategy="定向爬虫策略",
    #                      our_uid="6267141",
    #                      out_uid="95420624045",
    #                      oss_endpoint="out",
    #                      env="dev",
    #                      machine="local")
    # print(Follow.random_signature())
    # rule = Follow.get_rule("follow", "xigua")
    # print(type(rule))
    # print(type(json.dumps(rule)))
    # print(json.dumps(rule))
    pass