# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/28
import datetime
import difflib
import json
import os
import shutil
import sys
import time
from hashlib import md5
import requests
import urllib3
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium import webdriver
sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
from common.public import filter_word
from common.publish import Publish
from common.scheduling_db import MysqlHelper


class GongzhonghaoFollow:
    # Pagination offset
    begin = 0
    platform = "公众号"

    # Baseline download rules
    @staticmethod
    def download_rule(video_dict):
        """
        Basic rule for deciding whether a video should be downloaded.
        :param video_dict: video info, as a dict
        :return: True if the rule is satisfied, False otherwise
        """
        # Duration between 20 seconds and 45 minutes
        if 60 * 45 >= int(float(video_dict['duration'])) >= 20:
            # Width or height
            if int(video_dict['video_width']) >= 0 or int(video_dict['video_height']) >= 0:
                return True
            else:
                return False
        else:
            return False
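
    # A minimal illustration (hypothetical values) of what download_rule accepts:
    #   download_rule({'duration': '35.2', 'video_width': 720, 'video_height': 1280})  -> True   (35 s, in range)
    #   download_rule({'duration': '10',   'video_width': 720, 'video_height': 1280})  -> False  (shorter than 20 s)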

    @classmethod
    def title_like(cls, log_type, crawler, title, env):
        select_sql = f""" select * from crawler_video where platform="公众号" """
        video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env, action="")
        if len(video_list) == 0:
            return None
        for video_dict in video_list:
            video_title = video_dict["video_title"]
            # Treat titles as duplicates when the quick similarity ratio is >= 0.8
            if difflib.SequenceMatcher(None, title, video_title).quick_ratio() >= 0.8:
                return True
        return False

    # Fetch the token
    @classmethod
    def get_token(cls, log_type, crawler):
        while True:
            try:
                sheet = Feishu.get_values_batch(log_type, "gongzhonghao", "OjyJqs")
                if sheet is None:
                    time.sleep(1)
                    continue
                # One value per row: token, cookie, account name, replacement date
                token = sheet[0][1]
                cookie = sheet[1][1]
                gzh_name = sheet[2][1]
                gzh_time = sheet[3][1]
                token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
                return token_dict
            except Exception as e:
                Common.logger(log_type, crawler).error(f"get_cookie_token exception: {e}\n")

    # Fetch the user's fakeid
    @classmethod
    def get_fakeid(cls, log_type, crawler, user, index):
        try:
            while True:
                token_dict = cls.get_token(log_type, crawler)
                url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
                headers = {
                    "accept": "*/*",
                    "accept-encoding": "gzip, deflate, br",
                    "accept-language": "zh-CN,zh;q=0.9",
                    "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
                               "t=media/appmsg_edit_v2&action=edit&isNew=1"
                               "&type=77&createType=5&token=1011071554&lang=zh_CN",
                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
                    "sec-ch-ua-mobile": "?0",
                    "sec-ch-ua-platform": '"Windows"',
                    "sec-fetch-dest": "empty",
                    "sec-fetch-mode": "cors",
                    "sec-fetch-site": "same-origin",
                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                                  " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
                    "x-requested-with": "XMLHttpRequest",
                    'cookie': token_dict['cookie'],
                }
                params = {
                    "action": "search_biz",
                    "begin": "0",
                    "count": "5",
                    "query": str(user),
                    "token": token_dict['token'],
                    "lang": "zh_CN",
                    "f": "json",
                    "ajax": "1",
                }
                urllib3.disable_warnings()
                r = requests.get(url=url, headers=headers, params=params, verify=False)
                if r.json()["base_resp"]["err_msg"] == "invalid session":
                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                    # Only ping the bot during working hours (10:00-20:00)
                    if 20 >= datetime.datetime.now().hour >= 10:
                        Feishu.bot(log_type, crawler, f"token_1:{token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nToken expired. Please scan the QR code to replace it\nhttps://mp.weixin.qq.com/")
                    time.sleep(60 * 10)
                    continue
                if r.json()["base_resp"]["err_msg"] == "freq control":
                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                    if 20 >= datetime.datetime.now().hour >= 10:
                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate-limited. Please scan the QR code with another 公众号 account to replace the token\nhttps://mp.weixin.qq.com/")
                    time.sleep(60 * 10)
                    continue
                if "list" not in r.json() or len(r.json()["list"]) == 0:
                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                    if 20 >= datetime.datetime.now().hour >= 10:
                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate-limited. Please scan the QR code with another 公众号 account to replace the token\nhttps://mp.weixin.qq.com/")
                    time.sleep(60 * 10)
                    continue
                fakeid = r.json()["list"][int(index) - 1]["fakeid"]
                head_url = r.json()["list"][int(index) - 1]["round_head_img"]
                fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
                return fakeid_dict
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_fakeid exception: {e}\n")
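
    # Note on the helper below: it calls Tencent Video's getinfo endpoint, which
    # (as assumed from the parsing code) answers with JSONP of the form
    #   QZOutputJson={"vl": {"vi": [{"ul": {"ui": [{"url": ...}]}, "fvkey": ...}]}};
    # The playable URL is then assembled as <url><video_id>.mp4?vkey=<fvkey>.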

    # Fetch the Tencent Video download URL
    @classmethod
    def get_tencent_video_url(cls, log_type, crawler, video_id):
        try:
            url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
            response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
            response = json.loads(response)
            url = response['vl']['vi'][0]['ul']['ui'][0]['url']
            fvkey = response['vl']['vi'][0]['fvkey']
            video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
            return video_url
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_tencent_video_url exception: {e}\n")
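
    # get_video_url below covers the two ways a 公众号 article embeds video
    # (as read from the XPaths): a native <video> under js_video_poster, whose
    # src is the direct URL, or a Tencent Video iframe under
    # js_tx_video_container, whose vid= query parameter is resolved through
    # get_tencent_video_url. If neither matches, it returns 0.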

    @classmethod
    def get_video_url(cls, log_type, crawler, article_url, env):
        try:
            # Capture performance logs in the request configuration
            ca = DesiredCapabilities.CHROME
            ca["goog:loggingPrefs"] = {"performance": "ALL"}
            # Run without opening a browser window
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument("headless")
            chrome_options.add_argument(
                'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
            chrome_options.add_argument("--no-sandbox")
            # Initialize the driver
            if env == "prod":
                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
            else:
                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
                    '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
            driver.implicitly_wait(10)
            # Common.logger(log_type, crawler).info('Opening the article link')
            driver.get(article_url)
            time.sleep(1)
            if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
                video_url = driver.find_element(
                    By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
            elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
                iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
                    'src')
                video_id = iframe.split('vid=')[-1].split('&')[0]
                video_url = cls.get_tencent_video_url(log_type, crawler, video_id)
            else:
                video_url = 0
            driver.quit()
            return video_url
        except Exception as e:
            Common.logger(log_type, crawler).error(f'get_video_url exception: {e}\n')
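
    # get_videoList pages through the account's article list five posts at a
    # time (cls.begin advances by 5 per request) and stops as soon as it meets
    # a post older than 3 days, resetting cls.begin for the next account.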

    # Fetch the article list
    @classmethod
    def get_videoList(cls, log_type, crawler, user, index, oss_endpoint, env):
        try:
            while True:
                token_dict = cls.get_token(log_type, crawler)
                fakeid_dict = cls.get_fakeid(log_type, crawler, user, index)
                url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
                headers = {
                    "accept": "*/*",
                    "accept-encoding": "gzip, deflate, br",
                    "accept-language": "zh-CN,zh;q=0.9",
                    "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
                               "t=media/appmsg_edit_v2&action=edit&isNew=1"
                               "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
                    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
                    "sec-ch-ua-mobile": "?0",
                    "sec-ch-ua-platform": '"Windows"',
                    "sec-fetch-dest": "empty",
                    "sec-fetch-mode": "cors",
                    "sec-fetch-site": "same-origin",
                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                                  " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
                    "x-requested-with": "XMLHttpRequest",
                    'cookie': token_dict['cookie'],
                }
                params = {
                    "action": "list_ex",
                    "begin": str(cls.begin),
                    "count": "5",
                    "fakeid": fakeid_dict['fakeid'],
                    "type": "9",
                    "query": "",
                    "token": str(token_dict['token']),
                    "lang": "zh_CN",
                    "f": "json",
                    "ajax": "1",
                }
                urllib3.disable_warnings()
                r = requests.get(url=url, headers=headers, params=params, verify=False)
                if r.json()["base_resp"]["err_msg"] == "invalid session":
                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                    if 20 >= datetime.datetime.now().hour >= 10:
                        Feishu.bot(log_type, crawler, f"token_1:{token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nToken expired. Please scan the QR code to replace it\nhttps://mp.weixin.qq.com/")
                    time.sleep(60 * 10)
                    continue
                if r.json()["base_resp"]["err_msg"] == "freq control":
                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                    if 20 >= datetime.datetime.now().hour >= 10:
                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate-limited. Please scan the QR code with another 公众号 account to replace the token\nhttps://mp.weixin.qq.com/")
                    time.sleep(60 * 10)
                    continue
                if 'app_msg_list' not in r.json():
                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                    if 20 >= datetime.datetime.now().hour >= 10:
                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate-limited. Please scan the QR code with another 公众号 account to replace the token\nhttps://mp.weixin.qq.com/")
                    time.sleep(60 * 10)
                    continue
                if len(r.json()['app_msg_list']) == 0:
                    Common.logger(log_type, crawler).info('No more videos\n')
                    return
                cls.begin += 5
                app_msg_list = r.json()['app_msg_list']
                for article in app_msg_list:
                    # title
                    if 'title' in article:
                        title = article['title'].replace('/', '').replace('\n', '') \
                            .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')
                    else:
                        title = 0
                    # aid
                    if 'aid' in article:
                        aid = article['aid']
                    else:
                        aid = 0
                    # create_time
                    if 'create_time' in article:
                        create_time = article['create_time']
                    else:
                        create_time = 0
                    publish_time_stamp = int(create_time)
                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
                    avatar_url = fakeid_dict['head_url']
                    # cover_url
                    if 'cover' in article:
                        cover_url = article['cover']
                    else:
                        cover_url = 0
                    # article_url
                    if 'link' in article:
                        article_url = article['link']
                    else:
                        article_url = 0
                    video_url = cls.get_video_url(log_type, crawler, article_url, env)
                    video_dict = {
                        'video_id': aid,
                        'video_title': title,
                        'publish_time_stamp': publish_time_stamp,
                        'publish_time_str': publish_time_str,
                        'user_name': user,
                        'play_cnt': 0,
                        'comment_cnt': 0,
                        'like_cnt': 0,
                        'share_cnt': 0,
                        'user_id': fakeid_dict['fakeid'],
                        'avatar_url': avatar_url,
                        'cover_url': cover_url,
                        'article_url': article_url,
                        'video_url': video_url,
                        'session': f'gongzhonghao-follow-{int(time.time())}'
                    }
                    for k, v in video_dict.items():
                        Common.logger(log_type, crawler).info(f"{k}:{v}")
                    if int(time.time()) - publish_time_stamp > 3600 * 24 * 3:
                        Common.logger(log_type, crawler).info(f'Published at {publish_time_str}, more than 3 days ago\n')
                        cls.begin = 0
                        return
                    cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
                Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
                time.sleep(60)
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_videoList exception: {e}\n")

    @classmethod
    def repeat_video(cls, log_type, crawler, video_id, env):
        sql = f""" select * from crawler_video where platform="公众号" and out_video_id="{video_id}"; """
        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
        return len(repeat_video)
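
    # download_publish gates each candidate in order: article taken down or no
    # video found, a banned word in the title, already downloaded
    # (repeat_video), then title similarity >= 0.8 against crawled videos.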

    # Download / upload
    @classmethod
    def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env):
        try:
            if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
                Common.logger(log_type, crawler).info("Article suspected of violating relevant laws, regulations and policies\n")
            # Filter banned words in the title
            elif any(word in video_dict['video_title'] for word in
                     filter_word(log_type, crawler, "公众号", env)):
                Common.logger(log_type, crawler).info("Title hit a banned word\n")
            # Skip videos that were already downloaded
            elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                Common.logger(log_type, crawler).info("Video already downloaded\n")
            # Title similarity
            elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
                Common.logger(log_type, crawler).info(f'Title similarity >= 80%: {video_dict["video_title"]}\n')
            else:
                # Download the video
                Common.download_method(log_type=log_type, crawler=crawler, text="video",
                                       title=video_dict["video_title"], url=video_dict["video_url"])
                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
                # Read the video's duration and dimensions
                ffmpeg_dict = Common.ffmpeg(log_type, crawler,
                                            f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
                if ffmpeg_dict is None:
                    # Delete the video folder
                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
                    Common.logger(log_type, crawler).info("Video size is 0; deleted\n")
                    return
                video_dict["video_width"] = ffmpeg_dict["width"]
                video_dict["video_height"] = ffmpeg_dict["height"]
                video_dict["duration"] = ffmpeg_dict["duration"]
                video_size = ffmpeg_dict["size"]
                Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
                Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
                Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
                Common.logger(log_type, crawler).info(f'video_size:{video_size}')
                # Delete the video if its size is 0 or it fails the download rule
                if int(video_size) == 0 or cls.download_rule(video_dict) is False:
                    # Delete the video folder
                    shutil.rmtree(f"./{crawler}/videos/{md_title}")
                    Common.logger(log_type, crawler).info("Video size is 0 or rule not met; deleted\n")
                    return
                # Download the cover
                Common.download_method(log_type=log_type, crawler=crawler, text="cover",
                                       title=video_dict["video_title"], url=video_dict["cover_url"])
                # Save the video info to "./videos/{video_title}/info.txt"
                Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
                # Upload the video
                Common.logger(log_type, crawler).info("Starting video upload...")
                strategy = "定向爬虫策略"
                our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                          crawler=crawler,
                                                          strategy=strategy,
                                                          our_uid="follow",
                                                          oss_endpoint=oss_endpoint,
                                                          env=env)
                if env == 'prod':
                    our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
                else:
                    our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
                Common.logger(log_type, crawler).info("Video upload finished")
                if our_video_id is None:
                    # Delete the video folder
                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
                    return
                # Save the video info to the database
                rule_dict = {
                    "duration": {"min": 20, "max": 45 * 60},
                    "publish_day": {"min": 3}
                }
                insert_sql = f""" insert into crawler_video(video_id,
                                                            out_user_id,
                                                            platform,
                                                            strategy,
                                                            out_video_id,
                                                            video_title,
                                                            cover_url,
                                                            video_url,
                                                            duration,
                                                            publish_time,
                                                            play_cnt,
                                                            crawler_rule,
                                                            width,
                                                            height)
                                                            values({our_video_id},
                                                            "{video_dict['user_id']}",
                                                            "{cls.platform}",
                                                            "定向爬虫策略",
                                                            "{video_dict['video_id']}",
                                                            "{video_dict['video_title']}",
                                                            "{video_dict['cover_url']}",
                                                            "{video_dict['video_url']}",
                                                            {int(video_dict['duration'])},
                                                            "{video_dict['publish_time_str']}",
                                                            {int(video_dict['play_cnt'])},
                                                            '{json.dumps(rule_dict)}',
                                                            {int(video_dict['video_width'])},
                                                            {int(video_dict['video_height'])}) """
                Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
                MysqlHelper.update_values(log_type, crawler, insert_sql, env)
                Common.logger(log_type, crawler).info('Video info inserted into the database!')
                # Write the video to the Feishu sheet
                Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
                # Write the data into the first row of the video-ID sheet
                upload_time = int(time.time())
                values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
                           "用户主页",
                           video_dict['video_title'],
                           video_dict['video_id'],
                           our_video_link,
                           int(video_dict['duration']),
                           f"{video_dict['video_width']}*{video_dict['video_height']}",
                           video_dict['publish_time_str'],
                           video_dict['user_name'],
                           video_dict['user_id'],
                           video_dict['avatar_url'],
                           video_dict['cover_url'],
                           video_dict['article_url'],
                           video_dict['video_url']]]
                time.sleep(0.5)
                Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
                Common.logger(log_type, crawler).info('Video downloaded/uploaded successfully\n')
        except Exception as e:
            Common.logger(log_type, crawler).error(f"download_publish exception: {e}\n")

    @classmethod
    def get_users(cls):
        # user_sheet = Feishu.get_values_batch("follow", 'gongzhonghao', 'Bzv72P')
        # user_list = []
        # for i in range(1, 41):
        #     user_name = user_sheet[i][0]
        #     index = user_sheet[i][1]
        #     user_dict = {
        #         "user_name": user_name,
        #         "index": index,
        #     }
        #     user_list.append(user_dict)
        # print(len(user_list))
        # print(user_list)
        user_list = [
            {'user_name': '香音难忘', 'index': 1}, {'user_name': '墨儿心灵驿站', 'index': 1},
            {'user_name': '荒烟茶生', 'index': 1}, {'user_name': '幸福花朵', 'index': 1},
            {'user_name': '我的节日祝福', 'index': 1}, {'user_name': '生活创意妙招', 'index': 1},
            {'user_name': '二大妈有话说', 'index': 1}, {'user_name': '医路健康美食', 'index': 1},
            {'user_name': '老年相知相伴', 'index': 1}, {'user_name': '一争', 'index': 1},
            {'user_name': '老年企退群', 'index': 1}, {'user_name': '消逝的哨声', 'index': 1},
            {'user_name': '一颗打破石头的蛋', 'index': 1}, {'user_name': '叩问苍穹荒烟茶生', 'index': 1},
            {'user_name': '布衣星火', 'index': 1}, {'user_name': '叩问苍穹', 'index': 1},
            {'user_name': '微观调查', 'index': 2}, {'user_name': '传统节日祝福', 'index': 1},
            {'user_name': '因和德尚', 'index': 1}, {'user_name': '飨宴心灵', 'index': 1},
            {'user_name': '朝闻解局', 'index': 1}, {'user_name': '远见光芒', 'index': 1},
            {'user_name': '墨儿微刊', 'index': 1}, {'user_name': '博爱论', 'index': 1},
            {'user_name': '张大春讲堂', 'index': 1}, {'user_name': ' 司马南频道', 'index': 1},
            {'user_name': '音乐小镇', 'index': 1}, {'user_name': '节日祝福365', 'index': 1},
            {'user_name': '动画音乐相册', 'index': 1}, {'user_name': '音乐动漫相册', 'index': 1},
            {'user_name': '早点谈健康', 'index': 1}, {'user_name': '早点谈养生', 'index': 1},
            {'user_name': '早点谈养身', 'index': 1}, {'user_name': '医道谈养身', 'index': 1},
            {'user_name': '中老年谈养身', 'index': 1}, {'user_name': '尼古拉斯瞭望', 'index': 1},
            {'user_name': '奇易时光百姓的福音', 'index': 1}, {'user_name': '寰宇时光', 'index': 1},
            {'user_name': '红兴文化公苑', 'index': 1}, {'user_name': '早点音乐', 'index': 1},
        ]
        return user_list

    @classmethod
    def get_all_videos(cls, log_type, crawler, oss_endpoint, env):
        user_list = cls.get_users()
        for user_dict in user_list:
            try:
                user_name = user_dict['user_name']
                index = user_dict['index']
                Common.logger(log_type, crawler).info(f'Fetching videos of 公众号 {user_name}\n')
                cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                cls.begin = 0
                Common.logger(log_type, crawler).info('Sleeping for 60 seconds\n')
                time.sleep(60)
            except Exception as e:
                Common.logger(log_type, crawler).error(f'get_all_videos exception: {e}\n')


if __name__ == "__main__":
    GongzhonghaoFollow.get_token(log_type="follow", crawler="gongzhonghao")
    # GongzhonghaoFollow.get_users()
    # GongzhonghaoFollow.get_videoList(log_type="follow",
    #                                  crawler="gongzhonghao",
    #                                  user="香音难忘",
    #                                  index=1,
    #                                  oss_endpoint="out",
    #                                  env="dev")
    pass