# gongzhonghao_follow_2.py
# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/28
import datetime
import difflib
import json
import os
import shutil
import sys
import time
from hashlib import md5
import requests
import urllib3
# from requests.adapters import HTTPAdapter
from selenium.webdriver import DesiredCapabilities
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium import webdriver
sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
from common.public import get_config_from_mysql
from common.publish import Publish
from common.scheduling_db import MysqlHelper


class GongzhonghaoFollow2:
    # Pagination parameter
    begin = 0
    platform = "公众号"
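
    # NOTE: `begin` is mutable class-level pagination state: get_videoList
    # advances it by 5 per page and resets it to 0 once it reaches articles
    # older than 3 days; get_all_videos also resets it between accounts.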
    # Basic download threshold rules
    @staticmethod
    def download_rule(video_dict):
        """
        Basic rules for downloading a video.
        :param video_dict: video info, dict format
        :return: True if the rules are satisfied; otherwise False
        """
        # Video duration: 20 seconds to 45 minutes
        if 60 * 45 >= int(float(video_dict['duration'])) >= 20:
            # Width or height (any non-negative size passes)
            if int(video_dict['video_width']) >= 0 or int(video_dict['video_height']) >= 0:
                return True
            else:
                return False
        else:
            return False
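
    # title_like: fuzzy title dedup. Pulls every previously crawled 公众号 video
    # from crawler_video and returns True as soon as difflib reports a
    # quick_ratio of at least 0.8 against an existing title.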
    @classmethod
    def title_like(cls, log_type, crawler, title, env):
        select_sql = f""" select * from crawler_video where platform="公众号" """
        video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env, action="")
        if len(video_list) == 0:
            return None
        for video_dict in video_list:
            video_title = video_dict["video_title"]
            if difflib.SequenceMatcher(None, title, video_title).quick_ratio() >= 0.8:
                return True
        return False
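
    # get_token: the mp.weixin.qq.com token/cookie pair is maintained by hand
    # in a Feishu sheet (tab "I4aeh3"); successive rows hold the token, cookie,
    # account name, and the date the token was last rotated. Loops until the
    # sheet is readable.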
    # Fetch the token
    @classmethod
    def get_token(cls, log_type, crawler):
        while True:
            # try:
            sheet = Feishu.get_values_batch(log_type, crawler, "I4aeh3")
            if sheet is None:
                time.sleep(1)
                continue
            token = sheet[0][1]
            cookie = sheet[1][1]
            gzh_name = sheet[2][1]
            gzh_time = sheet[3][1]
            token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
            return token_dict
            # except Exception as e:
            #     Common.logger(log_type, crawler).error(f"get_cookie_token exception: {e}\n")
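
    # get_fakeid: resolves an account name to its fakeid via the searchbiz API.
    # "invalid session" means the token expired, "freq control" means the
    # account is rate-limited; both alert via Feishu (daytime only, 10:00-20:00)
    # and retry after 10 minutes. `index` selects among same-name search hits.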
    # Fetch the user's fakeid
    @classmethod
    def get_fakeid(cls, log_type, crawler, user, index):
        # try:
        while True:
            token_dict = cls.get_token(log_type, crawler)
            url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
            headers = {
                "accept": "*/*",
                "accept-encoding": "gzip, deflate, br",
                "accept-language": "zh-CN,zh;q=0.9",
                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
                           "&type=77&createType=5&token=1011071554&lang=zh_CN",
                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": '"Windows"',
                "sec-fetch-dest": "empty",
                "sec-fetch-mode": "cors",
                "sec-fetch-site": "same-origin",
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
                "x-requested-with": "XMLHttpRequest",
                'cookie': token_dict['cookie'],
            }
            params = {
                "action": "search_biz",
                "begin": "0",
                "count": "5",
                "query": str(user),
                "token": token_dict['token'],
                "lang": "zh_CN",
                "f": "json",
                "ajax": "1",
            }
            urllib3.disable_warnings()
            # s = requests.session()
            # # max_retries=3: retry up to 3 times
            # s.mount('http://', HTTPAdapter(max_retries=3))
            # s.mount('https://', HTTPAdapter(max_retries=3))
            # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
            r = requests.get(url=url, headers=headers, params=params, verify=False)
            r.close()
            if r.json()["base_resp"]["err_msg"] == "invalid session":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                Common.logger(log_type, crawler).warning(
                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if r.json()["base_resp"]["err_msg"] == "freq control":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                Common.logger(log_type, crawler).warning(
                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if "list" not in r.json() or len(r.json()["list"]) == 0:
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                Common.logger(log_type, crawler).warning(
                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            fakeid = r.json()["list"][int(index) - 1]["fakeid"]
            head_url = r.json()["list"][int(index) - 1]["round_head_img"]
            fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
            return fakeid_dict
        # except Exception as e:
        #     Common.logger(log_type, crawler).error(f"get_fakeid exception: {e}\n")
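
    # get_tencent_video_url: the getinfo endpoint answers with a JSONP wrapper
    # ("QZOutputJson=...;"), which is stripped before parsing; the playable URL
    # is the CDN prefix + vid + ".mp4?vkey=" + fvkey.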
    # Fetch the Tencent Video download link
    @classmethod
    def get_tencent_video_url(cls, video_id):
        # try:
        url = 'https://vv.video.qq.com/getinfo?vids=' + str(video_id) + '&platform=101001&charge=0&otype=json'
        response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
        response = json.loads(response)
        url = response['vl']['vi'][0]['ul']['ui'][0]['url']
        fvkey = response['vl']['vi'][0]['fvkey']
        video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
        return video_url
        # except Exception as e:
        #     Common.logger(log_type, crawler).error(f"get_tencent_video_url exception: {e}\n")
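
    # get_video_url: opens the article in headless Chrome and pulls the video
    # src from the poster node; if the article embeds a Tencent Video iframe
    # instead, the vid query parameter is extracted and resolved through
    # get_tencent_video_url. Returns 0 when no video is found.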
    @classmethod
    def get_video_url(cls, article_url, env):
        # try:
        # Enable performance logging in the request capabilities
        ca = DesiredCapabilities.CHROME
        ca["goog:loggingPrefs"] = {"performance": "ALL"}
        # Run without opening a browser window
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("headless")
        chrome_options.add_argument(
            'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
        chrome_options.add_argument("--no-sandbox")
        # Initialize the driver
        if env == "prod":
            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
        else:
            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
                '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
        driver.implicitly_wait(10)
        # Common.logger(log_type, crawler).info('Opening the article link')
        driver.get(article_url)
        time.sleep(1)
        if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
            video_url = driver.find_element(
                By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
        elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
            iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute(
                'src')
            video_id = iframe.split('vid=')[-1].split('&')[0]
            video_url = cls.get_tencent_video_url(video_id)
        else:
            video_url = 0
        driver.quit()
        return video_url
        # except Exception as e:
        #     Common.logger(log_type, crawler).info(f'get_video_url exception: {e}\n')
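
    # get_videoList: pages through an account's published articles (5 per page,
    # action=list_ex), normalizes each entry into video_dict, and stops once it
    # reaches an article older than 3 days or runs out of articles. Error
    # handling mirrors get_fakeid (expired token / freq control -> Feishu alert).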
    # Fetch the article list
    @classmethod
    def get_videoList(cls, log_type, crawler, user, index, oss_endpoint, env):
        # try:
        while True:
            fakeid_dict = cls.get_fakeid(log_type, crawler, user, index)
            token_dict = cls.get_token(log_type, crawler)
            url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
            headers = {
                "accept": "*/*",
                "accept-encoding": "gzip, deflate, br",
                "accept-language": "zh-CN,zh;q=0.9",
                "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
                           "t=media/appmsg_edit_v2&action=edit&isNew=1"
                           "&type=77&createType=5&token=" + str(token_dict['token']) + "&lang=zh_CN",
                'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
                "sec-ch-ua-mobile": "?0",
                "sec-ch-ua-platform": '"Windows"',
                "sec-fetch-dest": "empty",
                "sec-fetch-mode": "cors",
                "sec-fetch-site": "same-origin",
                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                              " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
                "x-requested-with": "XMLHttpRequest",
                'cookie': token_dict['cookie'],
            }
            params = {
                "action": "list_ex",
                "begin": str(cls.begin),
                "count": "5",
                "fakeid": fakeid_dict['fakeid'],
                "type": "9",
                "query": "",
                "token": str(token_dict['token']),
                "lang": "zh_CN",
                "f": "json",
                "ajax": "1",
            }
            urllib3.disable_warnings()
            # s = requests.session()
            # # max_retries=3: retry up to 3 times
            # s.mount('http://', HTTPAdapter(max_retries=3))
            # s.mount('https://', HTTPAdapter(max_retries=3))
            # r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
            r = requests.get(url=url, headers=headers, params=params, verify=False)
            r.close()
            if r.json()["base_resp"]["err_msg"] == "invalid session":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}")
                Common.logger(log_type, crawler).warning(
                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if r.json()["base_resp"]["err_msg"] == "freq control":
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                Common.logger(log_type, crawler).warning(
                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if 'app_msg_list' not in r.json():
                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                Common.logger(log_type, crawler).warning(
                    f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
                if 20 >= datetime.datetime.now().hour >= 10:
                    Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                time.sleep(60 * 10)
                continue
            if len(r.json()['app_msg_list']) == 0:
                Common.logger(log_type, crawler).info('没有更多视频了\n')
                return
            else:
                cls.begin += 5
                app_msg_list = r.json()['app_msg_list']
                for article in app_msg_list:
                    # title
                    if 'title' in article:
                        title = article['title'].replace('/', '').replace('\n', '') \
                            .replace('.', '').replace('“', '').replace('”', '').replace(' ', '') \
                            .replace('"', '').replace("'", "")
                    else:
                        title = 0
                    # aid
                    if 'aid' in article:
                        aid = article['aid']
                    else:
                        aid = 0
                    # create_time
                    if 'create_time' in article:
                        create_time = article['create_time']
                    else:
                        create_time = 0
                    publish_time_stamp = int(create_time)
                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
                    avatar_url = fakeid_dict['head_url']
                    # cover_url
                    if 'cover' in article:
                        cover_url = article['cover']
                    else:
                        cover_url = 0
                    # article_url
                    if 'link' in article:
                        article_url = article['link']
                    else:
                        article_url = 0
                    video_url = cls.get_video_url(article_url, env)
                    video_dict = {
                        'video_id': aid,
                        'video_title': title,
                        'publish_time_stamp': publish_time_stamp,
                        'publish_time_str': publish_time_str,
                        'user_name': user,
                        'play_cnt': 0,
                        'comment_cnt': 0,
                        'like_cnt': 0,
                        'share_cnt': 0,
                        'user_id': fakeid_dict['fakeid'],
                        'avatar_url': avatar_url,
                        'cover_url': cover_url,
                        'article_url': article_url,
                        'video_url': video_url,
                        'session': f'gongzhonghao-follow-{int(time.time())}'
                    }
                    for k, v in video_dict.items():
                        Common.logger(log_type, crawler).info(f"{k}:{v}")
                    if int(time.time()) - publish_time_stamp >= 3600 * 24 * 3:
                        Common.logger(log_type, crawler).info(f'发布时间{publish_time_str} > 3 天\n')
                        cls.begin = 0
                        return
                    cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
                    Common.logger(log_type, crawler).info('休眠 60 秒\n')
                    time.sleep(60)
        # except Exception as e:
        #     Common.logger(log_type, crawler).error(f"get_videoList exception: {e}\n")
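
    # repeat_video: download-dedup check; counts crawler_video rows that
    # already carry this platform + out_video_id.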
    @classmethod
    def repeat_video(cls, log_type, crawler, video_id, env):
        sql = f""" select * from crawler_video where platform="公众号" and out_video_id="{video_id}"; """
        repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env)
        return len(repeat_video)
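
    # download_publish: the full pipeline for one video. Order of checks:
    # missing URLs (article taken down), filter words from MySQL config,
    # already-downloaded, then title similarity. Survivors are downloaded,
    # probed with ffmpeg, re-checked against download_rule, published via
    # Publish.upload_and_publish, recorded in MySQL, and logged to Feishu.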
    # Download / upload
    @classmethod
    def download_publish(cls, log_type, crawler, video_dict, oss_endpoint, env):
        # try:
        if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
            Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
        # Filter sensitive words in the title
        elif any(word in video_dict['video_title']
                 for word in get_config_from_mysql(log_type=log_type,
                                                   source=crawler,
                                                   env=env,
                                                   text="filter",
                                                   action="")):
            Common.logger(log_type, crawler).info("标题已中过滤词\n")
        # Already downloaded?
        elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
            Common.logger(log_type, crawler).info("视频已下载\n")
        # Title similarity
        elif cls.title_like(log_type, crawler, video_dict['video_title'], env) is True:
            Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
        else:
            # Download the video
            Common.download_method(log_type=log_type, crawler=crawler, text="video",
                                   title=video_dict["video_title"], url=video_dict["video_url"])
            md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
            # Get the video duration
            ffmpeg_dict = Common.ffmpeg(log_type, crawler,
                                        f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
            if ffmpeg_dict is None:
                # Delete the video folder
                shutil.rmtree(f"./{crawler}/videos/{md_title}")
                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
                return
            video_dict["video_width"] = ffmpeg_dict["width"]
            video_dict["video_height"] = ffmpeg_dict["height"]
            video_dict["duration"] = ffmpeg_dict["duration"]
            video_size = ffmpeg_dict["size"]
            Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
            Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
            Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
            Common.logger(log_type, crawler).info(f'video_size:{video_size}')
            # Delete outright if the size is 0 or the download rules fail
            if int(video_size) == 0 or cls.download_rule(video_dict) is False:
                # Delete the video folder
                shutil.rmtree(f"./{crawler}/videos/{md_title}")
                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
                return
            # Download the cover
            Common.download_method(log_type=log_type, crawler=crawler, text="cover",
                                   title=video_dict["video_title"], url=video_dict["cover_url"])
            # Save the video info to "./videos/{video_title}/info.txt"
            Common.save_video_info(log_type=log_type, crawler=crawler, video_dict=video_dict)
            # Upload the video
            Common.logger(log_type, crawler).info("开始上传视频...")
            strategy = "定向爬虫策略"
            our_video_id = Publish.upload_and_publish(log_type=log_type,
                                                      crawler=crawler,
                                                      strategy=strategy,
                                                      our_uid="follow",
                                                      oss_endpoint=oss_endpoint,
                                                      env=env)
            if env == 'prod':
                our_video_link = f"https://admin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
            else:
                our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{str(our_video_id)}/info"
            Common.logger(log_type, crawler).info("视频上传完成")
            if our_video_id is None:
                # Delete the video folder
                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
                return
            # Save the video info to the database
            rule_dict = {
                "duration": {"min": 20, "max": 45 * 60},
                "publish_day": {"min": 3}
            }
            insert_sql = f""" insert into crawler_video(video_id,
                                                        out_user_id,
                                                        platform,
                                                        strategy,
                                                        out_video_id,
                                                        video_title,
                                                        cover_url,
                                                        video_url,
                                                        duration,
                                                        publish_time,
                                                        play_cnt,
                                                        crawler_rule,
                                                        width,
                                                        height)
                              values({our_video_id},
                                     "{video_dict['user_id']}",
                                     "{cls.platform}",
                                     "定向爬虫策略",
                                     "{video_dict['video_id']}",
                                     "{video_dict['video_title']}",
                                     "{video_dict['cover_url']}",
                                     "{video_dict['video_url']}",
                                     {int(video_dict['duration'])},
                                     "{video_dict['publish_time_str']}",
                                     {int(video_dict['play_cnt'])},
                                     '{json.dumps(rule_dict)}',
                                     {int(video_dict['video_width'])},
                                     {int(video_dict['video_height'])}) """
            Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
            MysqlHelper.update_values(log_type, crawler, insert_sql, env)
            Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
            # Write the video to Feishu
            Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
            # Write the data into the first row of the video-ID worksheet
            upload_time = int(time.time())
            values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
                       "用户主页",
                       video_dict['video_title'],
                       video_dict['video_id'],
                       our_video_link,
                       int(video_dict['duration']),
                       f"{video_dict['video_width']}*{video_dict['video_height']}",
                       video_dict['publish_time_str'],
                       video_dict['user_name'],
                       video_dict['user_id'],
                       video_dict['avatar_url'],
                       video_dict['cover_url'],
                       video_dict['article_url'],
                       video_dict['video_url']]]
            time.sleep(0.5)
            Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
            Common.logger(log_type, crawler).info('视频下载/上传成功\n')
        # except Exception as e:
        #     Common.logger(log_type, crawler).error(f"download_publish exception: {e}\n")
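
    # get_users: the Feishu-sheet loader is commented out; the account list is
    # currently hard-coded below. `index` is the 1-based position of the wanted
    # account among the searchbiz results for that name (see get_fakeid).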
    @classmethod
    def get_users(cls):
        # user_sheet = Feishu.get_values_batch("follow", 'gongzhonghao', 'Bzv72P')
        # user_list = []
        # # for i in range(41, 81):
        # for i in range(101, len(user_sheet)):
        #     user_name = user_sheet[i][0]
        #     index = user_sheet[i][1]
        #     user_dict = {
        #         "user_name": user_name,
        #         "index": index,
        #     }
        #     user_list.append(user_dict)
        # print(len(user_list))
        # print(user_list)
        user_list = [{'user_name': '惊爆视频', 'index': 3}, {'user_name': '绝美生活', 'index': 2}, {'user_name': '新龙虎局势', 'index': 1}, {'user_name': '行走的足音', 'index': 1}, {'user_name': '月光下小夜曲', 'index': 1}, {'user_name': '罪与罚的言', 'index': 1}, {'user_name': '祝福音画', 'index': 1}, {'user_name': '这年头儿', 'index': 1}, {'user_name': '祝福励志正能量', 'index': 1}, {'user_name': '出借人清查组', 'index': 1}, {'user_name': '强哥来了', 'index': 1}, {'user_name': '绝美相册', 'index': 1}, {'user_name': '绝美立体相册', 'index': 1}, {'user_name': '生活美相册', 'index': 1}, {'user_name': '祝您生活幸福', 'index': 1}, {'user_name': '完美生活', 'index': 3}, {'user_name': '新龙虎局世', 'index': 1}, {'user_name': '精美音画相册', 'index': 1}, {'user_name': '音画场景', 'index': 1}, {'user_name': '出借人投诉处', 'index': 1}, {'user_name': '十点学健康', 'index': 1}, {'user_name': '回忆录影带', 'index': 1}, {'user_name': '山花烂漫', 'index': 1}, {'user_name': '中老年生活乐', 'index': 1}, {'user_name': '音乐之春', 'index': 1}, {'user_name': '战友回忆录', 'index': 1}, {'user_name': '中老年生活之乐', 'index': 1}, {'user_name': '中老年退休生活乐', 'index': 1}, {'user_name': '中老年生活科普小妙招', 'index': 1}, {'user_name': '中老年生活好文', 'index': 1}, {'user_name': '中老年乐看', 'index': 1}, {'user_name': '晚安懂你心', 'index': 1}, {'user_name': '知心情送你', 'index': 1}, {'user_name': '好文与你入眠', 'index': 1}, {'user_name': '知心好文送你', 'index': 1}, {'user_name': '退休读书社', 'index': 1}, {'user_name': '退休的精彩生活', 'index': 1}, {'user_name': '老年享生活群', 'index': 1}, {'user_name': '民间文化交流群', 'index': 1}, {'user_name': '中老年每日看', 'index': 1}, {'user_name': '老年养老知识', 'index': 1}, {'user_name': '北京大妈有话说视频', 'index': 3}, {'user_name': '中老年兴趣群', 'index': 1}, {'user_name': '中老年说养身', 'index': 1}, {'user_name': '中老年每日养身', 'index': 1}]
        return user_list
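
    # get_all_videos: entry point for a full crawl pass; iterates the account
    # list, resets pagination between accounts, and sleeps 60 s per account,
    # presumably to stay under the platform's frequency limits.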
    @classmethod
    def get_all_videos(cls, log_type, crawler, oss_endpoint, env):
        user_list = cls.get_users()
        for user_dict in user_list:
            try:
                user_name = user_dict['user_name']
                index = user_dict['index']
                Common.logger(log_type, crawler).info(f'获取 {user_name} 公众号视频\n')
                cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                cls.begin = 0
                Common.logger(log_type, crawler).info('休眠 60 秒\n')
                time.sleep(60)
            except Exception as e:
                Common.logger(log_type, crawler).info(f'get_all_videos异常:{e}\n')
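

# A minimal usage sketch, assuming this module is driven by a scheduler that
# supplies the same arguments the methods expect (argument values here are
# illustrative, taken from the commented-out example below):
#     GongzhonghaoFollow2.get_all_videos(log_type="follow",
#                                        crawler="gongzhonghao",
#                                        oss_endpoint="out",
#                                        env="prod")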
if __name__ == "__main__":
    GongzhonghaoFollow2.get_users()
    # GongzhonghaoFollow2.get_videoList(log_type="follow",
    #                                   crawler="gongzhonghao",
    #                                   user="香音难忘",
    #                                   index=1,
    #                                   oss_endpoint="out",
    #                                   env="dev")
    pass