# get_feeds.py
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/4/18
  4. """
Fetch the home-page recommended video list from the Kanyikan+ (看一看+) mini program.
  6. """
  7. import json
  8. import os
  9. import random
  10. import sys
  11. import time
  12. import requests
  13. import urllib3
  14. from main.feishu_lib import Feishu
  15. sys.path.append(os.getcwd())
  16. from main.common import Common
  17. proxies = {"http": None, "https": None}
  18. def get_feeds():
  19. """
  20. 1.从看一看+小程序首页推荐,获取视频列表
  21. 2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c 中去重
  22. 3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM 中去重
  23. 4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
  24. """
  25. host = "https://search.weixin.qq.com"
  26. url = '/cgi-bin/recwxa/recwxavideolist?'
  27. video_list_session = Common.get_session()
  28. Common.logger().info("获取视频list时,session:{}", video_list_session)
  29. header = {
  30. "Connection": "keep-alive",
  31. "content-type": "application/json",
  32. "Accept-Encoding": "gzip,compress,br,deflate",
  33. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
  34. "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.18(0x18001236) "
  35. "NetType/WIFI Language/zh_CN",
  36. "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
  37. }
  38. params = {
  39. 'session': video_list_session,
  40. "offset": 0,
  41. "wxaVersion": "3.9.2",
  42. "count": "10",
  43. "channelid": "208",
  44. "scene": '310',
  45. "subscene": '1089',
  46. "clientVersion": '8.0.18',
  47. "sharesearchid": '0',
  48. "nettype": 'wifi',
  49. "switchprofile": "0",
  50. "switchnewuser": "0",
  51. }
  52. try:
  53. urllib3.disable_warnings()
  54. r = requests.get(host + url, headers=header, params=params, proxies=proxies, verify=False)
  55. response = json.loads(r.content.decode("utf8"))
  56. if "data" not in response:
  57. Common.logger().info("获取视频list时,session过期,随机睡眠 31-50 秒")
  58. # 如果返回空信息,则随机睡眠 31-40 秒
  59. time.sleep(random.randint(31, 40))
  60. get_feeds()
  61. elif "items" not in response["data"]:
  62. Common.logger().info("获取视频list时,返回空信息,随机睡眠 1-3 分钟")
  63. # 如果返回空信息,则随机睡眠 1-3 分钟
  64. time.sleep(random.randint(60, 180))
  65. get_feeds()
  66. else:
  67. items = response["data"]["items"]
  68. for i in range(len(items)):
  69. # 如果该视频没有视频信息,则忽略
  70. if "videoInfo" not in items[i]:
  71. Common.logger().info("无视频信息")
  72. else:
  73. # 获取视频标题
  74. video_title = items[i]["title"].strip().replace("\n", "")\
  75. .replace("/", "").replace("\\", "").replace("\r", "")\
  76. .replace(":", "").replace("*", "").replace("?", "")\
  77. .replace("?", "").replace('"', "").replace("<", "")\
  78. .replace(">", "").replace("|", "").replace(" ", "")\
  79. .replace("&NBSP", "").replace(".", "。").replace(" ", "")
  80. Common.logger().info('视频标题:{}', video_title)
  81. # 获取视频ID
  82. video_id = items[i]["videoId"]
  83. Common.logger().info('视频ID:{}', video_id)
  84. # 获取视频播放次数
  85. video_play_cnt = items[i]["playCount"]
  86. Common.logger().info('视频播放次数:{}', video_play_cnt)
  87. # 获取视频点赞数
  88. video_liked_cnt = items[i]["liked_cnt"]
  89. Common.logger().info('视频点赞数:{}', video_liked_cnt)
  90. # 获取视频评论数
  91. video_comment_cnt = items[i]["comment_cnt"]
  92. Common.logger().info('视频评论数:{}', video_comment_cnt)
  93. # 获取视频分享数
  94. video_shared_cnt = items[i]["shared_cnt"]
  95. Common.logger().info('视频分享数:{}', video_shared_cnt)
  96. # 获取视频时长
  97. video_duration = items[i]["mediaDuration"]
  98. Common.logger().info('视频时长:{}秒', video_duration)
  99. # 获取视频宽高
  100. if "short_video_info" not in items[i]:
  101. video_width = "0"
  102. video_height = "0"
  103. video_resolution = str(video_width) + "*" + str(video_height)
  104. Common.logger().info("无分辨率:{}", video_resolution)
  105. elif len(items[i]["short_video_info"]) == 0:
  106. video_width = "0"
  107. video_height = "0"
  108. video_resolution = str(video_width) + "*" + str(video_height)
  109. Common.logger().info("无分辨率:{}", video_resolution)
  110. else:
  111. # 视频宽
  112. video_width = items[i]["short_video_info"]["width"]
  113. # 视频高
  114. video_height = items[i]["short_video_info"]["height"]
  115. video_resolution = str(video_width) + "*" + str(video_height)
  116. Common.logger().info('视频宽高:{}', video_resolution)
  117. # 获取视频发布时间
  118. video_send_date = items[i]["date"]
  119. Common.logger().info("视频发布时间:{}",
  120. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(video_send_date)))
  121. # 获取视频用户名
  122. video_user = items[i]["source"].strip().replace("\n", "")
  123. Common.logger().info('视频用户名:{}', video_user)
  124. # 获取视频用户头像
  125. video_user_cover = items[i]["bizIcon"]
  126. Common.logger().info('视频用户头像:{}', video_user_cover)
  127. # 获取视频封面
  128. if "smartCoverUrl" in items[i]:
  129. video_cover = items[i]["smartCoverUrl"]
  130. Common.logger().info('视频封面:{}', video_cover)
  131. else:
  132. video_cover = items[i]["thumbUrl"]
  133. Common.logger().info('视频封面:{}', video_cover)
  134. # 获取播放地址
  135. if "mpInfo" in items[i]["videoInfo"]["videoCdnInfo"].keys():
  136. if len(items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
  137. url = items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
  138. Common.logger().info('视频播放地址:{}', url)
  139. else:
  140. url = items[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
  141. Common.logger().info('视频播放地址:{}', url)
  142. elif "ctnInfo" in items[i]["videoInfo"]["videoCdnInfo"]:
  143. url = items[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
  144. Common.logger().info('视频播放地址:{}', url)
  145. else:
  146. url = items[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
  147. Common.logger().info('视频播放地址:{}', url)
  148. # 过滤无效视频
  149. if video_id == "" \
  150. or video_send_date == "" \
  151. or video_title.strip() == "" \
  152. or video_play_cnt == "" \
  153. or video_liked_cnt == "" \
  154. or video_duration == "" \
  155. or video_comment_cnt == "" \
  156. or video_shared_cnt == "" \
  157. or video_user == "" \
  158. or video_user_cover == "" \
  159. or video_cover == "" \
  160. or url == "":
  161. Common.logger().info("无效视频")
  162. # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
  163. elif video_id in [j for i in Feishu.get_values_batch("20ce0c") for j in i]:
  164. Common.logger().info("该视频已下载:{}", video_title)
  165. # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=SdCHOM
  166. elif video_id in [j for i in Feishu.get_values_batch("SdCHOM") for j in i]:
  167. Common.logger().info("该视频已在kanyikan_feeds中:{}", video_title)
  168. else:
  169. Common.logger().info("该视频未下载,添加至kanyikan_feeds:{}", video_title)
  170. # 看一看+工作表,插入首行
  171. Feishu.insert_columns("SdCHOM")
  172. # 获取当前时间
  173. get_feeds_time = int(time.time())
  174. # 看一看云文档,工作表 kanyikan_feeds 中写入数据
  175. Feishu.update_values("SdCHOM",
  176. a1=str(get_feeds_time),
  177. b1=str(video_id),
  178. c1=str(video_play_cnt),
  179. d1=str(video_title),
  180. e1=str(video_duration),
  181. f1=str(video_comment_cnt),
  182. g1=str(video_liked_cnt),
  183. h1=str(video_shared_cnt),
  184. i1=str(video_resolution),
  185. j1=str(video_send_date),
  186. k1=str(video_user),
  187. l1=str(video_user_cover),
  188. m1=str(video_cover),
  189. n1=str(url),
  190. o1=str(video_list_session))
  191. except Exception as e:
  192. Common.logger().error("获取视频 list 时异常:{}", e)
  193. if __name__ == "__main__":
  194. get_feeds()