zhihu_follow.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/10/31
  4. import os
  5. import sys
  6. import time
  7. import requests
  8. import urllib3
  9. sys.path.append(os.getcwd())
  10. from main.common import Common
  11. from main.feishu_lib import Feishu
  12. from main.zhihu_follow_publish import Publish
  13. proxies = {'http': None, 'https': None}
  14. class ZhihuFollow:
  15. offset = 0
  16. @classmethod
  17. def get_users_from_feishu(cls, log_type):
  18. try:
  19. user_sheet = Feishu.get_values_batch(log_type, 'zhihu', '4NTla6')
  20. user_dict = {}
  21. for i in range(1, len(user_sheet)):
  22. user_name = user_sheet[i][0]
  23. url_token = user_sheet[i][1]
  24. referer = user_sheet[i][2]
  25. our_id = user_sheet[i][3]
  26. if user_name is None or url_token is None or our_id is None:
  27. pass
  28. else:
  29. user_dict[user_name] = str(url_token) + ',' + str(referer) + ',' + str(our_id)
  30. return user_dict
  31. except Exception as e:
  32. Common.logger(log_type).error('get_users_from_feishu异常:{}\n', e)
  33. @classmethod
  34. def get_follow_feeds(cls, log_type, url_token, referer, our_uid, env):
  35. while True:
  36. try:
  37. # url = "https://www.zhihu.com//api//v4//members//" + str(url_token) + "//zvideos?"
  38. url = f"https://www.zhihu.com//api//v4//members//{url_token}//zvideos?offset={cls.offset}&limit=20&similar_aggregation=true&include=similar_zvideo%2Ccreation_relationship%2Creaction_instruction"
  39. payload = {}
  40. headers = {
  41. 'authority': 'www.zhihu.com',
  42. 'accept': '*/*',
  43. 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
  44. 'cache-control': 'no-cache',
  45. 'cookie': '_zap=246102fb-af66-40c3-a5a5-9901921d5a71; d_c0=AHCWVw5U5hWPTqifPR-jYwskMnmcUFEgHzQ=|1669014326; q_c1=40c865e7cbed4099b5d090229d3096f5|1669983925000|1669983925000; _xsrf=05151c3d-2d05-47fe-98bc-7b01dae731ba; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1675166229; arialoadData=false; z_c0=2|1:0|10:1675650146|4:z_c0|80:MS4xVFdsTlB3QUFBQUFtQUFBQVlBSlZUVDFJeG1TWWVaZEJSVWdsSWdRalloaWlGaVlqYlFrVmpRPT0=|c0945918804e3c623699052665e50a2452bceb732d25e544df0ca9419e50fa6d; SESSIONID=AgocnyI3witm93R7LqTR2y59rcyJQ9p0QONL8jD8laf; JOID=V1sUA0OVH-cf-lXUM5BktSnhBDki0nqJdJkqnwHvcJFzy2uABvN4DnnxUdc6mEsEpn2HejuruTnxTM_CAaxxrJQ=; osd=UVodBEiTHu4Y8VPVOpdvsyjoAzIk03OOf58rlgbkdpB6zGCGB_p_BX_wWNAxnkoNoXaBezKssj_wRcjJB614q58=; tst=v; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1675650941; unlock_ticket=AGBWwkpQuhUmAAAAYAJVTYVu4GP28ZqvlQjQejVT8YHwQEuzKp3jPQ==; KLBRSID=b5ffb4aa1a842930a6f64d0a8f93e9bf|1675650942|1675650143; KLBRSID=b5ffb4aa1a842930a6f64d0a8f93e9bf|1675651010|1675650143',
  46. 'pragma': 'no-cache',
  47. 'referer': f'https://www.zhihu.com/people/{url_token}/zvideos',
  48. 'sec-ch-ua': '"Not_A Brand";v="99", "Microsoft Edge";v="109", "Chromium";v="109"',
  49. 'sec-ch-ua-mobile': '?0',
  50. 'sec-ch-ua-platform': '"macOS"',
  51. 'sec-fetch-dest': 'empty',
  52. 'sec-fetch-mode': 'cors',
  53. 'sec-fetch-site': 'same-origin',
  54. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.52',
  55. 'x-ab-param': '',
  56. 'x-ab-pb': 'CmQIABsAPwBHALQAaQFqAXQBOwLMAtcC2AK3A9YEEQVRBYsFjAWeBTAGMQbrBicHdAh2CHkIPwlgCfQJBApJCmUKawq+Cv4KQwtxC4cLjQvXC+AL5QvmCzgMcQyPDKwMwwzJDPgMEjIBAAAAAAAAAAADAAAABAAAAAABAAABAAAABgABAwAAAAAAAAEAAAUCAQAAAgYAAAIAAA==',
  57. 'x-requested-with': 'fetch',
  58. 'x-zse-93': '101_3_3.0',
  59. 'x-zse-96': '2.0_YgMIhebWasNsCXw=FiiAUcdqV88f=NTzIsgp2cwr0d2+eelDIHhqN3Tru+P9NAs6',
  60. 'x-zst-81': '3_2.0aR_sn77yn6O92wOB8hPZn79qE72xcXFZ16fyQArZ39Sm7820XM20cL_1kwxYUqwT16P0EiUZbR2x-LOmwhp1tD_I-JOfgGXTzJO1ADRZ0cHsTJXII820Eer0c4nVDJH8zGCBADwMuukRo4Cqm4w0riRO70CB70O83uPmgbgmhufXiqomKbO1FJLYiRnxEL2ZZrxmDucmqhPXnXFMTAoTF6RhRuLPFXwMxBwBogLLLhSVPgSfRcxqjhOY2BSVibNfX9eLXJp9cTgVxrOB4h2C2uoTv0SpiBFMvBHCoAxmZq2pauV1cuw9kUeLkR38iqHO8RtCgUo0-geLgwtyzvSLFDUfWwgMIhVBcCXOIgxfLBcMawLG1TNqBrpCeUNYEwFfEhwO6vV_HCXsCqfz5DeBS6S9xUw9urO__Dg1cqHByqX_k9g0iUw1urN0Qicf6qFGFqc8Qi9GQqgxWDLm3UgfiJHBwww8DBeLyvO1ucS1n92mO9gmxgFmbuN1LGcpWhXBBBLC'
  61. }
  62. # headers = {
  63. # 'pragma': 'no-cache',
  64. # 'cache-control': 'no-cache',
  65. # 'x-zse-93': '101_3_3.0',
  66. # 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
  67. # 'x-zst-81': '3_2.0aR_sn77yn6O92wOB8hPZnQr0EMYxc4f18wNBUgpTQ6nxERFZMRY0-4Lm-h3_tufIwJS8gcxTgJS_'
  68. # 'AuPZNcXCTwxI78YxEM20s4PGDwN8gGcYAupMWufIoLVqr4gxrRPOI0cY7HL8qun9g93mFukyigcmebS_'
  69. # 'FwOYPRP0E4rZUrN9DDom3hnynAUMnAVPF_PhaueTF7C89Uw8_w2YiDNBOcC_ggeL2HN1fTN0WvUq_'
  70. # 'UY9aCLBMQO0VvUfoTp9y9eTV4xC8b3YCGNs2bSfXwCZpUw06hcMJ_w0B8NmhvwMcGe9iqCZlcSftv3qTug'
  71. # '_-BVqgwL1r7H99CeV24C9_Up9HqHmj9CsqUH8tBNLUbxm1gOprAO_6CL0Vg_z9DVs9rS1R9x08wxG2Cxme9'
  72. # 'FLlUc9jJu027gB3UVBJqpBbqUCrwo1nqHmggO_Nr9_BqcxfC2GnvSqNcxyIDOfiuw9dgCC27emaUpMxuVqq'
  73. # 'uFYwUSL2BXs',
  74. # 'accept': '*/*',
  75. # 'referer': referer,
  76. # 'sec-ch-ua': '"Chromium";v="106", "Google Chrome";v="106", "Not;A=Brand";v="99"',
  77. # 'sec-ch-ua-mobile': '?0',
  78. # 'sec-ch-ua-platform': '"macOS"',
  79. # 'sec-fetch-dest': 'empty',
  80. # 'sec-fetch-mode': 'cors',
  81. # 'sec-fetch-site': 'same-origin',
  82. # 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 '
  83. # '(KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
  84. # 'x-ab-pb': 'CsIBCAAbAD8ARwC0AGkBagF0ATsCzALXAtgCTwNQA6ADoQOiA7cD8wP0AzMEjASNBKYE1gQR'
  85. # 'BVEFiwWMBZ4FMAYxBusGJwd3B3gH2AfcB90HZwh0CHYIeQjaCD8JQglgCY0JwwnECcUJxgnH'
  86. # 'CcgJyQnKCcsJzAnRCfQJBApJCmUKawqYCqUKqQq+CsQK1ArdCu0K/Qr+CjsLPAtDC0YLcQt2'
  87. # 'C4ULhwuNC8AL1wvgC+UL5gssDDgMcQyPDKwMuQzDDMkM+AwSYQEAAAAAAAAAAAMAAAAAAAAA'
  88. # 'AAAAAAQEAAQAAAAAAQAAAQAAAAACAgQAAAYAAAEBAAAAAAAAAAAAAAAAAwAAAAABAAAAAQEAA'
  89. # 'AAAAQABAAAAAAAFAAIBAAAGAgYAAAECAAA=',
  90. # 'x-requested-with': 'fetch',
  91. # # 'accept-encoding': 'gzip, deflate, br',
  92. # 'accept-encoding': 'gzip, deflate',
  93. # 'x-zse-96': '2.0_wSyMHDQ7MZYfDZWRCM=Il3PaIuFuaTZf=spPXrAiJo83rlG65AU+b6IIBIr5nJ5L',
  94. # }
  95. # params = {
  96. # 'offset': str(cls.offset),
  97. # 'limit': '20',
  98. # 'similar_aggregation': True,
  99. # 'include': 'similar_zvideo,creation_relationship',
  100. # }
  101. # cookies = {
  102. # '_zap': 'a17e5882-775f-49dd-b541-e854c0299999',
  103. # 'd_c0': '"AFAcR4hr0RKPTn2-RL935cm51KQZn9-Xlb0=|1616071586"',
  104. # '_9755xjdesxxd_': '32',
  105. # 'YD00517437729195%3AWM_TID': 'yR6Kt%2FwMI2xEFEQRVBYrou2J%2F14yfdzt',
  106. # 'q_c1': '947ece3f1ccd4ccd83b718c113fe3935|1658911548000|1658911548000',
  107. # '_xsrf': 'cAM8ovVh43f0oJE2erl6ho8Y4CTZhFKF',
  108. # '__snaker__id': 'mqTPtP3IwbuoHA2K',
  109. # 'gdxidpyhxdE': 'a2d%2FeD%2BEUNxnnTaLDjQfd28sI39g3pnKAqS%2BmodWMqYcMa3AMzV5eom2APR2z5rDHAVvfubp'
  110. # '%2BWEzkcxdb9UjWNT0gbshTpBmHzU1%2BMB%2BfW5sAAUz1%5CTWIWbP34z7SV4TCB%2BfbQtoAdRA'
  111. # 'ZANzEbcollrLlPmyAG%2FtPICs5ewAvswneclw%3A1666061422009',
  112. # 'YD00517437729195%3AWM_NI': 'bTzKLUbqXTrTGVFS91056cj27L%2F9PQ8MG8ofPHnJc98014ktS5xa09N8yJZho7j'
  113. # 'EeQfeueH0sJLDH5YmaA2nCP1kQ1jaB4FChGGNA7HbM%2B5yRCuYOmGjfIs5LOdoxm'
  114. # '1WSHY%3D',
  115. # 'YD00517437729195%3AWM_NIKE': '9ca17ae2e6ffcda170e2e6eeb2f5628de7a5b2db5cfc8e8eb6d85f929e8aacc'
  116. # '150b6ed9bd8b33c93afc089cf2af0fea7c3b92aa28cb6accf5fa2af98b5f1608'
  117. # '58a8ab4bc3bf4ebfa84e93af594e1dad35bfc9389d9ee6e9498a2abe233fcf0f'
  118. # 'fd5ea8091ebb882b3338ee8b7abd964ade9a6a6e85ba6bbbb86cb3cb19684a5c'
  119. # 'b3f9bb3afa2b1668190ff8cd43baabf8ed5c549ac9989afc43d8cbe83ccb8739'
  120. # '29dbbbace6785b6aaaaf23f9091ac90fb67f3899da9dc37e2a3',
  121. # 'captcha_ticket_v2': '2|1:0|10:1666060537|17:captcha_ticket_v2|704:eyJ2YWxpZGF0ZSI6IkNOMzFfVkk0'
  122. # 'TmhfRVJ1SGxhQW4yV0ZHWFlXTjdIWXM3TnZQSUFMY19MRGdVV1dMX3Vody0wMFAtYnA0UXBFV'
  123. # 'nI5akI1NFNVajhZVEpLeXVLODJWajJoVU83MnBya2xyLi5TalExekwyZ3hQU3JjZTI4TkJweT'
  124. # 'h2NEg5QjFyVUlBME1zSGtfUlRqQ3lDd1BkQ2Y3c3J3WWl0V0Qyci5FTWV6eEJRMUdQcGlWS2d'
  125. # 'CXzRkQzhSWmNMT1ZmWnlxUDZWbTh6ZEdDdmVnVkhFTTFGVWNpNVJRV2NWLjZ6V2V4UUpJOWJi'
  126. # 'bUdRZHpJQ0VQUFlZZU5ObVBKOTlKMUw1OWFxUmRxUzVtY1hDbXBsRzVBTnM0LmEyS1BDRGNTY'
  127. # 'WVTWUlpMFdfc0JlTE44QlZCXzFkWjFLV2xoUmx3bkR6QjJMdUdOcm1IRXRaejQ2T0ViVzhVS3'
  128. # 'ZSNjU2YUJEUk9tYUNscVM5cDk2T2cwY1JadlhEZFZOSlFSeG0wNkRUMEs5cHRLLS1hajFWaWt'
  129. # 'EMnp4UU50MEpFZE9PRGs2RWtqdklnWnM0S0s0bUVBd0tGS2FIMklJaXVod1dMck9WOGdKLUpw'
  130. # 'NjlYNU9hWWxNcEtCbk9pU2JYNUJfUnBmeU1hLlAwbUlBYXYwbnBvY0xRWmdMMXJpelN5OFVaV'
  131. # 'l9QMXduWTByR1BtMyJ9|557dafa54cac0a5cd55b3e8ebbb626e6817010e985c2a3c60a82f'
  132. # 'b00bcb8b72a',
  133. # 'captcha_session_v2': '2|1:0|10:1666060537|18:captcha_session_v2|88:SUlnbE5uR29rUC83Z0VvWVpjTS9'
  134. # 'JV3VKSFFYTUl4T1hFbVdZZnB2Wms1Z1cxYkZFQndYWHhqVDRGWEE0NzVaTQ==|a40c62b74a'
  135. # '94ad618205342e3f87091b525a50fe9c8f153af02b08a3569fa7c0',
  136. # 'z_c0': '2|1:0|10:1666767387|4:z_c0|92:Mi4xVFdsTlB3QUFBQUFBVUJ4SGlHdlJFaVlBQUFCZ0FsVk5CVjg3WkFB'
  137. # 'OTBXb185c2wyUjJ6ZmQ1OTYtamliYWJWTUp3|abd71d40da50ae308430e26ce358d34cd4bccdfe575bf544d'
  138. # '212019c7189ecdb',
  139. # 'Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49': '1665574748,1666678329,1667194593',
  140. # 'SESSIONID': 'hyrUgL9IzongRfviGSTmGR2sDyVliSwHuWy0fXouVk7',
  141. # 'JOID': 'W14WBkoyfRA9faU0WD7qwUz2wuJAVQFkbBLmCBdAHXtcK8tNNNogTVJ6rzdabb3cImOxRThXGFkflrdU1CD0UWI=',
  142. # 'osd': 'U10VAU46fhM6ea03WznuyU_1xeZIVgJjaBrlCxBEFXhfLM9FN9knSVp5rDBeZb7fJWe5RjtQHFEclbBQ3CP3VmY=',
  143. # 'tst': 'v',
  144. # 'NOT_UNREGISTER_WAITING': '1',
  145. # 'Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49': '1667373061',
  146. # 'unlock_ticket': 'AGBWwkpQuhUmAAAAYAJVTQ0fYmOTbjeHqRp3XiDPD3ZVB5KkeaEeYw==',
  147. # 'KLBRSID': '031b5396d5ab406499e2ac6fe1bb1a43|1667373074|1667370552',
  148. # }
  149. urllib3.disable_warnings()
  150. response = requests.get(url=url, headers=headers, data=payload, verify=False, proxies=proxies)
  151. cls.offset += 20
  152. if 'data' not in response.json():
  153. Common.logger(log_type).error('response:{}\n', response.text)
  154. cls.offset = 0
  155. return
  156. elif len(response.json()['data']) == 0:
  157. Common.logger(log_type).error('response:{}\n', response.text)
  158. cls.offset = 0
  159. return
  160. else:
  161. feeds = response.json()['data']
  162. is_end = response.json()['paging']['is_end']
  163. for i in range(len(feeds)):
  164. # video_title
  165. if 'title' not in feeds[i]:
  166. video_title = 0
  167. else:
  168. video_title = feeds[i]['title']
  169. # video_id
  170. if 'video' not in feeds[i]:
  171. video_id = 0
  172. elif 'video_id' not in feeds[i]['video']:
  173. video_id = 0
  174. else:
  175. video_id = feeds[i]['video']['video_id']
  176. # play_cnt
  177. if 'play_count' not in feeds[i]:
  178. play_cnt = 0
  179. else:
  180. play_cnt = feeds[i]['play_count']
  181. # comment_cnt
  182. if 'comment_count' not in feeds[i]:
  183. comment_cnt = 0
  184. else:
  185. comment_cnt = feeds[i]['comment_count']
  186. # like_cnt
  187. if 'liked_count' not in feeds[i]:
  188. like_cnt = 0
  189. else:
  190. like_cnt = feeds[i]['liked_count']
  191. # share_cnt
  192. if 'share_count' not in feeds[i]:
  193. share_cnt = 0
  194. else:
  195. share_cnt = feeds[i]['share_count']
  196. # voteup_cnt 赞同数
  197. if 'voteup_count' not in feeds[i]:
  198. voteup_cnt = 0
  199. else:
  200. voteup_cnt = feeds[i]['voteup_count']
  201. # publish_time
  202. if 'published_at' not in feeds[i]:
  203. publish_time = 0
  204. else:
  205. publish_time = feeds[i]['published_at']
  206. # duration
  207. if 'video' not in feeds[i]:
  208. duration = 0
  209. elif 'duration' not in feeds[i]['video']:
  210. duration = 0
  211. else:
  212. duration = feeds[i]['video']['duration']
  213. # width / height / video_url
  214. if 'video' not in feeds[i]:
  215. video_width = 0
  216. video_height = 0
  217. video_url = 0
  218. elif 'playlist' in feeds[i]['video'] and 'fhd' in feeds[i]['video']['playlist'] \
  219. and 'width' in feeds[i]['video']['playlist']['fhd'] \
  220. and 'height' in feeds[i]['video']['playlist']['fhd'] \
  221. and 'play_url' in feeds[i]['video']['playlist']['fhd']:
  222. video_width = feeds[i]['video']['playlist']['fhd']['width']
  223. video_height = feeds[i]['video']['playlist']['fhd']['height']
  224. video_url = feeds[i]['video']['playlist']['fhd']['play_url']
  225. elif 'playlist' in feeds[i]['video'] and 'fhd' in feeds[i]['video']['playlist'] \
  226. and 'width' in feeds[i]['video']['playlist']['fhd'] \
  227. and 'height' in feeds[i]['video']['playlist']['fhd'] \
  228. and 'url' in feeds[i]['video']['playlist']['fhd']:
  229. video_width = feeds[i]['video']['playlist']['fhd']['width']
  230. video_height = feeds[i]['video']['playlist']['fhd']['height']
  231. video_url = feeds[i]['video']['playlist']['fhd']['url']
  232. elif 'playlist' in feeds[i]['video'] and 'hd' in feeds[i]['video']['playlist'] \
  233. and 'width' in feeds[i]['video']['playlist']['hd'] \
  234. and 'height' in feeds[i]['video']['playlist']['hd'] \
  235. and 'play_url' in feeds[i]['video']['playlist']['hd']:
  236. video_width = feeds[i]['video']['playlist']['hd']['width']
  237. video_height = feeds[i]['video']['playlist']['hd']['height']
  238. video_url = feeds[i]['video']['playlist']['hd']['play_url']
  239. elif 'playlist' in feeds[i]['video'] and 'hd' in feeds[i]['video']['playlist'] \
  240. and 'width' in feeds[i]['video']['playlist']['hd'] \
  241. and 'height' in feeds[i]['video']['playlist']['hd'] \
  242. and 'url' in feeds[i]['video']['playlist']['hd']:
  243. video_width = feeds[i]['video']['playlist']['hd']['width']
  244. video_height = feeds[i]['video']['playlist']['hd']['height']
  245. video_url = feeds[i]['video']['playlist']['hd']['url']
  246. elif 'playlist' in feeds[i]['video'] and 'ld' in feeds[i]['video']['playlist'] \
  247. and 'width' in feeds[i]['video']['playlist']['ld'] \
  248. and 'height' in feeds[i]['video']['playlist']['ld'] \
  249. and 'play_url' in feeds[i]['video']['playlist']['ld']:
  250. video_width = feeds[i]['video']['playlist']['ld']['width']
  251. video_height = feeds[i]['video']['playlist']['ld']['height']
  252. video_url = feeds[i]['video']['playlist']['ld']['play_url']
  253. elif 'playlist' in feeds[i]['video'] and 'ld' in feeds[i]['video']['playlist'] \
  254. and 'width' in feeds[i]['video']['playlist']['ld'] \
  255. and 'height' in feeds[i]['video']['playlist']['ld'] \
  256. and 'url' in feeds[i]['video']['playlist']['ld']:
  257. video_width = feeds[i]['video']['playlist']['ld']['width']
  258. video_height = feeds[i]['video']['playlist']['ld']['height']
  259. video_url = feeds[i]['video']['playlist']['ld']['url']
  260. elif 'playlist' in feeds[i]['video'] and 'sd' in feeds[i]['video']['playlist'] \
  261. and 'width' in feeds[i]['video']['playlist']['sd'] \
  262. and 'height' in feeds[i]['video']['playlist']['sd'] \
  263. and 'play_url' in feeds[i]['video']['playlist']['sd']:
  264. video_width = feeds[i]['video']['playlist']['sd']['width']
  265. video_height = feeds[i]['video']['playlist']['sd']['height']
  266. video_url = feeds[i]['video']['playlist']['sd']['play_url']
  267. elif 'playlist' in feeds[i]['video'] and 'sd' in feeds[i]['video']['playlist'] \
  268. and 'width' in feeds[i]['video']['playlist']['sd'] \
  269. and 'height' in feeds[i]['video']['playlist']['sd'] \
  270. and 'url' in feeds[i]['video']['playlist']['sd']:
  271. video_width = feeds[i]['video']['playlist']['sd']['width']
  272. video_height = feeds[i]['video']['playlist']['sd']['height']
  273. video_url = feeds[i]['video']['playlist']['sd']['url']
  274. elif 'playlist_v2' in feeds[i]['video'] and 'fhd' in feeds[i]['video']['playlist_v2'] \
  275. and 'width' in feeds[i]['video']['playlist_v2']['fhd'] \
  276. and 'height' in feeds[i]['video']['playlist_v2']['fhd'] \
  277. and 'play_url' in feeds[i]['video']['playlist_v2']['fhd']:
  278. video_width = feeds[i]['video']['playlist_v2']['fhd']['width']
  279. video_height = feeds[i]['video']['playlist_v2']['fhd']['height']
  280. video_url = feeds[i]['video']['playlist_v2']['fhd']['play_url']
  281. elif 'playlist_v2' in feeds[i]['video'] and 'fhd' in feeds[i]['video']['playlist_v2'] \
  282. and 'width' in feeds[i]['video']['playlist_v2']['fhd'] \
  283. and 'height' in feeds[i]['video']['playlist_v2']['fhd'] \
  284. and 'url' in feeds[i]['video']['playlist_v2']['fhd']:
  285. video_width = feeds[i]['video']['playlist_v2']['fhd']['width']
  286. video_height = feeds[i]['video']['playlist_v2']['fhd']['height']
  287. video_url = feeds[i]['video']['playlist_v2']['fhd']['url']
  288. elif 'playlist_v2' in feeds[i]['video'] and 'hd' in feeds[i]['video']['playlist_v2'] \
  289. and 'width' in feeds[i]['video']['playlist_v2']['hd'] \
  290. and 'height' in feeds[i]['video']['playlist_v2']['hd'] \
  291. and 'play_url' in feeds[i]['video']['playlist_v2']['hd']:
  292. video_width = feeds[i]['video']['playlist_v2']['hd']['width']
  293. video_height = feeds[i]['video']['playlist_v2']['hd']['height']
  294. video_url = feeds[i]['video']['playlist_v2']['hd']['play_url']
  295. elif 'playlist_v2' in feeds[i]['video'] and 'hd' in feeds[i]['video']['playlist_v2'] \
  296. and 'width' in feeds[i]['video']['playlist_v2']['hd'] \
  297. and 'height' in feeds[i]['video']['playlist_v2']['hd'] \
  298. and 'url' in feeds[i]['video']['playlist_v2']['hd']:
  299. video_width = feeds[i]['video']['playlist_v2']['hd']['width']
  300. video_height = feeds[i]['video']['playlist_v2']['hd']['height']
  301. video_url = feeds[i]['video']['playlist_v2']['hd']['url']
  302. elif 'playlist_v2' in feeds[i]['video'] and 'ld' in feeds[i]['video']['playlist_v2'] \
  303. and 'width' in feeds[i]['video']['playlist_v2']['ld'] \
  304. and 'height' in feeds[i]['video']['playlist_v2']['ld'] \
  305. and 'play_url' in feeds[i]['video']['playlist_v2']['ld']:
  306. video_width = feeds[i]['video']['playlist_v2']['ld']['width']
  307. video_height = feeds[i]['video']['playlist_v2']['ld']['height']
  308. video_url = feeds[i]['video']['playlist_v2']['ld']['play_url']
  309. elif 'playlist_v2' in feeds[i]['video'] and 'ld' in feeds[i]['video']['playlist_v2'] \
  310. and 'width' in feeds[i]['video']['playlist_v2']['ld'] \
  311. and 'height' in feeds[i]['video']['playlist_v2']['ld'] \
  312. and 'url' in feeds[i]['video']['playlist_v2']['ld']:
  313. video_width = feeds[i]['video']['playlist_v2']['ld']['width']
  314. video_height = feeds[i]['video']['playlist_v2']['ld']['height']
  315. video_url = feeds[i]['video']['playlist_v2']['ld']['url']
  316. elif 'playlist_v2' in feeds[i]['video'] and 'sd' in feeds[i]['video']['playlist_v2'] \
  317. and 'width' in feeds[i]['video']['playlist_v2']['sd'] \
  318. and 'height' in feeds[i]['video']['playlist_v2']['sd'] \
  319. and 'play_url' in feeds[i]['video']['playlist_v2']['sd']:
  320. video_width = feeds[i]['video']['playlist_v2']['sd']['width']
  321. video_height = feeds[i]['video']['playlist_v2']['sd']['height']
  322. video_url = feeds[i]['video']['playlist_v2']['sd']['play_url']
  323. elif 'playlist_v2' in feeds[i]['video'] and 'sd' in feeds[i]['video']['playlist_v2'] \
  324. and 'width' in feeds[i]['video']['playlist_v2']['sd'] \
  325. and 'height' in feeds[i]['video']['playlist_v2']['sd'] \
  326. and 'url' in feeds[i]['video']['playlist_v2']['sd']:
  327. video_width = feeds[i]['video']['playlist_v2']['sd']['width']
  328. video_height = feeds[i]['video']['playlist_v2']['sd']['height']
  329. video_url = feeds[i]['video']['playlist_v2']['sd']['url']
  330. else:
  331. video_width = 0
  332. video_height = 0
  333. video_url = 0
  334. # cover_url
  335. if 'video' not in feeds[i]:
  336. cover_url = 0
  337. elif 'thumbnail' not in feeds[i]['video']:
  338. cover_url = 0
  339. else:
  340. cover_url = feeds[i]['video']['thumbnail']
  341. # user_name / uid / user_type / url_token / avatar_url
  342. if 'author' not in feeds[i]:
  343. user_name = 0
  344. uid = 0
  345. user_type = 0
  346. url_token = 0
  347. avatar_url = 0
  348. elif 'author' in feeds[i] \
  349. and 'name' in feeds[i]['author'] \
  350. and 'uid' in feeds[i]['author'] \
  351. and 'user_type' in feeds[i]['author'] \
  352. and 'url_token' in feeds[i]['author'] \
  353. and 'avatar_url_template' in feeds[i]['author']:
  354. user_name = feeds[i]['author']['name']
  355. uid = feeds[i]['author']['uid']
  356. user_type = feeds[i]['author']['user_type']
  357. url_token = feeds[i]['author']['url_token']
  358. avatar_url = feeds[i]['author']['avatar_url_template']
  359. elif 'author' in feeds[i] \
  360. and 'name' in feeds[i]['author'] \
  361. and 'uid' in feeds[i]['author'] \
  362. and 'user_type' in feeds[i]['author'] \
  363. and 'url_token' in feeds[i]['author'] \
  364. and 'avatar_url' in feeds[i]['author']:
  365. user_name = feeds[i]['author']['name']
  366. uid = feeds[i]['author']['uid']
  367. user_type = feeds[i]['author']['user_type']
  368. url_token = feeds[i]['author']['url_token']
  369. avatar_url = feeds[i]['author']['avatar_url']
  370. else:
  371. user_name = 0
  372. uid = 0
  373. user_type = 0
  374. url_token = 0
  375. avatar_url = 0
  376. Common.logger(log_type).info('video_title:{}', video_title)
  377. Common.logger(log_type).info('duration:{}秒', int(duration))
  378. Common.logger(log_type).info(
  379. 'publish_time:{}', time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(publish_time)))
  380. if video_title == 0 or cover_url == 0 or video_url == 0:
  381. Common.logger(log_type).info('无效视频\n')
  382. elif int(time.time()) - int(publish_time) >= 3600 * 24 * 30:
  383. Common.logger(log_type).info('发布时间超过30天\n')
  384. cls.offset = 0
  385. return
  386. elif int(duration) < 60:
  387. Common.logger(log_type).info('时长{}<60秒\n', int(duration))
  388. elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'zhihu', '8871e3') for x in
  389. y]:
  390. Common.logger(log_type).info('视频已下载\n')
  391. elif str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'zhihu', '4MGuux') for x in
  392. y]:
  393. Common.logger(log_type).info('视频已下载\n')
  394. else:
  395. Common.download_method(log_type, 'cover', video_title, cover_url)
  396. Common.download_method(log_type, 'video', video_title, video_url)
  397. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  398. with open("./videos/" + video_title + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  399. f_a.write(str(video_id) + "\n" +
  400. str(video_title) + "\n" +
  401. str(int(duration)) + "\n" +
  402. str(play_cnt) + "\n" +
  403. str(comment_cnt) + "\n" +
  404. str(like_cnt) + "\n" +
  405. str(share_cnt) + "\n" +
  406. str(video_width) + '*' + str(video_height) + "\n" +
  407. str(publish_time) + "\n" +
  408. str(user_name) + "\n" +
  409. str(avatar_url) + "\n" +
  410. str(video_url) + "\n" +
  411. str(cover_url) + "\n" +
  412. "zhihu_follow" + str(int(time.time())))
  413. Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
  414. our_video_id = Publish.upload_and_publish(log_type, env, our_uid)
  415. if env == 'dev':
  416. our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(
  417. our_video_id) + "/info"
  418. else:
  419. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(
  420. our_video_id) + "/info"
  421. Common.logger(log_type).info("视频上传完成:{}", video_title)
  422. Feishu.insert_columns(log_type, 'zhihu', '4MGuux', 'ROWS', 1, 2)
  423. time.sleep(1)
  424. upload_time = int(time.time())
  425. values = [[
  426. time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  427. "定向榜",
  428. video_title,
  429. str(video_id),
  430. our_video_link,
  431. play_cnt,
  432. comment_cnt,
  433. like_cnt,
  434. share_cnt,
  435. voteup_cnt,
  436. int(duration),
  437. str(video_width) + '*' + str(video_height),
  438. time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(publish_time))),
  439. user_name,
  440. uid,
  441. 'https://www.zhihu.com/' + user_type + '/' + url_token,
  442. user_type,
  443. url_token,
  444. avatar_url,
  445. cover_url,
  446. video_url]]
  447. Feishu.update_values(log_type, 'zhihu', "4MGuux", "F2:Z2", values)
  448. Common.logger(log_type).info("视频已保存至云文档:{}\n", video_title)
  449. if is_end is True:
  450. Common.logger(log_type).info('到底了\n')
  451. cls.offset = 0
  452. return
  453. except Exception as e:
  454. Common.logger(log_type).error('get_follow_feeds异常:{}', e)
  455. @classmethod
  456. def get_all_users_feeds(cls, log_type, env):
  457. try:
  458. user_list = cls.get_users_from_feishu(log_type)
  459. if len(user_list) == 0:
  460. Common.logger(log_type).info('定向用户列表为空\n')
  461. else:
  462. for k, v in user_list.items():
  463. user_name = k
  464. url_token = v.split(',')[0]
  465. referer = v.split(',')[1]
  466. our_uid = v.split(',')[2]
  467. Common.logger(log_type).info('开始抓取 {} 主页视频\n', user_name)
  468. cls.get_follow_feeds(log_type, url_token, referer, our_uid, env)
  469. Common.logger(log_type).info('{} 主页视频抓取完毕,休眠 60 秒\n', user_name)
  470. cls.offset = 0
  471. time.sleep(60)
  472. except Exception as e:
  473. Common.logger(log_type).error('get_all_users_feeds异常:{}', e)
  474. if __name__ == '__main__':
  475. ZhihuFollow.get_all_users_feeds('follow', 'dev')
  476. pass