# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/10/31
import os
import sys
import time
import requests
import urllib3
# The project-local imports below resolve relative to the working directory,
# so it has to be on sys.path before they run.
sys.path.append(os.getcwd())
from main.common import Common
from main.feishu_lib import Feishu
from main.zhihu_hot_publish import Publish
proxies = {'http': None, 'https': None}


class ZhihuHot:
    """Crawl the Zhihu video recommendation feed and publish qualifying videos."""

    # Download-rule thresholds (see download_rule).
    _MAX_AGE_SECONDS = 3600 * 24 * 180
    _MIN_PLAY_COUNT = 30000
    _MIN_DURATION_SECONDS = 60

    # Seconds to wait for the feed API before giving up; without a timeout a
    # stalled connection would block the crawler indefinitely.
    _REQUEST_TIMEOUT = 30

    # Lookup precedence for the playable stream. Order matters: the first
    # combination that carries width, height and a URL wins.
    _PLAYLIST_KEYS = ('playlist', 'playlist_v2')
    _QUALITY_KEYS = ('fhd', 'hd', 'ld', 'sd')
    _STREAM_URL_KEYS = ('play_url', 'url')

    @classmethod
    def download_rule(cls, publish_time, play_cnt, duration):
        """
        Decide whether a feed video qualifies for download.

        A video qualifies when all of the following hold:
        - it was published no more than 180 days ago
        - its play count is at least 30000
        - its duration is at least 60 seconds

        :param publish_time: publish time as a Unix timestamp in seconds
        :param play_cnt: play count (anything accepted by ``int()``)
        :param duration: duration in seconds (anything accepted by ``int()``)
        :return: True when the video qualifies, otherwise False
        """
        return (
            int(time.time()) - publish_time <= cls._MAX_AGE_SECONDS
            and int(play_cnt) >= cls._MIN_PLAY_COUNT
            and int(duration) >= cls._MIN_DURATION_SECONDS
        )

    @classmethod
    def _select_stream(cls, video):
        """Return (width, height, url) of the preferred stream, or (0, 0, 0) if none is usable."""
        for playlist_key in cls._PLAYLIST_KEYS:
            playlist = video.get(playlist_key)
            if not isinstance(playlist, dict):
                continue
            for quality in cls._QUALITY_KEYS:
                stream = playlist.get(quality)
                if not isinstance(stream, dict):
                    continue
                if 'width' not in stream or 'height' not in stream:
                    continue
                for url_key in cls._STREAM_URL_KEYS:
                    if url_key in stream:
                        return stream['width'], stream['height'], stream[url_key]
        return 0, 0, 0

    @staticmethod
    def _parse_author(feed):
        """Return (user_name, uid, user_type, url_token, avatar_url), or five zeros if any part is missing."""
        missing = (0, 0, 0, 0, 0)
        author = feed.get('author')
        if not isinstance(author, dict):
            return missing
        if any(key not in author for key in ('name', 'uid', 'user_type', 'url_token')):
            return missing
        # 'avatar_url_template' takes precedence over 'avatar_url'.
        for avatar_key in ('avatar_url_template', 'avatar_url'):
            if avatar_key in author:
                return (author['name'], author['uid'], author['user_type'],
                        author['url_token'], author[avatar_key])
        return missing

    @classmethod
    def get_hot_feeds(cls, log_type, env):
        """
        Fetch one page of the recommendation feed and publish new qualifying videos.

        For every feed item the metadata is extracted (missing fields become 0),
        then the item is skipped when it is invalid, fails download_rule, or its
        video id is already present in one of the two Feishu sheets. Otherwise
        the cover and video are downloaded, an info.txt is written next to them,
        the video is published and a row is added to the Feishu sheet.

        Any exception is logged and ends the run; nothing is raised to the caller.

        :param log_type: logger / crawler name passed through to Common, Feishu and Publish
        :param env: 'dev' selects the test admin link, anything else the production link
        :return: None
        """
        try:
            url = "https://www.zhihu.com/api/v4/zvideo-tabs/tabs/choice/feeds/recommend?include=creation_relationship&limit=12&offset=0&trans="
            payload = {}
            # NOTE(review): the cookie and x-zse-*/x-zst-* values below are
            # hard-coded session credentials/signatures. They are kept as-is to
            # preserve behavior, but should be moved out of source control.
            headers = {
                'authority': 'www.zhihu.com',
                'accept': '*/*',
                'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
                'cache-control': 'no-cache',
                'cookie': '_zap=246102fb-af66-40c3-a5a5-9901921d5a71; d_c0=AHCWVw5U5hWPTqifPR-jYwskMnmcUFEgHzQ=|1669014326; q_c1=40c865e7cbed4099b5d090229d3096f5|1669983925000|1669983925000; _xsrf=05151c3d-2d05-47fe-98bc-7b01dae731ba; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1675166229; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1675650146; arialoadData=false; unlock_ticket=AGBWwkpQuhUmAAAAYAJVTWpr4GPx5OU2pJq5pGyrjlK8kt9ctaE_kQ==; z_c0=2|1:0|10:1675650146|4:z_c0|80:MS4xVFdsTlB3QUFBQUFtQUFBQVlBSlZUVDFJeG1TWWVaZEJSVWdsSWdRalloaWlGaVlqYlFrVmpRPT0=|c0945918804e3c623699052665e50a2452bceb732d25e544df0ca9419e50fa6d; SESSIONID=AgocnyI3witm93R7LqTR2y59rcyJQ9p0QONL8jD8laf; JOID=V1sUA0OVH-cf-lXUM5BktSnhBDki0nqJdJkqnwHvcJFzy2uABvN4DnnxUdc6mEsEpn2HejuruTnxTM_CAaxxrJQ=; osd=UVodBEiTHu4Y8VPVOpdvsyjoAzIk03OOf58rlgbkdpB6zGCGB_p_BX_wWNAxnkoNoXaBezKssj_wRcjJB614q58=; KLBRSID=b5ffb4aa1a842930a6f64d0a8f93e9bf|1675650208|1675650143; tst=v; KLBRSID=b5ffb4aa1a842930a6f64d0a8f93e9bf|1675650577|1675650143',
                'pragma': 'no-cache',
                'referer': 'https://www.zhihu.com/zvideo',
                'sec-ch-ua': '"Not_A Brand";v="99", "Microsoft Edge";v="109", "Chromium";v="109"',
                'sec-ch-ua-mobile': '?0',
                'sec-ch-ua-platform': '"macOS"',
                'sec-fetch-dest': 'empty',
                'sec-fetch-mode': 'cors',
                'sec-fetch-site': 'same-origin',
                'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.52',
                'x-ab-param': '',
                'x-ab-pb': 'CmQIABsAPwBHALQAaQFqAXQBOwLMAtcC2AK3A9YEEQVRBYsFjAWeBTAGMQbrBicHdAh2CHkIPwlgCfQJBApJCmUKawq+Cv4KQwtxC4cLjQvXC+AL5QvmCzgMcQyPDKwMwwzJDPgMEjIBAAAAAAAAAAADAAAABAAAAAABAAABAAAABgABAwAAAAAAAAEAAAUCAQAAAgYAAAIAAA==',
                'x-requested-with': 'fetch',
                'x-zse-93': '101_3_3.0',
                'x-zse-96': '2.0_9hhW+XP7k+dDB9BskcIiHxVJqhacSZWPOmtKRnK6sLS+A=09Pdu+xMIJRDD2=6RR',
                'x-zst-81': '3_2.0aR_sn77yn6O92wOB8hPZn79qE72xcXFZ16fyQArZ39Sm7820XM20cL_1kwxYUqwT16P0EiUZbR2x-LOmwhp1tD_I-JOfgGXTzJO1ADRZ0cHsTJXII820Eer0c4nVDJH8zGCBADwMuukRo4Cqm4w0riRO70CB70O83uPmgbgmhufXiqomKbO1FJLYiRnxEL2ZZrxmDucmqhPXnXFMTAoTF6RhRuLPF8O8xUt9ahuYyveKQRVydDLKFUCycHpxf9CMNGFL209megOV2HLKXqSm3JSTv72pr9Y0MTHfagHLovw_2qof_CN_UhomyvO_EUV_HGcGsh3_suVGPGe_JD3qPDwBWhOYAhpys8SYzgu0WgXLuvN1Yq3GHDcYOuws6HwqsUXCDvN_HvxsCqfzKiLMricfPgXVqqc0_DoMSGR9NqeYEvO_1LgmoGxBOqtLThXyyuC_xcN0jqFY19xmkGVm2cwMNbXpTwFm39L_ACo_2CpKc6eLEBXynGOBpwc9QTLL1QOC'
            }
            # Certificate verification is disabled below, so silence the warning it triggers.
            urllib3.disable_warnings()
            response = requests.get(url=url, headers=headers, data=payload, verify=False,
                                    proxies=proxies, timeout=cls._REQUEST_TIMEOUT)
            # Parse the body once instead of re-decoding it for every check.
            body = response.json()
            if 'data' not in body or not body['data']:
                Common.logger(log_type).error('response:{}', response.text)
                return

            for feed in body['data']:
                # Missing fields are represented by 0 throughout.
                video_title = feed.get('title', 0)
                play_cnt = feed.get('play_count', 0)
                comment_cnt = feed.get('comment_count', 0)
                like_cnt = feed.get('liked_count', 0)
                share_cnt = feed.get('share_count', 0)
                # voteup_cnt: number of upvotes
                voteup_cnt = feed.get('voteup_count', 0)
                publish_time = feed.get('published_at', 0)

                # A null / non-dict "video" is treated the same as a missing one.
                video = feed.get('video')
                if not isinstance(video, dict):
                    video = {}
                video_id = video.get('video_id', 0)
                duration = video.get('duration', 0)
                cover_url = video.get('thumbnail', 0)
                video_width, video_height, video_url = cls._select_stream(video)

                user_name, uid, user_type, url_token, avatar_url = cls._parse_author(feed)

                Common.logger(log_type).info('video_title:{}', video_title)
                Common.logger(log_type).info('play_cnt:{}', play_cnt)
                Common.logger(log_type).info('duration:{}', int(duration))
                Common.logger(log_type).info(
                    'publish_time:{}', time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(publish_time)))

                if video_title == 0 or video_id == 0 or avatar_url == 0 or video_url == 0:
                    Common.logger(log_type).info('无效视频\n')
                    continue
                if cls.download_rule(publish_time, play_cnt, duration) is False:
                    Common.logger(log_type).info('不满足下载规则\n')
                    continue
                # The sheets are re-read for every feed on purpose: a video
                # published earlier in this same run adds a row to '8871e3'.
                if str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'zhihu', '8871e3') for x in y]:
                    Common.logger(log_type).info('视频已下载\n')
                    continue
                if str(video_id) in [x for y in Feishu.get_values_batch(log_type, 'zhihu', '4MGuux') for x in y]:
                    Common.logger(log_type).info('视频已下载\n')
                    continue

                Common.download_method(log_type, 'cover', video_title, cover_url)
                Common.download_method(log_type, 'video', video_title, video_url)

                # Save the video info to "./videos/{video_title}/info.txt"
                info_lines = [
                    str(video_id),
                    str(video_title),
                    str(int(duration)),
                    str(play_cnt),
                    str(comment_cnt),
                    str(like_cnt),
                    str(share_cnt),
                    str(video_width) + '*' + str(video_height),
                    str(publish_time),
                    str(user_name),
                    str(avatar_url),
                    str(video_url),
                    str(cover_url),
                    "zhihu" + str(int(time.time())),
                ]
                with open(f"./videos/{video_title}/info.txt", "a", encoding="UTF-8") as f_a:
                    f_a.write("\n".join(info_lines))
                Common.logger(log_type).info("==========视频信息已保存至info.txt==========")

                our_video_id = Publish.upload_and_publish(log_type, env, 'hot')
                if env == 'dev':
                    our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
                else:
                    our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
                Common.logger(log_type).info("视频上传完成:{}\n", video_title)

                # Insert a fresh row at the top of the sheet, then fill it in.
                Feishu.insert_columns(log_type, 'zhihu', '8871e3', 'ROWS', 1, 2)
                time.sleep(1)
                upload_time = int(time.time())
                values = [[
                    time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
                    "热榜",
                    video_title,
                    str(video_id),
                    our_video_link,
                    play_cnt,
                    comment_cnt,
                    like_cnt,
                    share_cnt,
                    voteup_cnt,
                    int(duration),
                    str(video_width) + '*' + str(video_height),
                    time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(publish_time))),
                    user_name,
                    uid,
                    # f-string so a non-string user_type/url_token cannot raise TypeError
                    f'https://www.zhihu.com/{user_type}/{url_token}',
                    user_type,
                    url_token,
                    avatar_url,
                    cover_url,
                    video_url,
                ]]
                Feishu.update_values(log_type, 'zhihu', "8871e3", "F2:Z2", values)
                Common.logger(log_type).info("视频已保存至云文档:{}\n", video_title)
        except Exception as e:
            Common.logger(log_type).error('get_hot_feeds异常:{}\n', e)


if __name__ == '__main__':
    ZhihuHot.get_hot_feeds('hot', 'dev')