wangkun 2 年之前
父節點
當前提交
28a8b24e05
共有 6 個文件被更改,包括 163 次插入和 35 次删除
  1. 7 0
      README.MD
  2. 40 2
      main/demo.py
  3. 68 24
      main/haokan_channel.py
  4. 45 3
      main/haokan_hot.py
  5. 3 3
      main/run_haokan_channel.py
  6. 0 3
      videos/__init__.py

+ 7 - 0
README.MD

@@ -17,6 +17,13 @@
 2. python3 ./main/run_xx.py
 
 #### 需求
+2022/12/01
+1. 新增:搞笑频道
+2. 新增:综艺频道
+3. 新增:生活频道
+4. 新增:美食频道
+5. 新增:三农频道
+
 2022/11/26 频道播放量榜
 1. 新增:首页频道
 2. 新增:音乐频道

+ 40 - 2
main/demo.py

@@ -2,7 +2,8 @@
 # @Author: wangkun
 # @Time: 2022/11/23
 import time
-
+import requests
+from main.common import Common
 from main.feishu_lib import Feishu
 
 
@@ -20,8 +21,45 @@ class Demo:
         time3 = int(time.mktime(time.strptime(time2, "%Y/%m/%d")))
         print(time3)
 
+    @classmethod
+    def get_video_url(cls, log_type, video_id):
+        """Fetch the play URL of a Haokan video from its detail endpoint.
+
+        Sends a GET to ``https://haokan.hao123.com/v`` with ``vid`` and
+        ``_format=json`` and reads ``data.apiData.curVideoMeta.clarityUrl``
+        from the JSON reply.
+
+        :param log_type: logger name handed to ``Common.logger``.
+        :param video_id: video id sent as the ``vid`` query parameter.
+        :return: ``url`` of the last ``clarityUrl`` entry, or ``None`` when
+            the HTTP status is not 200, ``errno`` is non-zero, ``data`` is
+            empty, or ``clarityUrl`` has no entries.
+        """
+        url = 'https://haokan.hao123.com/v?'
+        params = {
+            'vid': video_id,
+            '_format': 'json',
+        }
+        # NOTE(review): headers and cookie look like a captured browser
+        # session; the hard-coded cookie presumably expires - confirm.
+        headers = {
+            'Accept': '*/*',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+            'Cache-Control': 'no-cache',
+            'Connection': 'keep-alive',
+            'Content-Type': 'application/x-www-form-urlencoded',
+            'Cookie': 'PC_TAB_LOG=video_details_page; COMMON_LID=b0be69dd9fcae328d06935bd40f615cd; Hm_lvt_4aadd610dfd2f5972f1efee2653a2bc5=1669029953; hkpcvideolandquery=%u82CF%u5DDE%u6700%u5927%u7684%u4E8C%u624B%u8F66%u8D85%u5E02%uFF0C%u8F6C%u4E00%u8F6C%u91CC%u8FB9%u8C6A%u8F66%u592A%u591A%u4E86%uFF0C%u4EF7%u683C%u66F4%u8BA9%u6211%u5403%u60CA%uFF01; Hm_lpvt_4aadd610dfd2f5972f1efee2653a2bc5=1669875695; ariaDefaultTheme=undefined; reptileData=%7B%22data%22%3A%22636c55e0319da5169a60acec4a264a35c10862f8abfe2f2cc32c55eb6b0ab4de0efdfa115ea522d6d4d361dea07feae2831d3e2c16ed6b051c611ffe5aded6c9f852501759497b9fbd2132a2160e1e40e5845b41f78121ddcc3288bd077ae4e8%22%2C%22key_id%22%3A%2230%22%2C%22sign%22%3A%22f6752aac%22%7D; RT="z=1&dm=hao123.com&si=uc0q7wnm4w&ss=lb4otu71&sl=j&tt=av0&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=1rdw&cl=7v6c"',
+            'Pragma': 'no-cache',
+            'Referer': 'https://haokan.hao123.com/v?vid=10623278258033022286&pd=pc&context=',
+            'sec-ch-ua': '"Microsoft Edge";v="107", "Chromium";v="107", "Not=A?Brand";v="24"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"macOS"',
+            'Sec-Fetch-Dest': 'empty',
+            'Sec-Fetch-Mode': 'cors',
+            'Sec-Fetch-Site': 'same-origin',
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.62',
+        }
+        # requests waits indefinitely unless a timeout is given, so bound it.
+        r = requests.get(url=url, headers=headers, params=params, timeout=30)
+        if r.status_code != 200:
+            Common.logger(log_type).info(f'get_video_url_response:{r.text}')
+            return None
+
+        # Decode the body once instead of re-parsing it for every lookup.
+        body = r.json()
+        if body['errno'] != 0 or len(body['data']) == 0:
+            Common.logger(log_type).info(f'get_video_url_response:{body}')
+            return None
+
+        clarity_urls = body['data']['apiData']['curVideoMeta']['clarityUrl']
+        if not clarity_urls:
+            # An empty list would make the [-1] lookup raise IndexError.
+            Common.logger(log_type).info(f'get_video_url_response:{body}')
+            return None
+
+        # Same pick as before: the last entry of the clarity list.
+        return clarity_urls[-1]['url']
+
 
 if __name__ == '__main__':
-    Demo.get_sheet('demo', 'haokan', '7f05d8')
+    # Demo.get_sheet('demo', 'haokan', '7f05d8')
     # Demo.publish_time()
+    # Manual check: request the play URL for one sample video id; the
+    # returned value is not printed or stored here.
+    Demo.get_video_url('demo', '10377041690614321392')
     pass

+ 68 - 24
main/haokan_play.py → main/haokan_channel.py

@@ -11,7 +11,7 @@ from main.feishu_lib import Feishu
 from main.haokan_publish import Publish
 
 
-class Play:
+class Channel:
     @classmethod
     def download_rule(cls, play_cnt, duration):
         if int(play_cnt) >= 50000:
@@ -23,12 +23,12 @@ class Play:
             return False
 
     @classmethod
-    def get_tab_from_feishu(cls, log_type):
+    def get_channel_from_feishu(cls, log_type):
         try:
             user_sheet = Feishu.get_values_batch(log_type, 'haokan', 'TaQXk3')
             user_dict = {}
             # for i in range(1, len(user_sheet)):
-            for i in range(1, 3):
+            for i in range(1, 7):
                 user_name = user_sheet[i][0]
                 out_id = user_sheet[i][1]
                 our_id = user_sheet[i][3]
@@ -41,7 +41,7 @@ class Play:
             Common.logger(log_type).error(f'get_tab_from_feishu异常:{e}\n')
 
     @classmethod
-    def get_play_feeds(cls, log_type, tab):
+    def get_channel_feeds(cls, log_type, tab):
         try:
             url = "https://haokan.baidu.com/web/video/feed?"
             params = {
@@ -80,9 +80,48 @@ class Play:
             Common.logger(log_type).error(f'get_play_feeds异常:{e}\n')
 
     @classmethod
-    def get_play_videos(cls, log_type, tab, our_id, env):
+    def get_video_url(cls, log_type, video_id):
+        """Fetch the play URL of a Haokan video from its detail endpoint.
+
+        Sends a GET to ``https://haokan.hao123.com/v`` with ``vid`` and
+        ``_format=json`` and reads ``data.apiData.curVideoMeta.clarityUrl``
+        from the JSON reply.
+
+        :param log_type: logger name handed to ``Common.logger``.
+        :param video_id: video id; converted with ``str`` and sent as ``vid``.
+        :return: ``url`` of the last ``clarityUrl`` entry, or ``None`` when
+            the HTTP status is not 200, ``errno`` is non-zero, ``data`` is
+            empty, ``clarityUrl`` has no entries, or any exception is raised
+            (the exception is logged, not propagated).
+        """
         try:
-            feeds = cls.get_play_feeds(log_type, tab)
+            url = 'https://haokan.hao123.com/v?'
+            params = {
+                'vid': str(video_id),
+                '_format': 'json',
+            }
+            # NOTE(review): headers and cookie look like a captured browser
+            # session; the hard-coded cookie presumably expires - confirm.
+            headers = {
+                'Accept': '*/*',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+                'Cache-Control': 'no-cache',
+                'Connection': 'keep-alive',
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Cookie': 'PC_TAB_LOG=video_details_page; COMMON_LID=b0be69dd9fcae328d06935bd40f615cd; Hm_lvt_4aadd610dfd2f5972f1efee2653a2bc5=1669029953; hkpcvideolandquery=%u82CF%u5DDE%u6700%u5927%u7684%u4E8C%u624B%u8F66%u8D85%u5E02%uFF0C%u8F6C%u4E00%u8F6C%u91CC%u8FB9%u8C6A%u8F66%u592A%u591A%u4E86%uFF0C%u4EF7%u683C%u66F4%u8BA9%u6211%u5403%u60CA%uFF01; Hm_lpvt_4aadd610dfd2f5972f1efee2653a2bc5=1669875695; ariaDefaultTheme=undefined; reptileData=%7B%22data%22%3A%22636c55e0319da5169a60acec4a264a35c10862f8abfe2f2cc32c55eb6b0ab4de0efdfa115ea522d6d4d361dea07feae2831d3e2c16ed6b051c611ffe5aded6c9f852501759497b9fbd2132a2160e1e40e5845b41f78121ddcc3288bd077ae4e8%22%2C%22key_id%22%3A%2230%22%2C%22sign%22%3A%22f6752aac%22%7D; RT="z=1&dm=hao123.com&si=uc0q7wnm4w&ss=lb4otu71&sl=j&tt=av0&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=1rdw&cl=7v6c"',
+                'Pragma': 'no-cache',
+                'Referer': 'https://haokan.hao123.com/v?vid=10623278258033022286&pd=pc&context=',
+                'sec-ch-ua': '"Microsoft Edge";v="107", "Chromium";v="107", "Not=A?Brand";v="24"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"macOS"',
+                'Sec-Fetch-Dest': 'empty',
+                'Sec-Fetch-Mode': 'cors',
+                'Sec-Fetch-Site': 'same-origin',
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.62',
+            }
+            # requests waits indefinitely unless a timeout is given; this is
+            # called once per feed item, so a stalled socket must not hang
+            # the whole crawl.
+            r = requests.get(url=url, headers=headers, params=params, timeout=30)
+            if r.status_code != 200:
+                Common.logger(log_type).info(f'get_video_url_response:{r.text}')
+                return None
+
+            # Decode the body once instead of re-parsing it for every lookup.
+            body = r.json()
+            if body['errno'] != 0 or len(body['data']) == 0:
+                Common.logger(log_type).info(f'get_video_url_response:{body}')
+                return None
+
+            clarity_urls = body['data']['apiData']['curVideoMeta']['clarityUrl']
+            if not clarity_urls:
+                # An empty list would make the [-1] lookup raise IndexError.
+                Common.logger(log_type).info(f'get_video_url_response:{body}')
+                return None
+
+            # Same pick as before: the last entry of the clarity list.
+            return clarity_urls[-1]['url']
+        except Exception as e:
+            # Logged at error level, like the other exception handlers here.
+            Common.logger(log_type).error(f'get_video_url异常:{e}\n')
+            return None
+
+    @classmethod
+    def get_channel_videos(cls, log_type, tab, our_id, env):
+        try:
+            feeds = cls.get_channel_feeds(log_type, tab)
             for i in range(len(feeds)):
                 # video_title
                 if 'title' not in feeds[i]:
@@ -137,21 +176,18 @@ class Play:
                     cover_url = 0
 
                 # video_url
-                if 'play_url' not in feeds[i]:
-                    video_url = 0
-                else:
+                get_video_url = cls.get_video_url(log_type, video_id)
+                if get_video_url is not None:
+                    video_url = get_video_url
+                elif 'play_url' in feeds[i]:
                     video_url = feeds[i]['play_url']
+                else:
+                    video_url = 0
 
                 Common.logger(log_type).info(f'video_title:{video_title}')
-                # Common.logger(log_type).info(f'video_id:{video_id}, type:{type(video_id)}')
                 Common.logger(log_type).info(f'play_cnt:{play_cnt}')
                 Common.logger(log_type).info(f'duration:{duration}')
-                # Common.logger(log_type).info(f'publish_time:{publish_time}')
-                # Common.logger(log_type).info(f'user_name:{user_name}')
-                # Common.logger(log_type).info(f'head_url:{head_url}')
-                # Common.logger(log_type).info(f'cover_url:{cover_url}')
-                Common.logger(log_type).info(f'video_url:{video_url}\n')
-
+                Common.logger(log_type).info(f'video_url:{video_url}')
                 video_dict = {'video_title': video_title,
                               'video_id': video_id,
                               'play_cnt': play_cnt,
@@ -218,6 +254,14 @@ class Play:
                     tab = '播放量榜_首页频道'
                 elif tab == 'yinyue_new':
                     tab = '播放量榜_音乐频道'
+                elif tab == 'gaoxiao_new':
+                    tab = '播放量榜_搞笑频道'
+                elif tab == 'zongyi_new':
+                    tab = '播放量榜_综艺频道'
+                elif tab == 'shenghuo_new':
+                    tab = '播放量榜_生活频道'
+                elif tab == 'meishi_new':
+                    tab = '播放量榜_美食频道'
                 else:
                     tab = '播放量榜'
                 values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
@@ -239,22 +283,22 @@ class Play:
             Common.logger(log_type).error(f'download_publish异常:{e}\n')
 
     @classmethod
-    def get_tab_videos(cls, log_type, env):
+    def get_all_channel_videos(cls, log_type, env):
+        """Crawl every channel returned by ``get_channel_from_feishu``.
+
+        Each mapping value is split on ``,``; the first two fields are passed
+        to ``get_channel_videos`` as ``tab`` and ``our_id``. The loop sleeps
+        10 seconds after each channel.
+
+        :param log_type: logger name handed to ``Common.logger``.
+        :param env: forwarded unchanged to ``get_channel_videos``.
+        :return: ``None``; exceptions are logged rather than propagated.
+        """
         try:
-            tab_dict = cls.get_tab_from_feishu(log_type)
-            if len(tab_dict) == 0:
-                Common.logger(log_type).warning('TAB数量为空\n')
+            channel_dict = cls.get_channel_from_feishu(log_type)
+            # Truthiness also covers a None result, which len() would turn
+            # into a TypeError instead of this warning.
+            if not channel_dict:
+                Common.logger(log_type).warning('频道数量为空\n')
             else:
-                for k, v in tab_dict.items():
+                for k, v in channel_dict.items():
                     Common.logger(log_type).info(f'正在获取 {k} 频道视频\n')
-                    cls.get_play_videos(log_type, v.split(',')[0], v.split(',')[1], env)
+                    cls.get_channel_videos(log_type, v.split(',')[0], v.split(',')[1], env)
                     time.sleep(10)
         except Exception as e:
-            Common.logger(log_type).error(f'get_tab_videos异常:{e}\n')
+            # Log label follows the renamed method so log searches find it.
+            Common.logger(log_type).error(f'get_all_channel_videos异常:{e}\n')
 
 
 if __name__ == '__main__':
-    # print(Play.get_tab_from_feishu('play'))
-    Play.get_play_videos('play', 'recommend', '6267140', 'dev')
+    # print(Channel.get_channel_from_feishu('channel'))
+    Channel.get_channel_videos('channel', 'recommend', '6267140', 'dev')
 
     pass

+ 45 - 3
main/haokan_hot.py

@@ -97,10 +97,13 @@ class Hot:
                         cover_url = feeds[i]['poster']
 
                     # video_url
-                    if 'videoUrl' not in feeds[i]:
-                        video_url = 0
-                    else:
+                    get_video_url = cls.get_video_url(log_type, video_id)
+                    if get_video_url is not None:
+                        video_url = get_video_url
+                    elif 'videoUrl' in feeds[i]:
                         video_url = feeds[i]['videoUrl']
+                    else:
+                        video_url = 0
 
                     Common.logger(log_type).info(f'video_title:{video_title}')
                     Common.logger(log_type).info(f'video_id:{video_id}')
@@ -123,6 +126,45 @@ class Hot:
 
                 time.sleep(5)
 
+    @classmethod
+    def get_video_url(cls, log_type, video_id):
+        """Fetch the play URL of a Haokan video from its detail endpoint.
+
+        Sends a GET to ``https://haokan.hao123.com/v`` with ``vid`` and
+        ``_format=json`` and reads ``data.apiData.curVideoMeta.clarityUrl``
+        from the JSON reply.
+
+        :param log_type: logger name handed to ``Common.logger``.
+        :param video_id: video id; converted with ``str`` and sent as ``vid``.
+        :return: ``url`` of the last ``clarityUrl`` entry, or ``None`` when
+            the HTTP status is not 200, ``errno`` is non-zero, ``data`` is
+            empty, ``clarityUrl`` has no entries, or any exception is raised
+            (the exception is logged, not propagated).
+        """
+        try:
+            url = 'https://haokan.hao123.com/v?'
+            params = {
+                'vid': str(video_id),
+                '_format': 'json',
+            }
+            # NOTE(review): headers and cookie look like a captured browser
+            # session; the hard-coded cookie presumably expires - confirm.
+            headers = {
+                'Accept': '*/*',
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+                'Cache-Control': 'no-cache',
+                'Connection': 'keep-alive',
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Cookie': 'PC_TAB_LOG=video_details_page; COMMON_LID=b0be69dd9fcae328d06935bd40f615cd; Hm_lvt_4aadd610dfd2f5972f1efee2653a2bc5=1669029953; hkpcvideolandquery=%u82CF%u5DDE%u6700%u5927%u7684%u4E8C%u624B%u8F66%u8D85%u5E02%uFF0C%u8F6C%u4E00%u8F6C%u91CC%u8FB9%u8C6A%u8F66%u592A%u591A%u4E86%uFF0C%u4EF7%u683C%u66F4%u8BA9%u6211%u5403%u60CA%uFF01; Hm_lpvt_4aadd610dfd2f5972f1efee2653a2bc5=1669875695; ariaDefaultTheme=undefined; reptileData=%7B%22data%22%3A%22636c55e0319da5169a60acec4a264a35c10862f8abfe2f2cc32c55eb6b0ab4de0efdfa115ea522d6d4d361dea07feae2831d3e2c16ed6b051c611ffe5aded6c9f852501759497b9fbd2132a2160e1e40e5845b41f78121ddcc3288bd077ae4e8%22%2C%22key_id%22%3A%2230%22%2C%22sign%22%3A%22f6752aac%22%7D; RT="z=1&dm=hao123.com&si=uc0q7wnm4w&ss=lb4otu71&sl=j&tt=av0&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=1rdw&cl=7v6c"',
+                'Pragma': 'no-cache',
+                'Referer': 'https://haokan.hao123.com/v?vid=10623278258033022286&pd=pc&context=',
+                'sec-ch-ua': '"Microsoft Edge";v="107", "Chromium";v="107", "Not=A?Brand";v="24"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"macOS"',
+                'Sec-Fetch-Dest': 'empty',
+                'Sec-Fetch-Mode': 'cors',
+                'Sec-Fetch-Site': 'same-origin',
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.62',
+            }
+            # requests waits indefinitely unless a timeout is given; this is
+            # called once per feed item, so a stalled socket must not hang
+            # the whole crawl.
+            r = requests.get(url=url, headers=headers, params=params, timeout=30)
+            if r.status_code != 200:
+                Common.logger(log_type).info(f'get_video_url_response:{r.text}')
+                return None
+
+            # Decode the body once instead of re-parsing it for every lookup.
+            body = r.json()
+            if body['errno'] != 0 or len(body['data']) == 0:
+                Common.logger(log_type).info(f'get_video_url_response:{body}')
+                return None
+
+            clarity_urls = body['data']['apiData']['curVideoMeta']['clarityUrl']
+            if not clarity_urls:
+                # An empty list would make the [-1] lookup raise IndexError.
+                Common.logger(log_type).info(f'get_video_url_response:{body}')
+                return None
+
+            # Same pick as before: the last entry of the clarity list.
+            return clarity_urls[-1]['url']
+        except Exception as e:
+            Common.logger(log_type).error(f'get_video_url异常:{e}\n')
+            return None
+
     @classmethod
     def download_publish(cls, log_type, video_dict, our_id, env):
         if video_dict['video_title'] == 0 or video_dict['video_url'] == 0:

+ 3 - 3
main/run_haokan_play.py → main/run_haokan_channel.py

@@ -7,18 +7,18 @@ import sys
 import time
 sys.path.append(os.getcwd())
 from main.common import Common
-from main.haokan_play import Play
+from main.haokan_channel import Channel
 
 
 class Main:
+    """Entry point wrapper for the channel crawler loop."""
+
     @classmethod
     def main(cls, log_type, env):
+        """Run the channel crawl in an endless loop.
+
+        Each pass crawls all channels, calls ``Common.del_logs``, then sleeps
+        a random 10-30 seconds before the next pass.
+
+        :param log_type: logger name handed to ``Common`` helpers.
+        :param env: forwarded to ``Channel.get_all_channel_videos``.
+        """
         while True:
-            Play.get_tab_videos(log_type, env)
+            Channel.get_all_channel_videos(log_type, env)
             Common.del_logs(log_type)
             Common.logger(log_type).info('随机休眠 10 - 30 秒\n')
             time.sleep(random.randint(10, 30))
 
 
 if __name__ == '__main__':
-    Main.main('play', 'prod')
+    # Start the endless crawl with log_type 'channel' and env 'prod'.
+    Main.main('channel', 'prod')

+ 0 - 3
videos/__init__.py

@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/11/23