wangkun 2 years ago
parent
commit
a784d424c7
5 changed files with 69 additions and 18 deletions
  1. 4 2
      main/demo.py
  2. 47 0
      main/get_cookies.py
  3. 3 4
      main/haokan_channel.py
  4. 9 5
      main/run_haokan_channel.py
  5. 6 7
      main/run_haokan_hot.py

+ 4 - 2
main/demo.py

@@ -3,6 +3,8 @@
 # @Time: 2022/11/23
 import time
 import requests
+from selenium.webdriver import DesiredCapabilities
+
 from main.common import Common
 from main.feishu_lib import Feishu
 
@@ -197,6 +199,6 @@ if __name__ == '__main__':
     # Demo.get_video_url('demo', '10377041690614321392')
     # Demo.get_follow_users('demo')
     # Demo.get_video_feeds('demo', '3xfr3gqnxmk92y2')
-    print(Feishu.get_values_batch('log_type', 'haokan', '5LksMx')[0][0])
-    print(type(Feishu.get_values_batch('log_type', 'haokan', '5LksMx')[0][0]))
+    # print(Feishu.get_values_batch('log_type', 'haokan', '5LksMx')[0][0])
+    # print(type(Feishu.get_values_batch('log_type', 'haokan', '5LksMx')[0][0]))
     pass

+ 47 - 0
main/get_cookies.py

@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/12/5
+import json
+import os
+import sys
+import time
+from selenium.webdriver import DesiredCapabilities
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+sys.path.append(os.getcwd())
+
+
+class GetCookies:
+    """Fetch cookies for a haokan.baidu.com channel tab via headless Chrome."""
+
+    @classmethod
+    def get_cookies(cls, channel):
+        """Return the first Set-Cookie header seen while loading the tab, or None."""
+        channel_url = 'https://haokan.baidu.com/tab/'+str(channel)
+        # Copy: DesiredCapabilities.CHROME is a shared dict; do not mutate it.
+        ca = DesiredCapabilities.CHROME.copy()
+        ca['goog:loggingPrefs'] = {'performance': 'ALL'}
+
+        chrome_options = webdriver.ChromeOptions()
+        chrome_options.add_argument('headless')
+        chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+        chrome_options.add_argument('--no-sandbox')
+
+        driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver_v107/chromedriver'))
+        try:
+            driver.implicitly_wait(10)
+            driver.get(channel_url)
+            time.sleep(5)  # give the page time to issue its requests
+            logs = driver.get_log('performance')
+        finally:
+            driver.quit()  # always release the browser, even on failure
+        for line in logs:
+            # Skip log entries that carry no params/headers/Set-Cookie.
+            headers = json.loads(line['message'])['message'].get('params', {}).get('headers', {})
+            if 'Set-Cookie' in headers:
+                return headers['Set-Cookie']
+        return None
+
+
+if __name__ == '__main__':
+    print(GetCookies.get_cookies('recommend'))

File diff suppressed because it is too large
+ 3 - 4
main/haokan_channel.py


+ 9 - 5
main/run_haokan_channel.py

@@ -12,11 +12,15 @@ from main.haokan_channel import Channel
 class Main:
     @classmethod
     def main(cls, log_type, env):
-        while True:
-            Channel.get_all_channel_videos(log_type, env)
-            Common.del_logs(log_type)
-            Common.logger(log_type).info('休眠 1 小时\n')
-            time.sleep(3600)
+        while True:
+            try:
+                Channel.get_all_channel_videos(log_type, env)
+                Common.del_logs(log_type)
+                Common.logger(log_type).info('休眠 1 小时\n')
+                time.sleep(3600)
+            except Exception as e:
+                # Retry by looping; recursing here would grow the stack on every failure.
+                Common.logger(log_type).error(f'{e}\n重新运行抓取程序\n')
 
 
 if __name__ == '__main__':

+ 6 - 7
main/run_haokan_hot.py

@@ -13,17 +13,16 @@ from main.haokan_hot import Hot
 class Main:
     @classmethod
     def main(cls, log_type, our_id, env):
-        while True:
-            if datetime.datetime.now().now().hour >= 0:
+        while True:
+            try:
                 Hot.get_hot_feeds(log_type, our_id, env)
                 Common.del_logs(log_type)
-                # Common.logger(log_type).info(f'热榜抓取完毕,休眠{24-datetime.datetime.now().hour}小时')
-                # time.sleep(3600 * (24 - datetime.datetime.now().hour))
                 Hot.page = 0
-                Common.logger(log_type).info('热榜抓取完毕,休眠1小时')
+                Common.logger(log_type).info('热榜抓取完毕,休眠1小时\n')
                 time.sleep(3600)
-            else:
-                pass
+            except Exception as e:
+                # Retry by looping; recursing here would grow the stack on every failure.
+                Common.logger(log_type).error(f'{e}\n重新运行抓取程序\n')
 
 
 if __name__ == '__main__':

Some files were not shown because too many files changed in this diff