Browse Source

update xiaoniangao

wangkun 2 years ago
parent
commit
1ae338705c

+ 4 - 1
xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py

@@ -159,17 +159,20 @@ class XiaoniangaoHour:
         "wx_ver": "8.0.20",
         "code_ver": "3.62.0"
     }
-
         urllib3.disable_warnings()
         r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
         if 'data' not in r.text or r.status_code != 200:
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+            return
         elif "data" not in r.json():
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()}\n")
+            return
         elif "list" not in r.json()["data"]:
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}\n")
+            return
         elif len(r.json()['data']['list']) == 0:
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
+            return
         else:
             # 视频列表数据
             feeds = r.json()["data"]["list"]

+ 12 - 18
xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py

@@ -4,30 +4,24 @@
 import argparse
 import os
 import sys
-import time
 sys.path.append(os.getcwd())
 from common.common import Common
 from xiaoniangao.xiaoniangao_follow.xiaoniangao_follow import XiaoniangaoFollow
 
 
 def main(log_type, crawler, env):
-    while True:
-        try:
-            if env == "dev":
-                oss_endpoint = "out"
-            else:
-                oss_endpoint = "inner"
-            Common.logger(log_type, crawler).info('开始抓取 小年糕 定向榜\n')
-            XiaoniangaoFollow.get_follow_videos(log_type=log_type,
-                                                crawler=crawler,
-                                                strategy="定向爬虫策略",
-                                                oss_endpoint=oss_endpoint,
-                                                env=env)
-            Common.del_logs(log_type, crawler)
-            Common.logger(log_type, crawler).info('抓取完一轮,休眠 1 分钟\n')
-            time.sleep(60)
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f"小年糕定向抓取异常:{e}\n")
+    if env == "dev":
+        oss_endpoint = "out"
+    else:
+        oss_endpoint = "inner"
+    Common.logger(log_type, crawler).info('开始抓取 小年糕 定向榜\n')
+    XiaoniangaoFollow.get_follow_videos(log_type=log_type,
+                                        crawler=crawler,
+                                        strategy="定向爬虫策略",
+                                        oss_endpoint=oss_endpoint,
+                                        env=env)
+    Common.del_logs(log_type, crawler)
+    Common.logger(log_type, crawler).info('抓取完一轮\n')
 
 
 if __name__ == "__main__":

+ 0 - 1
xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py

@@ -11,7 +11,6 @@ from xiaoniangao.xiaoniangao_hour.xiaoniangao_hour import XiaoniangaoHour
 
 
 def main(log_type, crawler, env):
-    # while True:
     if env == "dev":
         oss_endpoint = "out"
     else:

+ 13 - 16
xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py

@@ -4,7 +4,6 @@
 import argparse
 import os
 import sys
-import time
 sys.path.append(os.getcwd())
 from common.common import Common
 from xiaoniangao.xiaoniangao_play.xiaoniangao_play import XiaoniangaoPlay
@@ -13,21 +12,19 @@ from xiaoniangao.xiaoniangao_play.xiaoniangao_play import XiaoniangaoPlay
 class Main:
     @classmethod
     def main(cls, log_type, crawler, env):
-        while True:
-            if env == "dev":
-                oss_endpoint = "out"
-            else:
-                oss_endpoint = "inner"
-            for i in range(50):
-                Common.logger(log_type, crawler).info(f'正在抓取小年糕播放量榜,第{i+1}页\n')
-                XiaoniangaoPlay.get_videoList(log_type=log_type,
-                                              crawler=crawler,
-                                              strategy="播放量榜爬虫策略",
-                                              oss_endpoint=oss_endpoint,
-                                              env=env)
-            Common.del_logs(log_type, crawler)
-            Common.logger(log_type, crawler).info('抓取完一轮,休眠 1 分钟\n')
-            time.sleep(60)
+        if env == "dev":
+            oss_endpoint = "out"
+        else:
+            oss_endpoint = "inner"
+        for i in range(100):
+            Common.logger(log_type, crawler).info(f'正在抓取小年糕播放量榜,第{i+1}页\n')
+            XiaoniangaoPlay.get_videoList(log_type=log_type,
+                                          crawler=crawler,
+                                          strategy="播放量榜爬虫策略",
+                                          oss_endpoint=oss_endpoint,
+                                          env=env)
+        Common.del_logs(log_type, crawler)
+        Common.logger(log_type, crawler).info('抓取完一轮\n')
 
 
 if __name__ == '__main__':

+ 6 - 0
xiaoniangao/xiaoniangao_play/xiaoniangao_play.py

@@ -154,10 +154,16 @@ class XiaoniangaoPlay:
             r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
             if "data" not in r.text or r.status_code != 200:
                 Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}")
+                return
             elif "data" not in r.json():
                 Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}")
+                return
             elif "list" not in r.json()["data"]:
                 Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}")
+                return
+            elif len(r.json()["data"]["list"]) == 0:
+                Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}")
+                return
             else:
                 # 视频列表数据
                 feeds = r.json()["data"]["list"]