Quellcode durchsuchen

Update download validation and scheduled run-time logic

wangkun vor 3 Jahren
Ursprung
Commit
d0dc71ddc8
4 geänderte Dateien mit 76 neuen und 59 gelöschten Zeilen
  1. 6 1
      main/download_sendtime.py
  2. 22 21
      main/download_up.py
  3. 12 1
      main/get_feeds.py
  4. 36 36
      main/run.py

+ 6 - 1
main/download_sendtime.py

@@ -106,7 +106,12 @@ class DownloadSendtime:
                             # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
 
                     # 判断基本规则
-                    if cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) is True:
+                    if cls.send_time_rule(v_width, v_height, v_duration, v_play_cnt_sendtime) is True \
+                            and v_id != "" and v_title != "" and v_duration != "" \
+                            and v_play_cnt_sendtime != "" and v_comment_cnt != "" and v_liked_cnt != "" \
+                            and v_shared_cnt != "" and v_width != "" and v_height != "" \
+                            and v_send_date != "" and v_username != "" and v_user_cover != "" \
+                            and v_video_cover != "" and download_url_up != "":
                         # 满足下载条件:当前时间 - 发布时间 <= 3天,播放量大于2万
                         if int(time.time()) - int(v_send_date) <= 259200:
                             if int(v_play_cnt_sendtime) >= 20000:

+ 22 - 21
main/download_up.py

@@ -98,22 +98,23 @@ class DownloadUp:
                     v_user_cover = data["user_info"]["headimg_url"]
                     v_video_cover = data["cover_url"]
                     if "items" not in data["play_info"]:
-                        if len(data["play_info"]) > 1:
+                        if len(data["play_info"]) > 2:
                             download_url_up = data["play_info"][2]["play_url"]
-                            # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
                         else:
                             download_url_up = data["play_info"][0]["play_url"]
-                            # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
                     else:
-                        if len(data["play_info"]["items"]) > 1:
+                        if len(data["play_info"]["items"]) > 2:
                             download_url_up = data["play_info"]["items"][2]["play_url"]
-                            # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
                         else:
                             download_url_up = data["play_info"]["items"][0]["play_url"]
-                            # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
 
                     # 判断基本规则
-                    if cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True:
+                    if cls.up_rule(v_width, v_height, v_duration, v_play_cnt_up) is True \
+                            and v_id != "" and v_title != "" and v_duration != "" \
+                            and v_play_cnt_up != "" and v_comment_cnt != "" and v_liked_cnt != "" \
+                            and v_shared_cnt != "" and v_width != "" and v_height != "" \
+                            and v_send_date != "" and v_username != "" and v_user_cover != "" \
+                            and v_video_cover != "" and download_url_up != "":
                         if int(time.time()) - int(v_time) < 3600:
                             Common.crawler_log().info("距上次获取该视频时间:{}分钟".format(
                                 int(int(int(time.time()) - int(v_time)) / 60)) + ";{}".format(v_title))
@@ -173,20 +174,20 @@ class DownloadUp:
                                         f_w.write(line2)
                                 with open("./txt/kanyikan_feeds.txt", "a", encoding="utf-8") as f_a:
                                     f_a.write(str(time.time()) + " + "
-                                              + v_id + " + "
-                                              + v_play_cnt_up + " + "
-                                              + v_title + " + "
-                                              + v_duration + " + "
-                                              + v_comment_cnt + " + "
-                                              + v_liked_cnt + " + "
-                                              + v_shared_cnt + " + "
-                                              + v_resolution + " + "
-                                              + v_send_date + " + "
-                                              + v_username + " + "
-                                              + v_user_cover + " + "
-                                              + v_video_cover + " + "
-                                              + download_url_up + " + "
-                                              + get_video_info_session + "\n")
+                                              + str(v_id) + " + "
+                                              + str(v_play_cnt_up) + " + "
+                                              + str(v_title) + " + "
+                                              + str(v_duration) + " + "
+                                              + str(v_comment_cnt) + " + "
+                                              + str(v_liked_cnt) + " + "
+                                              + str(v_shared_cnt) + " + "
+                                              + str(v_resolution) + " + "
+                                              + str(v_send_date) + " + "
+                                              + str(v_username) + " + "
+                                              + str(v_user_cover) + " + "
+                                              + str(v_video_cover) + " + "
+                                              + str(download_url_up) + " + "
+                                              + str(get_video_info_session) + "\n")
 
                         elif int(time.time()) - int(v_time) > 7200:
                             Common.crawler_log().info("距上次获取该视频时间:{}分钟。超过2小时,删除该视频".format(

+ 12 - 1
main/get_feeds.py

@@ -156,7 +156,18 @@ def get_feeds():
                         Common.crawler_log().info('视频播放地址:{}'.format(url))
 
                     # 过滤无效视频
-                    if str(video_id).strip() == "" or str(video_send_date).strip() == "":
+                    if video_id == "" \
+                            or video_send_date == "" \
+                            or video_title.strip() == "" \
+                            or video_play_cnt == "" \
+                            or video_liked_cnt == "" \
+                            or video_duration == "" \
+                            or video_comment_cnt == "" \
+                            or video_shared_cnt == "" \
+                            or video_user == "" \
+                            or video_user_cover == "" \
+                            or video_cover == "" \
+                            or url == "":
                         Common.crawler_log().info("无效视频")
                     else:
                         # 从 kanyikan_videoid.txt 去重

+ 36 - 36
main/run.py

@@ -20,18 +20,18 @@ class Main:
         """
         播放量脚本任务
         """
-        # while True:
-        #     # 指定时间结束抓取视频
-        #     if 19 >= Common.now.hour >= 0:
-        #         time.sleep(60)
-        #         break
-        #     else:
-
-        # 获取视频信息,并下载
-        get_feeds()
-        DownloadPlay.download_play_video("prod")
-        # 请求随机间隔时间
-        time.sleep(random.randint(31, 40))
+        while True:
+            # 指定时间结束抓取视频
+            if Common.now.hour == 0:
+                Common.crawler_log().info("结束抓取播放量视频\n")
+                time.sleep(3)
+                break
+            else:
+                # 获取视频信息,并下载
+                get_feeds()
+                DownloadPlay.download_play_video("prod")
+                # 请求随机间隔时间
+                time.sleep(random.randint(31, 40))
 
         # 删除 charles 缓存文件
         Common.del_charles_files()
@@ -45,18 +45,18 @@ class Main:
         """
         上升榜脚本任务
         """
-        # while True:
-        #     # 指定时间结束抓取视频
-        #     if 9 >= Common.now.hour >= 0 or 24 >= Common.now.hour >= 20:
-        #         time.sleep(60)
-        #         break
-        #     else:
-
-        # 获取视频信息,并下载
-        get_feeds()
-        DownloadUp.download_up_video("prod")
-        # 请求随机间隔时间
-        time.sleep(random.randint(31, 40))
+        while True:
+            # 指定时间结束抓取视频
+            if Common.now.hour > 19:
+                Common.crawler_log().info("结束抓取上升榜视频\n")
+                time.sleep(3)
+                break
+            else:
+                # 获取视频信息,并下载
+                get_feeds()
+                DownloadUp.download_up_video("prod")
+                # 请求随机间隔时间
+                time.sleep(random.randint(31, 40))
 
         # 删除 charles 缓存文件
         Common.del_charles_files()
@@ -70,18 +70,18 @@ class Main:
         """
         发布时间榜脚本任务
         """
-        # while True:
-        #     # 指定时间结束抓取视频
-        #     if 24 >= Common.now.hour >= 10:
-        #         time.sleep(60)
-        #         break
-        #     else:
-
-        # 获取视频信息,并下载
-        get_feeds()
-        DownloadSendtime.download_sendtime_video("prod")
-        # 请求随机间隔时间
-        time.sleep(random.randint(31, 40))
+        while True:
+            # 指定时间结束抓取视频
+            if Common.now.hour > 9:
+                Common.crawler_log().info("结束抓取3天榜单视频\n")
+                time.sleep(3)
+                break
+            else:
+                # 获取视频信息,并下载
+                get_feeds()
+                DownloadSendtime.download_sendtime_video("prod")
+                # 请求随机间隔时间
+                time.sleep(random.randint(31, 40))
 
         # 删除 charles 缓存文件
         Common.del_charles_files()