Sfoglia il codice sorgente

update download_play

wangkun 3 anni fa
parent
commit
db4143d829
2 file modificati con 70 aggiunte e 59 eliminazioni
  1. 69 58
      main/download_play.py
  2. 1 1
      抓取规则.txt

+ 69 - 58
main/download_play.py

@@ -44,69 +44,80 @@ class DownloadPlay:
         videos = Common.read_txt("kanyikan_feeds.txt")
         for video in videos:
             download_video_id = video.strip().split(" + ")[1]
-            download_video_title = video.strip().split(" + ")[3]
-            download_video_duration = video.strip().split(" + ")[4]
-            download_video_play_cnt = video.strip().split(" + ")[2]
-            download_video_comment_cnt = video.strip().split(" + ")[5]
-            download_video_like_cnt = video.strip().split(" + ")[6]
-            download_video_share_cnt = video.strip().split(" + ")[7]
-            download_video_resolution = video.strip().split(" + ")[8]
-            download_video_width = download_video_resolution.split("*")[0]
-            download_video_height = download_video_resolution.split("*")[-1]
-            download_video_send_time = video.strip().split(" + ")[9]
-            download_user_name = video.strip().split(" + ")[10]
-            download_head_url = video.strip().split(" + ")[11]
-            download_cover_url = video.strip().split(" + ")[12]
-            download_video_url = video.strip().split(" + ")[13]
-            download_video_session = video.strip().split(" + ")[-1]
+            try:
+                # download_video_id = video.strip().split(" + ")[1]
+                download_video_title = video.strip().split(" + ")[3]
+                download_video_duration = video.strip().split(" + ")[4]
+                download_video_play_cnt = video.strip().split(" + ")[2]
+                download_video_comment_cnt = video.strip().split(" + ")[5]
+                download_video_like_cnt = video.strip().split(" + ")[6]
+                download_video_share_cnt = video.strip().split(" + ")[7]
+                download_video_resolution = video.strip().split(" + ")[8]
+                download_video_width = download_video_resolution.split("*")[0]
+                download_video_height = download_video_resolution.split("*")[-1]
+                download_video_send_time = video.strip().split(" + ")[9]
+                download_user_name = video.strip().split(" + ")[10]
+                download_head_url = video.strip().split(" + ")[11]
+                download_cover_url = video.strip().split(" + ")[12]
+                download_video_url = video.strip().split(" + ")[13]
+                download_video_session = video.strip().split(" + ")[-1]
 
-            if cls.play_rule(download_video_width,
-                             download_video_height,
-                             download_video_duration,
-                             download_video_play_cnt) is True:
-                Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
-                # 下载封面
-                Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
-                # 下载视频
-                Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
+                if cls.play_rule(download_video_width,
+                                 download_video_height,
+                                 download_video_duration,
+                                 download_video_play_cnt) is True:
+                    Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
+                    # 下载封面
+                    Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
+                    # 下载视频
+                    Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
 
-                # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
-                with open("./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
-                    f_a.write(download_video_id + "\n")
+                    # 保存视频 ID 到 "./txt/kanyikan_videoid.txt"
+                    with open("./txt/kanyikan_videoid.txt", "a", encoding="utf8") as f_a:
+                        f_a.write(download_video_id + "\n")
 
-                # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                with open("./videos/" + download_video_title + "/info.txt", "a", encoding="utf8") as f_a:
-                    f_a.write(str(download_video_id) + "\n" +
-                              str(download_video_title) + "\n" +
-                              str(download_video_duration) + "\n" +
-                              str(download_video_play_cnt) + "\n" +
-                              str(download_video_comment_cnt) + "\n" +
-                              str(download_video_like_cnt) + "\n" +
-                              str(download_video_share_cnt) + "\n" +
-                              str(download_video_resolution) + "\n" +
-                              str(download_video_send_time) + "\n" +
-                              str(download_user_name) + "\n" +
-                              str(download_head_url) + "\n" +
-                              str(download_video_url) + "\n" +
-                              str(download_cover_url) + "\n" +
-                              str(download_video_session))
+                    # 保存视频信息至 "./videos/{download_video_title}/info.txt"
+                    with open("./videos/" + download_video_title + "/info.txt", "a", encoding="utf8") as f_a:
+                        f_a.write(str(download_video_id) + "\n" +
+                                  str(download_video_title) + "\n" +
+                                  str(download_video_duration) + "\n" +
+                                  str(download_video_play_cnt) + "\n" +
+                                  str(download_video_comment_cnt) + "\n" +
+                                  str(download_video_like_cnt) + "\n" +
+                                  str(download_video_share_cnt) + "\n" +
+                                  str(download_video_resolution) + "\n" +
+                                  str(download_video_send_time) + "\n" +
+                                  str(download_user_name) + "\n" +
+                                  str(download_head_url) + "\n" +
+                                  str(download_video_url) + "\n" +
+                                  str(download_cover_url) + "\n" +
+                                  str(download_video_session))
 
-                # 上传视频
-                Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
-                Publish.upload_and_publish(env, "play")
+                    # 上传视频
+                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
+                    Publish.upload_and_publish(env, "play")
 
-                # 删除该视频在kanyikan_feeds.txt中的信息
-                Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
-                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
-                    lines = f_r.readlines()
-                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
-                    for line in lines:
-                        if download_video_id in line.split(" + ")[1]:
-                            continue
-                        f_w.write(line)
-            else:
-                # 删除该视频在 recommend.txt中的信息
-                Common.crawler_log().info("该视频不满足下载规则,删除在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
+                    # 删除该视频在kanyikan_feeds.txt中的信息
+                    Common.crawler_log().info("删除该视频在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
+                    with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                        lines = f_r.readlines()
+                    with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                        for line in lines:
+                            if download_video_id in line.split(" + ")[1]:
+                                continue
+                            f_w.write(line)
+                else:
+                    # 删除该视频在 recommend.txt中的信息
+                    Common.crawler_log().info("该视频不满足下载规则,删除在kanyikan_feeds.txt中的信息:{}".format(download_video_title))
+                    with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                        lines = f_r.readlines()
+                    with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                        for line in lines:
+                            if download_video_id in line.split(" + ")[1]:
+                                continue
+                            f_w.write(line)
+            except Exception as e:
+                Common.crawler_log().info("视频 info 异常".format(e))
                 with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                     lines = f_r.readlines()
                 with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:

+ 1 - 1
抓取规则.txt

@@ -1,7 +1,7 @@
 ==========2022/4/21===========
 - 视频发布7日内,播放量大于1万(当前时间 - 发布时间 <= 7 天)
 - 任务执行规则:
-    1.凌晨0点-10点      3日内播放大于2万 爬取
+    1.凌晨0点-10点      7日内播放大于1万 爬取
     2.早上10点-20点     内容上升榜 爬取
     3.晚上20点-24点     15万播放爬取
 ==============================