ソースを参照

更新下载规则:7 天内播放量大于 1W 的视频

wangkun 3 年 前
コミット
2f2d94d792
3 ファイル変更、33 行追加、9 行削除
  1. 15 8
      main/download_sendtime.py
  2. 9 1
      main/download_up.py
  3. 9 0
      抓取规则.txt

+ 15 - 8
main/download_sendtime.py

@@ -91,14 +91,14 @@ class DownloadSendtime:
                     v_user_cover = data["user_info"]["headimg_url"]
                     v_video_cover = data["cover_url"]
                     if "items" not in data["play_info"]:
-                        if len(data["play_info"]) > 1:
+                        if len(data["play_info"]) > 2:
                             download_url_up = data["play_info"][2]["play_url"]
                             # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
                         else:
                             download_url_up = data["play_info"][0]["play_url"]
                             # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
                     else:
-                        if len(data["play_info"]["items"]) > 1:
+                        if len(data["play_info"]["items"]) > 2:
                             download_url_up = data["play_info"]["items"][2]["play_url"]
                             # Common.crawler_log().info('视频下载地址:{}'.format(download_url_up))
                         else:
@@ -113,10 +113,10 @@ class DownloadSendtime:
                             and v_send_date != "" and v_username != "" and v_user_cover != "" \
                             and v_video_cover != "" and download_url_up != "":
                         # 满足下载条件:当前时间 - 发布时间 <= 3天,播放量大于2万
-                        if int(time.time()) - int(v_send_date) <= 259200:
-                            if int(v_play_cnt_sendtime) >= 20000:
+                        if int(time.time()) - int(v_send_date) <= 604800:
+                            if int(v_play_cnt_sendtime) >= 10000:
                                 Common.crawler_log().info("该视频:{}".format(
-                                    v_title) + " " + "在3天内的播放量{}>=20000".format(v_play_cnt_sendtime))
+                                    v_title) + " " + "在7天内的播放量{}>=10000".format(v_play_cnt_sendtime))
                                 # 下载封面
                                 Common.download_method("cover", v_title, v_video_cover)
                                 # 下载视频
@@ -157,7 +157,7 @@ class DownloadSendtime:
                                         f_w1.write(line1)
                             else:
                                 # 删除之前保存的该视频信息
-                                Common.crawler_log().info("该视频3天播放量:{}<20000".format(
+                                Common.crawler_log().info("该视频7天播放量:{}<10000".format(
                                     int(v_play_cnt_sendtime)) + ";" + "不满足下载规则:{}".format(v_title))
                                 with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
                                     lines = f_r.readlines()
@@ -167,7 +167,7 @@ class DownloadSendtime:
                                             continue
                                         f_w.write(line2)
                         else:
-                            Common.crawler_log().info("视频发布时间大于3天:{}天".format(
+                            Common.crawler_log().info("视频发布时间大于7天:{}天".format(
                                 int((int(time.time()) - int(v_send_date)) / 86400))
                                                 + ";" + "标题:{}".format(v_title))
                             with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
@@ -188,7 +188,14 @@ class DownloadSendtime:
                                 f_w.write(line3)
 
             except Exception as e:
-                Common.crawler_log().error("获取视频info异常:{}".format(e))
+                Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
+                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                    lines = f_r.readlines()
+                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                    for line4 in lines:
+                        if v_id in line4.split(" + ")[1]:
+                            continue
+                        f_w.write(line4)
 
 
 if __name__ == "__main__":

+ 9 - 1
main/download_up.py

@@ -213,7 +213,15 @@ class DownloadUp:
                                     continue
                                 f_w.write(line3)
             except Exception as e:
-                Common.crawler_log().error("获取视频info异常:{}".format(e))
+                Common.crawler_log().error("获取视频info异常:{},删除该视频".format(e))
+                # 删除之前保存的该视频信息
+                with open("./txt/kanyikan_feeds.txt", "r", encoding="utf8") as f_r:
+                    lines = f_r.readlines()
+                with open("./txt/kanyikan_feeds.txt", "w", encoding="utf-8") as f_w:
+                    for line4 in lines:
+                        if v_id in line4.split(" + ")[1]:
+                            continue
+                        f_w.write(line4)
 
 
 if __name__ == "__main__":

+ 9 - 0
抓取规则.txt

@@ -1,3 +1,12 @@
+==========2022/4/21===========
+- 视频发布7日内,播放量大于1万(当前时间 - 发布时间 <= 7 天)
+- 任务执行规则:
+    1.凌晨0点-10点      7日内播放大于1万 爬取
+    2.早上10点-20点     内容上升榜 爬取
+    3.晚上20点-24点     15万播放爬取
+==============================
+
+
 ==========2022/4/15===========
 - 视频发布3日内,播放量大于2万(当前时间 - 发布时间 <= 3 天)
 - 视频时长1分钟以上,10分钟以下