wangkun 2 years ago
parent
commit
355b9cb5b2

+ 1 - 1
README.MD

@@ -130,7 +130,7 @@ ps aux | grep run_xiaoniangao_play | grep -v grep | awk '{print $2}' | xargs kil
 #### 公众号
 ```commandline
 阿里云 102 服务器
-定向爬虫策略: ps aux | grep run_gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9 && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
+定向爬虫策略: /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
 线下调试
 定向爬虫策略: sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
 杀进程命令

+ 3 - 0
common/common.py

@@ -242,6 +242,9 @@ class Common:
         md_title = md5(video_title.encode('utf8')).hexdigest()
         video_path = f"./{crawler}/videos/{md_title}/video.mp4"
         # Common.logger(log_type, crawler).info(f"{video_path}")
+        if os.path.getsize(video_path) == 0:
+            Common.logger(log_type, crawler).info(f'video_size:{os.path.getsize(video_path)}')
+            return
         probe = ffmpeg.probe(video_path)
         video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
         if video_stream is None:

+ 0 - 2
common/publish.py

@@ -13,10 +13,8 @@ import time
 import oss2
 import requests
 import urllib3
-
 sys.path.append(os.getcwd())
 from common.common import Common
-
 proxies = {"http": None, "https": None}
 
 

BIN
gongzhonghao/.DS_Store


+ 6 - 1
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py

@@ -165,7 +165,7 @@ class GongzhonghaoFollow:
             driver.implicitly_wait(10)
             # Common.logger(log_type, crawler).info('打开文章链接')
             driver.get(article_url)
-            time.sleep(2)
+            time.sleep(1)
 
             if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
                 video_url = driver.find_element(
@@ -333,6 +333,11 @@ class GongzhonghaoFollow:
             # 获取视频时长
             ffmpeg_dict = Common.ffmpeg(log_type, crawler,
                                         f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+            if ffmpeg_dict is None:
+                # 删除视频文件夹
+                shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
+                Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                return
             video_dict["video_width"] = ffmpeg_dict["width"]
             video_dict["video_height"] = ffmpeg_dict["height"]
             video_dict["duration"] = ffmpeg_dict["duration"]

+ 0 - 0
video.mp4