wangkun 1 year ago
commit 8005c97569

+ 16 - 0
scheduling/scheduling_v3/scheduling_v3.sh

@@ -27,6 +27,11 @@ if [ ${env} = "--env=hk" ];then
   piaoquan_crawler_dir=/root/piaoquan_crawler/
   profile_path=/etc/profile
   python=python3
+elif [ ${crawler} = "--crawler=shipinhao" ] && [ ${env} = "--env=prod" ];then
+  piaoquan_crawler_dir=/Users/piaoquan/Desktop/crawler/piaoquan_crawler/
+  profile_path=/etc/profile
+  python=python3
+  node_path=/usr/local/bin/node
 elif [ ${env} = "--env=prod" ];then
   piaoquan_crawler_dir=/data5/piaoquan_crawler/
   profile_path=/etc/profile
@@ -47,6 +52,17 @@ echo "$(date "+%Y-%m-%d %H:%M:%S") Finished updating environment variables!"
 if [ ${env} = "--env=hk" ];then
   echo "升级yt-dlp"
   pip3 install yt-dlp -U
+elif [ ${crawler} = "--crawler=shipinhao" ];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") Restarting Appium..."
+  ps -ef | grep "/Applications/Appium.app/Contents/Resources/app/node_modules/appium/build/lib/main.js" | grep -v "grep"
+  if [ "$?" -eq 1 ];then
+    echo "$(date "+%Y-%m-%d %H:%M:%S") Appium stopped unexpectedly, restarting!"
+    rm -f ${piaoquan_crawler_dir}main/main_logs/nohup-appium.log
+    nohup ${node_path} /Applications/Appium.app/Contents/Resources/app/node_modules/appium/build/lib/main.js >>./main/main_logs/nohup-appium.log 2>&1 &
+    echo "$(date "+%Y-%m-%d %H:%M:%S") Appium restart complete!"
+  else
+    echo "$(date "+%Y-%m-%d %H:%M:%S") Appium process is running normally"
+  fi
 else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 正在更新代码..."
 #  cd ${piaoquan_crawler_dir} && git pull origin master --force && rm -f ${piaoquan_crawler_dir}main/nohup.log && rm -f ${piaoquan_crawler_dir}${nohup_dir}
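
The branch above keeps Appium alive: it greps the process list for Appium's main.js and, when no process is found, clears the old log and restarts Appium in the background under nohup. For reference, roughly the same check expressed as a Python sketch (not part of the commit; the paths and log location are copied from the diff and may differ per machine):

import subprocess

APPIUM_MAIN = "/Applications/Appium.app/Contents/Resources/app/node_modules/appium/build/lib/main.js"
NODE = "/usr/local/bin/node"
LOG = "./main/main_logs/nohup-appium.log"

def ensure_appium_running():
    # pgrep -f exits non-zero when nothing matches the pattern.
    alive = subprocess.run(["pgrep", "-f", APPIUM_MAIN],
                           capture_output=True).returncode == 0
    if alive:
        print("Appium process is running normally")
        return
    print("Appium stopped unexpectedly, restarting!")
    with open(LOG, "a") as log_file:
        # start_new_session detaches the child, similar to `nohup ... &`
        subprocess.Popen([NODE, APPIUM_MAIN], stdout=log_file,
                         stderr=subprocess.STDOUT, start_new_session=True)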

+ 28 - 13
shipinhao/shipinhao_search/shipinhao_search_scheduling.py

@@ -48,7 +48,7 @@ class ShipinhaoSearchScheduling:
         if rule_duration_max == 0:
             rule_duration_max = 100000000
 
-        rule_period_min = rule_dict.get('period', {}).get('min', 0)
+        # rule_period_min = rule_dict.get('period', {}).get('min', 0)
         # rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
         # if rule_period_max == 0:
         #     rule_period_max = 100000000
@@ -115,7 +115,7 @@ class ShipinhaoSearchScheduling:
         Common.logger(log_type, crawler).info(
             f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
         Common.logger(log_type, crawler).info(
-            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])*1000} >= rule_publish_time_min:{int(rule_publish_time_min)}')
 
         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
                 and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
@@ -125,7 +125,7 @@ class ShipinhaoSearchScheduling:
                 and int(rule_favorite_cnt_max) >= int(video_dict['favorite_cnt']) >= int(rule_favorite_cnt_min) \
                 and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
                 and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
-                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min):
+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp'])*1000 >= int(rule_publish_time_min):
             return True
         else:
             return False
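
The two `* 1000` edits above suggest a unit mismatch: the crawler stores publish_time_stamp as a seconds-based epoch, while the rule's publish_time bounds appear to be millisecond epochs, so the stamp is scaled up before the range check. A minimal sketch of that alignment, under that assumption (the values are made up):

import time

publish_time_stamp = int(time.time()) - 3 * 86400              # seconds, 3 days ago
rule_publish_time_min = (int(time.time()) - 7 * 86400) * 1000  # milliseconds, 7-day window
rule_publish_time_max = int(time.time()) * 1000                # milliseconds, now

# Mirror the comparison in download_rule: scale seconds to milliseconds first.
ok = rule_publish_time_max >= publish_time_stamp * 1000 >= rule_publish_time_min
print(ok)  # True: published within the allowed window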
@@ -262,10 +262,6 @@ class ShipinhaoSearchScheduling:
         videos_cnt = rule_dict.get('videos_cnt', {}).get('min', 0)
         index = 0
         while True:
-            if cls.download_cnt >= int(videos_cnt):
-                Common.logger(log_type, crawler).info(f'Search word: "{word}", videos crawled: {index}')
-                cls.download_cnt = 0
-                return
             # try:
             if cls.search_elements(driver, '//*[@class="double-rich double-rich_vertical"]') is None:
                 Common.logger(log_type, crawler).info('Window destroyed\n')
@@ -283,9 +279,16 @@ class ShipinhaoSearchScheduling:
                 return
 
             for i, video_element in enumerate(video_element_temp):
+                Common.logger(log_type, crawler).info(f"download_cnt:{cls.download_cnt}")
+                if cls.download_cnt >= int(videos_cnt):
+                    Common.logger(log_type, crawler).info(f'Search word: "{word}", videos crawled: {cls.download_cnt}')
+                    cls.download_cnt = 0
+                    return
+
                 if video_element is None:
                     Common.logger(log_type, crawler).info('Reached the end\n')
                     return
+
                 cls.i += 1
                 cls.search_elements(driver, '//div[@class="vc active__mask"]')
 
@@ -371,7 +374,10 @@ class ShipinhaoSearchScheduling:
         video_dict["video_height"] = ffmpeg_dict["height"]
 
         # Rule check
-        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False:
+        if cls.download_rule(log_type=log_type,
+                             crawler=crawler,
+                             video_dict=video_dict,
+                             rule_dict=rule_dict) is False:
             md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
             shutil.rmtree(f"./{crawler}/videos/{md_title}/")
             Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
@@ -397,9 +403,14 @@ class ShipinhaoSearchScheduling:
         Common.logger(log_type, crawler).info("视频上传完成")
 
         if our_video_id is None:
-            # Delete the video folder
-            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
-            return
+            try:
+                # Delete the video folder
+                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
+                shutil.rmtree(f"./{crawler}/videos/{md_title}")
+                Common.logger(log_type, crawler).warning(f"our_video_id:{our_video_id}, deleted successfully\n")
+                return
+            except FileNotFoundError:
+                return
 
         insert_sql = f""" insert into crawler_video(video_id,
                                                 out_user_id,
@@ -573,7 +584,7 @@ class ShipinhaoSearchScheduling:
                 continue
             our_user_list = []
             # for i in range(1, len(user_sheet)):
-            for i in range(1, 4):
+            for i in range(1, 3):
                 search_word = user_sheet[i][4]
                 our_uid = user_sheet[i][6]
                 tag1 = user_sheet[i][8]
@@ -641,5 +652,9 @@ if __name__ == '__main__':
     #                                             oss_endpoint="out",
     #                                             env="dev")
     # print(ShipinhaoSearchScheduling.get_users("search", "shipinhao", "wNgi6Z", "dev"))
-    print((date.today() + timedelta(days=0)).strftime("%Y-%m-%d"))
+    # print((date.today() + timedelta(days=0)).strftime("%Y-%m-%d"))
+    print(ShipinhaoSearchScheduling.repeat_out_video_id(log_type="search",
+                                                        crawler="shipinhao",
+                                                        out_video_id="123",
+                                                        env="dev"))
     pass
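
One more pattern from this commit worth noting: in the our_video_id-is-None branch above, the cleanup now derives the folder name from the md5 of the title (matching how the download folder is named during the rule check) and tolerates a folder that is already gone. A small standalone sketch of that cleanup (the base path layout is taken from the diff):

import shutil
from hashlib import md5

def remove_video_folder(crawler, video_title):
    # The on-disk folder is named by the md5 hex digest of the title.
    md_title = md5(video_title.encode("utf8")).hexdigest()
    try:
        shutil.rmtree(f"./{crawler}/videos/{md_title}")
    except FileNotFoundError:
        pass  # already removed: nothing to clean up

remove_video_folder("shipinhao", "example title")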

BIN
shipinhao/videos/.DS_Store