2 years ago · 8005c97569
--- a/scheduling/scheduling_v3/scheduling_v3.sh
+++ b/scheduling/scheduling_v3/scheduling_v3.sh
@@ -27,6 +27,11 @@ if [ ${env} = "--env=hk" ];then
 
				   piaoquan_crawler_dir=/root/piaoquan_crawler/
			
 
				   profile_path=/etc/profile
			
 
				   python=python3
			
 
				+elif [ ${crawler} = "--crawler=shipinhao" ] && [ ${env} = "--env=prod" ];then
			
 
				+  piaoquan_crawler_dir=/Users/piaoquan/Desktop/crawler/piaoquan_crawler/
			
 
				+  profile_path=/etc/profile
			
 
				+  python=python3
			
 
				+  node_path=/usr/local/bin/node
			
 
				 elif [ ${env} = "--env=prod" ];then
			
 
				   piaoquan_crawler_dir=/data5/piaoquan_crawler/
			
 
				   profile_path=/etc/profile
			
@@ -47,6 +52,17 @@ echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成！"
 
				 if [ ${env} = "--env=hk" ];then
			
 
				   echo "升级yt-dlp"
			
 
				   pip3 install yt-dlp -U
			
 
				+elif [ ${crawler} = "--crawler=shipinhao" ];then
			
 
				+  echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启Appium..."
			
 
				+  # Appium counts as stopped when no process line mentions its main.js; matching lines are echoed for the log.
			
 
				+  if ! ps -ef | grep "/Applications/Appium.app/Contents/Resources/app/node_modules/appium/build/lib/main.js" | grep -v "grep";then
			
 
				+    echo "$(date "+%Y-%m-%d %H:%M:%S") Appium异常停止,正在重启!"
			
 
				+    rm -f "${piaoquan_crawler_dir}main/main_logs/nohup-appium.log"  # start from a fresh log file
			
 
				+    nohup "${node_path:-node}" /Applications/Appium.app/Contents/Resources/app/node_modules/appium/build/lib/main.js >>"${piaoquan_crawler_dir}main/main_logs/nohup-appium.log" 2>&1 &  # same file as removed above, independent of cwd; falls back to node on PATH when node_path is unset
			
 
				+    echo "$(date "+%Y-%m-%d %H:%M:%S") 重启Appium完毕!"
			
 
				+  else
			
 
				+    echo "$(date "+%Y-%m-%d %H:%M:%S") Appium 进程状态正常"
			
 
				+  fi
			
 
				 else
			
 
				   echo "$(date "+%Y-%m-%d %H:%M:%S") 正在更新代码..."
			
 
				 #  cd ${piaoquan_crawler_dir} && git pull origin master --force && rm -f ${piaoquan_crawler_dir}main/nohup.log && rm -f ${piaoquan_crawler_dir}${nohup_dir}
			
--- a/shipinhao/shipinhao_search/shipinhao_search_scheduling.py
+++ b/shipinhao/shipinhao_search/shipinhao_search_scheduling.py
@@ -48,7 +48,7 @@ class ShipinhaoSearchScheduling:
 
				         if rule_duration_max == 0:
			
 
				             rule_duration_max = 100000000
			
 
				 
			
 
				-        rule_period_min = rule_dict.get('period', {}).get('min', 0)
			
 
				+        # rule_period_min = rule_dict.get('period', {}).get('min', 0)
			
 
				         # rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
			
 
				         # if rule_period_max == 0:
			
 
				         #     rule_period_max = 100000000
			
@@ -115,7 +115,7 @@ class ShipinhaoSearchScheduling:
 
				         Common.logger(log_type, crawler).info(
			
 
				             f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
			
 
				         Common.logger(log_type, crawler).info(
			
 
				-            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
			
 
				+            f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])*1000} >= rule_publish_time_min:{int(rule_publish_time_min)}')
			
 
				 
			
 
				         if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
			
 
				                 and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
			
@@ -125,7 +125,7 @@ class ShipinhaoSearchScheduling:
 
				                 and int(rule_favorite_cnt_max) >= int(video_dict['favorite_cnt']) >= int(rule_favorite_cnt_min) \
			
 
				                 and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
			
 
				                 and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
			
 
				-                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min):
			
 
				+                and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp'])*1000 >= int(rule_publish_time_min):
			
 
				             return True
			
 
				         else:
			
 
				             return False
			
@@ -262,10 +262,6 @@ class ShipinhaoSearchScheduling:
 
				         videos_cnt = rule_dict.get('videos_cnt', {}).get('min', 0)
			
 
				         index = 0
			
 
				         while True:
			
 
				-            if cls.download_cnt >= int(videos_cnt):
			
 
				-                Common.logger(log_type, crawler).info(f'搜索词:"{word}"，已抓取视频数:{index}')
			
 
				-                cls.download_cnt = 0
			
 
				-                return
			
 
				             # try:
			
 
				             if cls.search_elements(driver, '//*[@class="double-rich double-rich_vertical"]') is None:
			
 
				                 Common.logger(log_type, crawler).info('窗口已销毁\n')
			
@@ -283,9 +279,16 @@ class ShipinhaoSearchScheduling:
 
				                 return
			
 
				 
			
 
				             for i, video_element in enumerate(video_element_temp):
			
 
				+                Common.logger(log_type, crawler).info(f"download_cnt:{cls.download_cnt}")
			
 
				+                if cls.download_cnt >= int(videos_cnt):
			
 
				+                    Common.logger(log_type, crawler).info(f'搜索词:"{word}"，已抓取视频数:{cls.download_cnt}')
			
 
				+                    cls.download_cnt = 0
			
 
				+                    return
			
 
				+
			
 
				                 if video_element is None:
			
 
				                     Common.logger(log_type, crawler).info('到底啦~\n')
			
 
				                     return
			
 
				+
			
 
				                 cls.i += 1
			
 
				                 cls.search_elements(driver, '//div[@class="vc active__mask"]')
			
 
				 
			
@@ -371,7 +374,10 @@ class ShipinhaoSearchScheduling:
 
				         video_dict["video_height"] = ffmpeg_dict["height"]
			
 
				 
			
 
				         # 规则判断
			
 
				-        if cls.download_rule(log_type, crawler, video_dict, rule_dict) is False:
			
 
				+        if cls.download_rule(log_type=log_type,
			
 
				+                             crawler=crawler,
			
 
				+                             video_dict=video_dict,
			
 
				+                             rule_dict=rule_dict) is False:
			
 
				             md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
			
 
				             shutil.rmtree(f"./{crawler}/videos/{md_title}/")
			
 
				             Common.logger(log_type, crawler).info("不满足抓取规则，删除成功\n")
			
@@ -397,9 +403,14 @@ class ShipinhaoSearchScheduling:
 
				         Common.logger(log_type, crawler).info("视频上传完成")
			
 
				 
			
 
				         if our_video_id is None:
			
 
				-            # 删除视频文件夹
			
 
				-            shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}")
			
 
				-            return
			
 
				+            try:
			
 
				+                # 删除视频文件夹
			
 
				+                md_title = md5(video_dict['video_title'].encode('utf8')).hexdigest()
			
 
				+                shutil.rmtree(f"./{crawler}/videos/{md_title}")
			
 
				+                Common.logger(log_type, crawler).warning(f"our_video_id:{our_video_id}, 删除成功\n")
			
 
				+                return
			
 
				+            except FileNotFoundError:
			
 
				+                return
			
 
				 
			
 
				         insert_sql = f""" insert into crawler_video(video_id,
			
 
				                                                 out_user_id,
			
@@ -573,7 +584,7 @@ class ShipinhaoSearchScheduling:
 
				                 continue
			
 
				             our_user_list = []
			
 
				             # for i in range(1, len(user_sheet)):
			
 
				-            for i in range(1, 4):
			
 
				+            for i in range(1, 3):
			
 
				                 search_word = user_sheet[i][4]
			
 
				                 our_uid = user_sheet[i][6]
			
 
				                 tag1 = user_sheet[i][8]
			
@@ -641,5 +652,9 @@ if __name__ == '__main__':
 
				     #                                             oss_endpoint="out",
			
 
				     #                                             env="dev")
			
 
				     # print(ShipinhaoSearchScheduling.get_users("search", "shipinhao", "wNgi6Z", "dev"))
			
 
				-    print((date.today() + timedelta(days=0)).strftime("%Y-%m-%d"))
			
 
				+    # print((date.today() + timedelta(days=0)).strftime("%Y-%m-%d"))
			
 
				+    print(ShipinhaoSearchScheduling.repeat_out_video_id(log_type="search",
			
 
				+                                                        crawler="shipinhao",
			
 
				+                                                        out_video_id="123",
			
 
				+                                                        env="dev"))
			
 
				     pass
			
--- a/shipinhao/videos/.DS_Store
+++ b/shipinhao/videos/.DS_Store