lierqiang committed 2 years ago
parent
commit b3c444447b
2 changed files with 47 additions and 64 deletions
  1. kuaishou/kuaishou_recommend/recommend_kuaishou.py (+33, -50)
  2. main/process.sh (+14, -14)

kuaishou/kuaishou_recommend/recommend_kuaishou.py (+33, -50)

@@ -49,6 +49,7 @@ class KuaiShouRecommend:
     @classmethod
     def download_rule(cls, video_dict, rule_dict):
         if eval(f"{video_dict['video_width']}{rule_dict['video_width']}") is True \
+                and eval(f"{video_dict['play_cnt']}{rule_dict['play_cnt']}") is True \
                 and eval(f"{video_dict['video_height']}{rule_dict['video_height']}") is True \
                 and eval(f"{video_dict['like_cnt']}{rule_dict['like_cnt']}") is True \
                 and eval(f"{video_dict['duration']}{rule_dict['duration']}") is True \
@@ -188,10 +189,10 @@ class KuaiShouRecommend:
 
     @classmethod
     def get_videoList(cls, log_type, crawler, strategy, our_uid, oss_endpoint, env, machine):
-        # rule_dict_1 = cls.get_rule(log_type, crawler)
-        # if rule_dict_1 is None:
-        #     Common.logger(log_type, crawler).warning(f"rule_dict is None")
-        #     return
+        rule_dict_1 = cls.get_rule(log_type, crawler)
+        if rule_dict_1 is None:
+            Common.logger(log_type, crawler).warning(f"rule_dict is None")
+            return
 
         for i in range(100):
             url = "https://www.kuaishou.com/graphql"
@@ -359,55 +360,37 @@ class KuaiShouRecommend:
                                   'video_url': video_url,
                                   'session': f"kuaishou{int(time.time())}"}
 
-                    # rule_1 = cls.download_rule(video_dict, rule_dict_1)
-                    # Common.logger(log_type, crawler).info(f"video_title:{video_title}")
-                    # Common.logger(log_type, crawler).info(f"video_id:{video_id}\n")
-                    #
-                    # Common.logger(log_type, crawler).info(
-                    #     f"like_cnt:{video_dict['like_cnt']}{rule_dict_1['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_1['like_cnt']))}")
-                    # Common.logger(log_type, crawler).info(
-                    #     f"video_width:{video_dict['video_width']}{rule_dict_1['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_1['video_width']))}")
-                    # Common.logger(log_type, crawler).info(
-                    #     f"video_height:{video_dict['video_height']}{rule_dict_1['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_1['video_height']))}")
-                    # Common.logger(log_type, crawler).info(
-                    #     f"duration:{video_dict['duration']}{rule_dict_1['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_1['duration']))}")
-                    # Common.logger(log_type, crawler).info(
-                    #     f"publish_time:{video_dict['publish_time']}{rule_dict_1['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_1['publish_time']))}")
-                    # Common.logger(log_type, crawler).info(f"rule_1:{rule_1}\n")
-
+                    rule_1 = cls.download_rule(video_dict, rule_dict_1)
+                    Common.logger(log_type, crawler).info(f"video_title:{video_title}")
+                    Common.logger(log_type, crawler).info(f"video_id:{video_id}\n")
+
+                    Common.logger(log_type, crawler).info(
+                        f"like_cnt:{video_dict['like_cnt']}{rule_dict_1['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_1['like_cnt']))}")
+                    Common.logger(log_type, crawler).info(
+                        f"video_width:{video_dict['video_width']}{rule_dict_1['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_1['video_width']))}")
+                    Common.logger(log_type, crawler).info(
+                        f"video_height:{video_dict['video_height']}{rule_dict_1['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_1['video_height']))}")
+                    Common.logger(log_type, crawler).info(
+                        f"duration:{video_dict['duration']}{rule_dict_1['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_1['duration']))}")
+                    Common.logger(log_type, crawler).info(
+                        f"publish_time:{video_dict['publish_time']}{rule_dict_1['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_1['publish_time']))}")
+                    Common.logger(log_type, crawler).info(f"rule_1:{rule_1}\n")
                     if video_title == "" or video_url == "":
                         Common.logger(log_type, crawler).info("无效视频\n")
                         continue
-                    cls.download_publish(log_type=log_type,
-                                         crawler=crawler,
-                                         strategy=strategy,
-                                         video_dict=video_dict,
-                                         rule_dict={},
-                                         our_uid=our_uid,
-                                         oss_endpoint=oss_endpoint,
-                                         env=env,
-                                         machine=machine)
-                # elif rule_1 is True:
-                #     cls.download_publish(log_type=log_type,
-                #                          crawler=crawler,
-                #                          strategy=strategy,
-                #                          video_dict=video_dict,
-                #                          rule_dict=rule_dict_1,
-                #                          our_uid=our_uid,
-                #                          oss_endpoint=oss_endpoint,
-                #                          env=env,
-                #                          machine=machine)
-
-
-                # else:
-                #     Common.logger(log_type, crawler).info("不满足下载规则\n")
-
-            # if pcursor == "no_more":
-            #     Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了,没有更多内容了\n")
-            #     return
-            # cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine,
-            #               pcursor=pcursor)
-            # time.sleep(random.randint(1, 3))
+                    elif rule_1 is True:
+                        cls.download_publish(log_type=log_type,
+                                             crawler=crawler,
+                                             strategy=strategy,
+                                             video_dict=video_dict,
+                                             rule_dict=rule_dict_1,
+                                             our_uid=our_uid,
+                                             oss_endpoint=oss_endpoint,
+                                             env=env,
+                                             machine=machine)
+
+                    else:
+                        Common.logger(log_type, crawler).info("不满足下载规则\n")
 
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, video_title, publish_time, env, machine):
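
The restored branch reads as a single if/elif/else: videos with an empty title or URL are skipped first ("无效视频", invalid video), then rule_1 decides between publishing and logging "不满足下载规则" (download rule not met). A condensed, self-contained sketch of that gate (download_publish is stubbed out here; names follow the diff):

def gate(video_title, video_url, rule_1):
    # Mirrors the restored control flow in get_videoList.
    if video_title == "" or video_url == "":
        return "skip: invalid video"
    elif rule_1 is True:
        return "download_publish"  # the real code calls cls.download_publish(...)
    else:
        return "skip: rule not met"

print(gate("title", "http://example.com/v.mp4", True))  # download_publish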

main/process.sh (+14, -14)

@@ -127,20 +127,20 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 快手定向爬虫策略 进程状态正常" >> ${log_path}
 fi
 
-## 快手推荐爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 快手推荐爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_kuaishou_recommend.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --env="dev" xiaoniangao/nohup-play.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_recommend.py --log_type="recommend" --crawler="kuaishou" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/recommend.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手推荐爬虫策略 进程状态正常" >> ${log_path}
-#fi
+# 快手推荐爬虫策略
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 快手推荐爬虫策略 进程状态" >> ${log_path}
+ps -ef | grep "run_kuaishou_recommend.py" | grep -v "grep"
+if [ "$?" -eq 1 ];then
+  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
+  if [ ${env} = "dev" ];then
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --env="dev" xiaoniangao/nohup-play.log
+  else
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_recommend.py --log_type="recommend" --crawler="kuaishou" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/recommend.log
+  fi
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手推荐爬虫策略 进程状态正常" >> ${log_path}
+fi
 
 #
 # 抖音推荐爬虫策略
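
The re-enabled shell block follows the same watchdog pattern as the other crawler monitors in process.sh: grep the process table, and if the pipeline exits non-zero, relaunch and log. A rough Python equivalent of just the liveness check, for illustration only (assumes pgrep is available on the host; the authoritative monitor is the shell block above):

import subprocess

def is_running(script_name: str) -> bool:
    # Same idea as `ps -ef | grep <name> | grep -v grep`:
    # pgrep -f exits 0 only if a matching process exists.
    return subprocess.run(["pgrep", "-f", script_name],
                          capture_output=True).returncode == 0

if not is_running("run_kuaishou_recommend.py"):
    print("recommend crawler stopped; the shell script would restart it here")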