lierqiang committed 2 years ago
parent
commit b3c444447b
2 changed files with 47 additions and 64 deletions
  1. kuaishou/kuaishou_recommend/recommend_kuaishou.py (+33, -50)
  2. main/process.sh (+14, -14)

kuaishou/kuaishou_recommend/recommend_kuaishou.py (+33, -50)

@@ -49,6 +49,7 @@ class KuaiShouRecommend:
     @classmethod
     def download_rule(cls, video_dict, rule_dict):
         if eval(f"{video_dict['video_width']}{rule_dict['video_width']}") is True \
+                and eval(f"{video_dict['play_cnt']}{rule_dict['play_cnt']}") is True \
                 and eval(f"{video_dict['video_height']}{rule_dict['video_height']}") is True \
                 and eval(f"{video_dict['like_cnt']}{rule_dict['like_cnt']}") is True \
                 and eval(f"{video_dict['duration']}{rule_dict['duration']}") is True \
@@ -188,10 +189,10 @@ class KuaiShouRecommend:
 
     @classmethod
     def get_videoList(cls, log_type, crawler, strategy, our_uid, oss_endpoint, env, machine):
-        # rule_dict_1 = cls.get_rule(log_type, crawler)
-        # if rule_dict_1 is None:
-        #     Common.logger(log_type, crawler).warning(f"rule_dict is None")
-        #     return
+        rule_dict_1 = cls.get_rule(log_type, crawler)
+        if rule_dict_1 is None:
+            Common.logger(log_type, crawler).warning(f"rule_dict is None")
+            return
 
         for i in range(100):
             url = "https://www.kuaishou.com/graphql"
@@ -359,55 +360,37 @@ class KuaiShouRecommend:
                                   'video_url': video_url,
                                   'session': f"kuaishou{int(time.time())}"}
 
-                    # rule_1 = cls.download_rule(video_dict, rule_dict_1)
-                    # Common.logger(log_type, crawler).info(f"video_title:{video_title}")
-                    # Common.logger(log_type, crawler).info(f"video_id:{video_id}\n")
-                    #
-                    # Common.logger(log_type, crawler).info(
-                    #     f"like_cnt:{video_dict['like_cnt']}{rule_dict_1['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_1['like_cnt']))}")
-                    # Common.logger(log_type, crawler).info(
-                    #     f"video_width:{video_dict['video_width']}{rule_dict_1['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_1['video_width']))}")
-                    # Common.logger(log_type, crawler).info(
-                    #     f"video_height:{video_dict['video_height']}{rule_dict_1['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_1['video_height']))}")
-                    # Common.logger(log_type, crawler).info(
-                    #     f"duration:{video_dict['duration']}{rule_dict_1['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_1['duration']))}")
-                    # Common.logger(log_type, crawler).info(
-                    #     f"publish_time:{video_dict['publish_time']}{rule_dict_1['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_1['publish_time']))}")
-                    # Common.logger(log_type, crawler).info(f"rule_1:{rule_1}\n")
-
+                    rule_1 = cls.download_rule(video_dict, rule_dict_1)
+                    Common.logger(log_type, crawler).info(f"video_title:{video_title}")
+                    Common.logger(log_type, crawler).info(f"video_id:{video_id}\n")
+
+                    Common.logger(log_type, crawler).info(
+                        f"like_cnt:{video_dict['like_cnt']}{rule_dict_1['like_cnt']}, {eval(str(video_dict['like_cnt']) + str(rule_dict_1['like_cnt']))}")
+                    Common.logger(log_type, crawler).info(
+                        f"video_width:{video_dict['video_width']}{rule_dict_1['video_width']}, {eval(str(video_dict['video_width']) + str(rule_dict_1['video_width']))}")
+                    Common.logger(log_type, crawler).info(
+                        f"video_height:{video_dict['video_height']}{rule_dict_1['video_height']}, {eval(str(video_dict['video_height']) + str(rule_dict_1['video_height']))}")
+                    Common.logger(log_type, crawler).info(
+                        f"duration:{video_dict['duration']}{rule_dict_1['duration']}, {eval(str(video_dict['duration']) + str(rule_dict_1['duration']))}")
+                    Common.logger(log_type, crawler).info(
+                        f"publish_time:{video_dict['publish_time']}{rule_dict_1['publish_time']}, {eval(str(video_dict['publish_time']) + str(rule_dict_1['publish_time']))}")
+                    Common.logger(log_type, crawler).info(f"rule_1:{rule_1}\n")
                     if video_title == "" or video_url == "":
                         Common.logger(log_type, crawler).info("无效视频\n")
                         continue
-                    cls.download_publish(log_type=log_type,
-                                         crawler=crawler,
-                                         strategy=strategy,
-                                         video_dict=video_dict,
-                                         rule_dict={},
-                                         our_uid=our_uid,
-                                         oss_endpoint=oss_endpoint,
-                                         env=env,
-                                         machine=machine)
-                # elif rule_1 is True:
-                #     cls.download_publish(log_type=log_type,
-                #                          crawler=crawler,
-                #                          strategy=strategy,
-                #                          video_dict=video_dict,
-                #                          rule_dict=rule_dict_1,
-                #                          our_uid=our_uid,
-                #                          oss_endpoint=oss_endpoint,
-                #                          env=env,
-                #                          machine=machine)
-
-
-                # else:
-                #     Common.logger(log_type, crawler).info("不满足下载规则\n")
-
-            # if pcursor == "no_more":
-            #     Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了,没有更多内容了\n")
-            #     return
-            # cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine,
-            #               pcursor=pcursor)
-            # time.sleep(random.randint(1, 3))
+                    elif rule_1 is True:
+                        cls.download_publish(log_type=log_type,
+                                             crawler=crawler,
+                                             strategy=strategy,
+                                             video_dict=video_dict,
+                                             rule_dict=rule_dict_1,
+                                             our_uid=our_uid,
+                                             oss_endpoint=oss_endpoint,
+                                             env=env,
+                                             machine=machine)
+
+                    else:
+                        Common.logger(log_type, crawler).info("不满足下载规则\n")
 
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, video_title, publish_time, env, machine):
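
The restored branch reads as a single if/elif/else: videos with an empty title or URL are skipped first ("无效视频", invalid video), then rule_1 decides between publishing and logging "不满足下载规则" (download rule not met). A condensed, self-contained sketch of that gate (download_publish is stubbed out here; names follow the diff):

def gate(video_title, video_url, rule_1):
    # Mirrors the restored control flow in get_videoList.
    if video_title == "" or video_url == "":
        return "skip: invalid video"
    elif rule_1 is True:
        return "download_publish"  # the real code calls cls.download_publish(...)
    else:
        return "skip: rule not met"

print(gate("title", "http://example.com/v.mp4", True))  # download_publish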

main/process.sh (+14, -14)

@@ -127,20 +127,20 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 快手定向爬虫策略 进程状态正常" >> ${log_path}
 fi
 
-## 快手推荐爬虫策略
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 快手推荐爬虫策略 进程状态" >> ${log_path}
-#ps -ef | grep "run_kuaishou_recommend.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --env="dev" xiaoniangao/nohup-play.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_recommend.py --log_type="recommend" --crawler="kuaishou" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/recommend.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手推荐爬虫策略 进程状态正常" >> ${log_path}
-#fi
+# 快手推荐爬虫策略
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 快手推荐爬虫策略 进程状态" >> ${log_path}
+ps -ef | grep "run_kuaishou_recommend.py" | grep -v "grep"
+if [ "$?" -eq 1 ];then
+  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
+  if [ ${env} = "dev" ];then
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --env="dev" xiaoniangao/nohup-play.log
+  else
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_recommend.py --log_type="recommend" --crawler="kuaishou" --strategy="推荐爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/recommend.log
+  fi
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手推荐爬虫策略 进程状态正常" >> ${log_path}
+fi
 
 #
 # 抖音推荐爬虫策略
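
The re-enabled shell block follows the same watchdog pattern as the other crawler monitors in process.sh: grep the process table, and if the pipeline exits non-zero, relaunch and log. A rough Python equivalent of just the liveness check, for illustration only (assumes pgrep is available on the host; the authoritative monitor is the shell block above):

import subprocess

def is_running(script_name: str) -> bool:
    # Same idea as `ps -ef | grep <name> | grep -v grep`:
    # pgrep -f exits 0 only if a matching process exists.
    return subprocess.run(["pgrep", "-f", script_name],
                          capture_output=True).returncode == 0

if not is_running("run_kuaishou_recommend.py"):
    print("recommend crawler stopped; the shell script would restart it here")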