lierqiang 2 年之前
父节点
当前提交
df729dcba8
共有 2 个文件被更改,包括 22 次插入和 22 次删除
  1. 7 7
      douyin/douyin_follow/follow_dy.py
  2. 15 15
      main/process.sh

+ 7 - 7
douyin/douyin_follow/follow_dy.py

@@ -224,12 +224,12 @@ class DyFollow(object):
     @classmethod
     def download_publish(cls, log_type, crawler, strategy, video_dict, rule_dict, our_uid, oss_endpoint, env, machine):
         try:
-            if cls.download_rule(video_dict, rule_dict) is False:
-                Common.logger(log_type, crawler).info('不满足抓取规则\n')
-            elif any(word if word in video_dict['video_title'] else False for word in
-                     cls.filter_words(log_type, crawler)) is True:
-                Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
-            elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
+            # if cls.download_rule(video_dict, rule_dict) is False:
+            #     Common.logger(log_type, crawler).info('不满足抓取规则\n')
+            # elif any(word if word in video_dict['video_title'] else False for word in
+            #          cls.filter_words(log_type, crawler)) is True:
+            #     Common.logger(log_type, crawler).info('标题已中过滤词:{}\n', video_dict['video_title'])
+            if cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
                 Common.logger(log_type, crawler).info('视频已下载\n')
             else:
                 # 下载视频
@@ -346,7 +346,7 @@ class DyFollow(object):
     @classmethod
     def get_follow_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine):
         user_list = get_user_from_mysql(log_type, crawler, crawler, env)
-        rule_dict = cls.get_rule(log_type, crawler)
+        rule_dict = {}#cls.get_rule(log_type, crawler)
         for user in user_list:
             spider_link = user["spider_link"]
             out_uid = spider_link

+ 15 - 15
main/process.sh

@@ -158,21 +158,21 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 抖音推荐爬虫策略 进程状态正常" >> ${log_path}
 fi
 
-#
-# 抖音定向爬虫策略
-echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 抖音定向爬虫策略 进程状态" >> ${log_path}
-ps -ef | grep "run_douyin_follow.py" | grep -v "grep"
-if [ "$?" -eq 1 ];then
-  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-  if [ ${env} = "dev" ];then
-    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/douyin_main/run_douyin_follow.py --log_type="author" --crawler="douyin" --env="dev" xiaoniangao/nohup-play.log
-  else
-    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./douyin/douyin_main/run_douyin_follow.py --log_type="author" --crawler="douyin" --strategy="抖音定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" douyin/author.log
-  fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 抖音推荐爬虫策略 进程状态正常" >> ${log_path}
-fi
+##
+## 抖音定向爬虫策略
+#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 抖音定向爬虫策略 进程状态" >> ${log_path}
+#ps -ef | grep "run_douyin_follow.py" | grep -v "grep"
+#if [ "$?" -eq 1 ];then
+#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
+#  if [ ${env} = "dev" ];then
+#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./kuaishou/douyin_main/run_douyin_follow.py --log_type="author" --crawler="douyin" --env="dev" xiaoniangao/nohup-play.log
+#  else
+#    cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./douyin/douyin_main/run_douyin_follow.py --log_type="author" --crawler="douyin" --strategy="抖音定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" douyin/author.log
+#  fi
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+#else
+#  echo "$(date "+%Y-%m-%d %H:%M:%S") 抖音推荐爬虫策略 进程状态正常" >> ${log_path}
+#fi
 
 # 西瓜定向爬虫策略
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 西瓜定向爬虫策略 进程状态" >> ${log_path}