
update scheduling module; deprecate the machine field

lierqiang 2 years ago
parent
commit
f1216e8196
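
With this change the scheduling helpers take only an env argument ('hk' / 'prod' / anything else for dev) and no longer accept machine. A minimal usage sketch of the updated interfaces, assuming the call values shown in the commented examples in this diff (illustrative only, not part of the commit):

    from common.scheduling_db import MysqlHelper, RedisHelper

    # env alone now selects the target instance: 'hk', 'prod', or the dev default otherwise
    tasks = MysqlHelper.get_values("scheduling", "scheduling", "select * from crawler_task", "dev")
    MysqlHelper.update_values("scheduling", "scheduling",
                              "UPDATE crawler_task SET next_time=0 WHERE task_id=1", "dev")  # example values
    redis_conn = RedisHelper.connect_redis("prod")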

+ 29 - 29
common/scheduling_db.py

@@ -7,37 +7,37 @@
 import redis
 import pymysql
 from common.common import Common
-# from common import Common
+

 class MysqlHelper:
     @classmethod
-    def connect_mysql(cls, env, machine):
-        if machine == 'aliyun_hk':
+    def connect_mysql(cls, env):
+        if env == 'hk':
             # create a Connection object, representing one database connection
             connection = pymysql.connect(
-                host="rm-j6cz4c6pt96000xi3.mysql.rds.aliyuncs.com",# database IP, internal address
+                host="rm-j6cz4c6pt96000xi3.mysql.rds.aliyuncs.com",  # database IP, internal address
                 # host="rm-j6cz4c6pt96000xi3lo.mysql.rds.aliyuncs.com",# database IP, external address
-                port=3306,                      # port number
-                user="crawler",                 #  mysql username
-                passwd="crawler123456@",        # mysql login password
-                db="piaoquan-crawler" ,         # database name
+                port=3306,  # port number
+                user="crawler",  # mysql username
+                passwd="crawler123456@",  # mysql login password
+                db="piaoquan-crawler",  # database name
                 # if the text in the database is utf8-encoded, set charset to utf8
-                charset = "utf8")
+                charset="utf8")
         elif env == 'prod':
             # create a Connection object, representing one database connection
             connection = pymysql.connect(
-                host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",# database IP, internal address
+                host="rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com",  # database IP, internal address
                 # host="rm-bp1159bu17li9hi94ro.mysql.rds.aliyuncs.com",# database IP, external address
-                port=3306,                      # port number
-                user="crawler",                 #  mysql username
-                passwd="crawler123456@",        # mysql login password
-                db="piaoquan-crawler" ,         # database name
+                port=3306,  # port number
+                user="crawler",  # mysql username
+                passwd="crawler123456@",  # mysql login password
+                db="piaoquan-crawler",  # database name
                 # if the text in the database is utf8-encoded, set charset to utf8
-                charset = "utf8")
+                charset="utf8")
         else:
             # create a Connection object, representing one database connection
             connection = pymysql.connect(
-                host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",# database IP, internal address
+                host="rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com",  # database IP, internal address
                 # host="rm-bp1k5853td1r25g3ndo.mysql.rds.aliyuncs.com",  # database IP, external address
                 port=3306,  # port number
                 user="crawler",  # mysql username
@@ -49,10 +49,10 @@ class MysqlHelper:
         return connection

     @classmethod
-    def get_values(cls, log_type, crawler, sql, env, machine):
+    def get_values(cls, log_type, crawler, sql, env):
         try:
             # connect to the database
-            connect = cls.connect_mysql(env, machine)
+            connect = cls.connect_mysql(env)
             # get a Cursor object
             mysql = connect.cursor(cursor=pymysql.cursors.DictCursor)

@@ -71,9 +71,9 @@ class MysqlHelper:
             Common.logger(log_type, crawler).error(f"get_values异常:{e}\n")

     @classmethod
-    def update_values(cls, log_type, crawler, sql, env, machine):
+    def update_values(cls, log_type, crawler, sql, env):
         # connect to the database
-        connect = cls.connect_mysql(env, machine)
+        connect = cls.connect_mysql(env)
         # get a Cursor object
         mysql = connect.cursor()

@@ -91,16 +91,18 @@ class MysqlHelper:
         # close the database connection
         connect.close()

+
 class RedisHelper:
     @classmethod
-    def connect_redis(cls, env, machine):
-        if machine == 'aliyun_hk':
+    def connect_redis(cls, env):
+        if env == 'hk':
             redis_pool = redis.ConnectionPool(
                 # host='r-bp154bpw97gptefiqk.redis.rds.aliyuncs.com',  # internal address
-                host='r-bp154bpw97gptefiqkpd.redis.rds.aliyuncs.com',  # external address
+                # host='r-bp154bpw97gptefiqkpd.redis.rds.aliyuncs.com',  # test address
+                host='r-bp1mb0v08fqi4hjffupd.redis.rds.aliyuncs.com',  # external address
                 port=6379,
                 db=2,
-                password='Qingqu2019'
+                password='Wqsd@2019'
             )
             redis_conn = redis.Redis(connection_pool=redis_pool)
         elif env == 'prod':
@@ -125,21 +127,20 @@ class RedisHelper:
 
 
     @classmethod
     def redis_push(cls, env, machine, data):
-        redis_conn = cls.connect_redis(env, machine)
+        redis_conn = cls.connect_redis(env)
         # print("开始写入数据")
         redis_conn.lpush(machine, data)
         # print("数据写入完成")

     @classmethod
     def redis_pop(cls, env, machine):
-        redis_conn = cls.connect_redis(env, machine)
+        redis_conn = cls.connect_redis(env)
         if redis_conn.llen(machine) == 0:
             return None
         else:
             return redis_conn.rpop(machine)


-
 if __name__ == "__main__":
     # sql_statement = f"INSERT INTO crawler_user ( user_id, out_user_id, out_user_name, out_avatar_url, platform, tag) " \
     #       f"VALUES ('6282398', 'out_uid_003', 'out_user_name', '', 'xiaoniangao', 'xiaoniangao_play')"
@@ -148,11 +149,10 @@ if __name__ == "__main__":
 
 
     # get_data = MysqlHelper.get_values("demo", "youtube", "select * from crawler_user", "dev", "local")
     # print(get_data)
-    print(RedisHelper.connect_redis("prod", "aliyun"))
+    print(RedisHelper.connect_redis("prod"))
     # RedisHelper.redis_push("dev", "local", "test1")
     # RedisHelper.redis_push("dev", "local", "test2")

     # print(RedisHelper.redis_pop("dev", "local"))

     pass
-

+ 10 - 10
main/scheduling_main.sh

@@ -4,20 +4,20 @@ crawler_dir=$1  # 爬虫执行路径,如: ./youtube/youtube_main/run_youtube_f
 log_type=$2     # log naming format, e.g. follow: creates 2023-02-08-follow.log under youtube/logs/
 crawler=$3      # which crawler, e.g.: youtube / kanyikan / weixinzhishu
 env=$4          # crawler runtime environment, production: prod / test: dev
-machine=$5      # machine the crawler runs on, Aliyun servers: aliyun_hk / aliyun / macpro / macair / local
-nohup_dir=$6    # nohup log path, e.g.: ./youtube/nohup.log
+#machine=$5      # machine the crawler runs on, Aliyun servers: aliyun_hk / aliyun / macpro / macair / local
+nohup_dir=$5    # nohup log path, e.g.: ./youtube/nohup.log

 echo "开始"
-echo ${machine}
-if [ ${machine} = "--machine=aliyun_hk" ];then
+echo ${env}
+if [ ${env} = "--env=hk" ];then
   piaoquan_crawler_dir=/root/piaoquan_crawler/
   profile_path=/etc/profile
   python=python3
-elif [ ${machine} = "--machine=aliyun" ];then
+elif [ ${env} = "--env=prod" ];then
   piaoquan_crawler_dir=/data5/piaoquan_crawler/
   profile_path=/etc/profile
   python=python
-elif [ ${machine} = "--machine=local" ];then
+elif [ ${env} = "--env=local" ];then
   piaoquan_crawler_dir=/Users/wangkun/Desktop/crawler/piaoquan_crawler/
   profile_path=/etc/profile
   node_path=/opt/homebrew/bin/node
@@ -33,7 +33,7 @@ grep_str=run_${crawler##*=}
 ps aux | grep ${grep_str} | grep Python | grep -v grep | awk '{print $2}' | xargs kill -9
 echo "$(date "+%Y-%m-%d %H:%M:%S") 进程已杀死!"

-if [ ${machine} = "--machine=aliyun_hk" ];then
+if [ ${env} = "--env=hk" ];then
   echo "升级yt-dlp"
   pip3 install yt-dlp -U
 else
@@ -42,9 +42,9 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成!"
 fi

-if [ ${machine} = "--machine=aliyun_hk" ];then
+if [ ${env} = "--env=hk" ];then
   echo "无需重启Appium及adb服务"
-elif [ ${machine} = "--machine=aliyun" ];then
+elif [ ${env} = "--env=prod" ];then
   echo "无需重启Appium及adb服务"
 else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启Appium..."
@@ -60,7 +60,7 @@ fi
 
 
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启服务..."
 cd ${piaoquan_crawler_dir}
-nohup ${python} -u ${crawler_dir} ${log_type} ${crawler} ${env} ${machine} >>${nohup_dir} 2>&1 &
+nohup ${python} -u ${crawler_dir} ${log_type} ${crawler} ${env} >>${nohup_dir} 2>&1 &
 echo "$(date "+%Y-%m-%d %H:%M:%S") 服务重启完毕!"

 exit 0

+ 43 - 24
scheduling/scheduling_main/crawler_scheduling.py

@@ -4,17 +4,18 @@
 import os
 import sys
 import time
+
 sys.path.append(os.getcwd())
 from common.common import Common
-from common.scheduling_db  import MysqlHelper, RedisHelper
+from common.scheduling_db import MysqlHelper, RedisHelper


 class Scheduling:
     # read the task table
     @classmethod
-    def get_task(cls, log_type, crawler, env, machine):
+    def get_task(cls, log_type, crawler, env):
         get_sql = """ select * from crawler_task """
-        all_task_list = MysqlHelper.get_values(log_type=log_type, crawler=crawler, sql=get_sql, env=env, machine=machine)
+        all_task_list = MysqlHelper.get_values(log_type=log_type, crawler=crawler, sql=get_sql, env=env)
         pre_task_list = []
         for task in all_task_list:
             if int(time.time()) >= task["next_time"]:
@@ -23,34 +24,39 @@ class Scheduling:
 
 
     # update the next run time; called when the task with this task_id is scheduled
     @classmethod
-    def update_task(cls, log_type, crawler, task_id, next_time, interval_piaoquan, env, machine):
+    def update_task(cls, log_type, crawler, task_id, next_time, interval_piaoquan, env):
         if interval_piaoquan > 0:
             new_next_time = next_time + interval_piaoquan
             update_sql = f""" UPDATE crawler_task SET next_time={new_next_time} WHERE task_id={task_id} """
-            MysqlHelper.update_values(log_type, crawler, update_sql, env, machine)
+            MysqlHelper.update_values(log_type, crawler, update_sql, env)

     # resource allocation / assembly
     @classmethod
-    def write_redis(cls, log_type, crawler, env, machine):
-        pre_task_list = cls.get_task(log_type=log_type, crawler=crawler, env=env, machine=machine)
+    def write_redis(cls, log_type, crawler, env):
+        pre_task_list = cls.get_task(log_type=log_type, crawler=crawler, env=env)
         if len(pre_task_list) == 0:
             Common.logger(log_type, crawler).info("暂无新任务\n")
         else:
             for pre_task in pre_task_list:
+                # the machine field only distinguishes overseas crawlers from domestic ones; it has no other meaning
+                machine = pre_task.get('machine', 'dev')
                 if machine == "hk":
                     # write to redis
-                    RedisHelper.redis_push(env, machine,str(pre_task))
-                elif machine == "aliyun":
+                    task_key = 'crawler_config_task_queue:hk'
+                    RedisHelper.redis_push(env, task_key, str(pre_task))
+                elif machine == "prod":
                     # write to redis
-                    RedisHelper.redis_push(env, machine,str(pre_task))
+                    task_key = 'crawler_config_task_queue:aliyun'
+                    RedisHelper.redis_push(env, task_key, str(pre_task))
                 else:
                     # write to redis
-                    RedisHelper.redis_push(env, machine,str(pre_task))
+                    task_key = 'crawler_config_task_queue:dev'
+                    RedisHelper.redis_push(env, task_key, str(pre_task))

     @classmethod
-    def get_redis(cls, log_type, crawler, env, machine):
+    def get_redis(cls, log_type, crawler, env):
         while True:
-            redis_data = RedisHelper.redis_pop(env, machine)
+            redis_data = RedisHelper.redis_pop(env)
             if redis_data is None or len(redis_data) == 0:
                 Common.logger(log_type, crawler).info("Redis为空,等待1秒")
                 time.sleep(1)
@@ -59,8 +65,8 @@ class Scheduling:
                 return task

     @classmethod
-    def scheduling_task(cls, log_type, crawler, env, machine):
-        task = cls.get_redis(log_type, crawler, env, machine)
+    def scheduling_task(cls, log_type, crawler, env):
+        task = cls.get_redis(log_type, crawler, env)
         Common.logger(log_type, crawler).info(f"task: {task}")
         Common.logger(log_type, crawler).info(f"已获取调度任务:{task}")
         task_id = task['task_id']
@@ -68,30 +74,43 @@ class Scheduling:
         next_time = task['next_time']
         interval_piaoquan = task['interval_piaoquan']
         spider_name = task['spider_name']
-        if machine == "aliyun":
+        if env == "aliyun":
             oss_endpoint = "inner"
-        elif machine == "aliyun_hk":
+        elif env == "hk":
             oss_endpoint = "hk"
         else:
             oss_endpoint = "out"

         if int(time.time()) >= next_time:
-            cls.update_task(log_type, crawler, task_id, next_time, interval_piaoquan, env, machine)
+            cls.update_task(log_type, crawler, task_id, next_time, interval_piaoquan, env)
             # production environment: dispatch the task
             Common.logger(log_type, crawler).info(f"开始调度任务:{task}\n")
-            task_str = [('task_id', str(task_id)), ('task_name', str(task['task_name'])), ('source', str(task['source'])), ('next_time', str(task['next_time'])), ('interval_piaoquan', str(task['interval_piaoquan'])), ('play_cnt', eval(task['spider_rule'])['play_cnt']),('video_width', eval(task['spider_rule'])['video_width']),('video_height', eval(task['spider_rule'])['video_height']),('video_like', eval(task['spider_rule'])['video_like']),('share_cnt', eval(task['spider_rule'])['share_cnt']),('duration_min', eval(task['spider_rule'])['duration']['min']),('duration_max', eval(task['spider_rule'])['duration']['max']),('task_type', task['task_type']),('spider_link', eval(task['spider_link'])),('spider_name', str(task['spider_name'])),('min_publish_time', str(task['min_publish_time'])),('min_publish_day', str(task['min_publish_day'])),('media_id', str(task['media_id'])),('applets_status', str(task['applets_status'])),('app_status', str(task['app_status'])),('user_tag', str(task['user_tag'])),('user_content_tag',str(task['user_content_tag'])),('machine', str(task['machine']))]
+            task_str = [('task_id', str(task_id)), ('task_name', str(task['task_name'])),
+                        ('source', str(task['source'])), ('next_time', str(task['next_time'])),
+                        ('interval_piaoquan', str(task['interval_piaoquan'])),
+                        ('play_cnt', eval(task['spider_rule'])['play_cnt']),
+                        ('video_width', eval(task['spider_rule'])['video_width']),
+                        ('video_height', eval(task['spider_rule'])['video_height']),
+                        ('video_like', eval(task['spider_rule'])['video_like']),
+                        ('share_cnt', eval(task['spider_rule'])['share_cnt']),
+                        ('duration_min', eval(task['spider_rule'])['duration']['min']),
+                        ('duration_max', eval(task['spider_rule'])['duration']['max']),
+                        ('task_type', task['task_type']), ('spider_link', eval(task['spider_link'])),
+                        ('spider_name', str(task['spider_name'])), ('min_publish_time', str(task['min_publish_time'])),
+                        ('min_publish_day', str(task['min_publish_day'])), ('media_id', str(task['media_id'])),
+                        ('applets_status', str(task['applets_status'])), ('app_status', str(task['app_status'])),
+                        ('user_tag', str(task['user_tag'])), ('user_content_tag', str(task['user_content_tag'])),
+                        ('machine', str(task['machine']))]
             task_str = str(task_str).replace(' ', '')
-            cmd = f"""sh scheduling/scheduling_main/scheduling.sh {source}/{source}_main/{spider_name}_scheduling.py --log_type="{spider_name}" --crawler="{source}" --task="{str(task_str)}" --oss_endpoint="{oss_endpoint}" --env="{env}" --machine="{machine}" {source}/{source}-nohup.log """
+            cmd = f"""sh scheduling/scheduling_main/scheduling.sh {source}/{source}_main/{spider_name}_scheduling.py --log_type="{spider_name}" --crawler="{source}" --task="{str(task_str)}" --oss_endpoint="{oss_endpoint}" --env="{env}" {source}/{source}-nohup.log """
             Common.logger(log_type, crawler).info(f"cmd:{cmd}\n")
             os.system(cmd)


-
-
 if __name__ == "__main__":
     # print(Scheduling.get_task("scheduling", "scheduling", "dev", "local"))
     # print(Scheduling.get_redis("scheduling", "scheduling", "dev", "local"))
     # Scheduling.write_redis("scheduling", "scheduling", "dev", "local")
-    Scheduling.scheduling_task("scheduling", "scheduling", "dev", "local")
+    Scheduling.scheduling_task("scheduling", "scheduling", "dev")
 
 
-    pass
+    pass
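
A condensed view of the queue routing that write_redis now performs (a sketch only; the dict and helper below are hypothetical and simply mirror the if/elif chain in the diff above):

    # machine only distinguishes overseas ('hk') from domestic ('prod') crawlers; anything else goes to dev
    QUEUE_BY_MACHINE = {
        'hk': 'crawler_config_task_queue:hk',
        'prod': 'crawler_config_task_queue:aliyun',
    }

    def task_queue_key(pre_task):
        machine = pre_task.get('machine', 'dev')
        return QUEUE_BY_MACHINE.get(machine, 'crawler_config_task_queue:dev')

    # RedisHelper.redis_push(env, task_queue_key(pre_task), str(pre_task))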

+ 12 - 10
scheduling/scheduling_main/run_scheduling_task.py

@@ -5,6 +5,7 @@ import argparse
 import os
 import sys
 import time
+
 sys.path.append(os.getcwd())
 from common.common import Common
 from scheduling.scheduling_main.crawler_scheduling import Scheduling
@@ -12,10 +13,10 @@ from scheduling.scheduling_main.crawler_scheduling import Scheduling
 
 
 class SchedulingTask:
     @classmethod
-    def scheduling_task(cls, log_type, crawler, env, machine):
+    def scheduling_task(cls, log_type, crawler, env):
         while True:
             Common.logger(log_type, crawler).info("开始调度爬虫任务")
-            Scheduling.scheduling_task(log_type, crawler, env, machine)
+            Scheduling.scheduling_task(log_type, crawler, env)
             Common.logger(log_type, crawler).info("爬虫任务调度完成")
             Common.del_logs(log_type, crawler)
             time.sleep(60)
@@ -23,13 +24,14 @@ class SchedulingTask:
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()  ## create the argument parser object
-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
-    parser.add_argument('--crawler')  ## 添加参数
-    parser.add_argument('--env')  ## 添加参数
-    parser.add_argument('--machine')  ## 添加参数
+    parser.add_argument('--log_type', default='follow', type=str)  ## 添加参数,注明参数类型
+    parser.add_argument('--crawler', default='youtube')  ## 添加参数
+    parser.add_argument('--env', default='dev')  ## 添加参数
+    # parser.add_argument('--machine')  ## 添加参数
     args = parser.parse_args()  ### assign the arguments; they can also be passed from the terminal
     # print(args)
-    SchedulingTask.scheduling_task(log_type=args.log_type,
-                                   crawler=args.crawler,
-                                   env=args.env,
-                                   machine=args.machine)
+    SchedulingTask.scheduling_task(
+        log_type=args.log_type,
+        crawler=args.crawler,
+        env=args.env,
+    )

+ 13 - 10
scheduling/scheduling_main/run_write_task.py

@@ -5,6 +5,7 @@ import argparse
 import os
 import sys
 import time
+
 sys.path.append(os.getcwd())
 from common.common import Common
 from scheduling.scheduling_main.crawler_scheduling import Scheduling
@@ -12,24 +13,26 @@ from scheduling.scheduling_main.crawler_scheduling import Scheduling
 
 
 class WriteTask:
     @classmethod
-    def write_task(cls, log_type, crawler, env, machine):
+    def write_task(cls, log_type, crawler, env):
         while True:
             Common.logger(log_type, crawler).info("开始读取爬虫任务,写入Redis")
-            Scheduling.write_redis(log_type=log_type, crawler=crawler, env=env, machine=machine)
+            Scheduling.write_redis(log_type=log_type, crawler=crawler, env=env)
             Common.logger(log_type, crawler).info("写入Redis完成")
+
             Common.del_logs(log_type, crawler)
             time.sleep(60)


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()  ## create the argument parser object
-    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
-    parser.add_argument('--crawler')  ## 添加参数
-    parser.add_argument('--env')  ## 添加参数
-    parser.add_argument('--machine')  ## 添加参数
+    parser.add_argument('--log_type', default='follow', type=str, )  ## 添加参数,注明参数类型
+    parser.add_argument('--crawler', default='youtube')  ## 添加参数
+    parser.add_argument('--env', default='dev')  ## 添加参数
+    # parser.add_argument('--machine')  ## 添加参数
     args = parser.parse_args()  ### assign the arguments; they can also be passed from the terminal
     # print(args)
-    WriteTask.write_task(log_type=args.log_type,
-                         crawler=args.crawler,
-                         env=args.env,
-                         machine=args.machine)
+    WriteTask.write_task(
+        log_type=args.log_type,
+        crawler=args.crawler,
+        env=args.env,
+    )

+ 11 - 11
scheduling/scheduling_main/scheduling.sh

@@ -6,27 +6,27 @@ crawler=$3      # 哪款爬虫,如: youtube / kanyikan / weixinzhishu
 task=$4         # crawler task
 oss_endpoint=$5 # OSS gateway, internal: inner / external: out / Hong Kong: hk
 env=$6          # crawler runtime environment, production: prod / test: dev
-machine=$7      # machine the crawler runs on, Aliyun servers: aliyun_hk / aliyun / macpro / macair / local
-nohup_dir=$8    # nohup log path, e.g.: ./youtube/nohup.log
+#machine=$7      # machine the crawler runs on, Aliyun servers: aliyun_hk / aliyun / macpro / macair / local
+nohup_dir=$7    # nohup log path, e.g.: ./youtube/nohup.log
 
 
 echo "开始"
-#echo "machine:"${machine}
+#echo "env:"${env}
 
 
-if [ ${machine} = "--machine=aliyun_hk" ];then
+if [ ${env} = "--env=hk" ];then
   piaoquan_crawler_dir=/root/piaoquan_crawler/
   profile_path=/etc/profile
   python=python3
-elif [ ${machine} = "--machine=aliyun" ];then
+elif [ ${env} = "--env=prod" ];then
   piaoquan_crawler_dir=/data5/piaoquan_crawler/
   profile_path=/etc/profile
   python=python
-elif [ ${machine} = "--machine=local" ];then
+elif [ ${env} = "--env=local" ];then
   piaoquan_crawler_dir=/Users/wangkun/Desktop/crawler/piaoquan_crawler/
   profile_path=/etc/profile
   node_path=/opt/homebrew/bin/node
   python=python3
 else
-  echo ${machine}
+  echo ${env}
 fi

 echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量..."
@@ -38,7 +38,7 @@ echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成!"
 #ps aux | grep ${grep_str} | grep Python | grep -v grep | awk '{print $2}' | xargs kill -9
 #echo "$(date "+%Y-%m-%d %H:%M:%S") 进程已杀死!"

-if [ ${machine} = "--machine=aliyun_hk" ];then
+if [ ${env} = "--env=hk" ];then
   echo "升级yt-dlp"
   pip3 install yt-dlp -U
 else
@@ -47,9 +47,9 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成!"
 fi

-if [ ${machine} = "--machine=aliyun_hk" ];then
+if [ ${env} = "--env=hk" ];then
   echo "无需重启Appium及adb服务"
-elif [ ${machine} = "--machine=aliyun" ];then
+elif [ ${env} = "--env=prod" ];then
   echo "无需重启Appium及adb服务"
   echo "无需重启Appium及adb服务"
 else
 else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启Appium..."
   echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启Appium..."
@@ -65,7 +65,7 @@ fi
 
 
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启服务..."
 cd ${piaoquan_crawler_dir}
-nohup ${python} -u ${crawler_dir} ${log_type} ${crawler} ${task} ${oss_endpoint} ${env} ${machine} >>${nohup_dir} 2>&1 &
+nohup ${python} -u ${crawler_dir} ${log_type} ${crawler} ${task} ${oss_endpoint} ${env} >>${nohup_dir} 2>&1 &
 echo "$(date "+%Y-%m-%d %H:%M:%S") 服务重启完毕!"

 exit 0