@@ -4,17 +4,18 @@
 import os
 import sys
 import time
+
 sys.path.append(os.getcwd())
 from common.common import Common
-from common.scheduling_db import MysqlHelper, RedisHelper
+from common.scheduling_db import MysqlHelper, RedisHelper
 
 
 class Scheduling:
     # 读取任务表
     @classmethod
-    def get_task(cls, log_type, crawler, env, machine):
+    def get_task(cls, log_type, crawler, env):
         get_sql = """ select * from crawler_task """
-        all_task_list = MysqlHelper.get_values(log_type=log_type, crawler=crawler, sql=get_sql, env=env, machine=machine)
+        all_task_list = MysqlHelper.get_values(log_type=log_type, crawler=crawler, sql=get_sql, env=env)
         pre_task_list = []
         for task in all_task_list:
             if int(time.time()) >= task["next_time"]:
@@ -23,34 +24,39 @@ class Scheduling:
 
     # 更新下次启动时间,调用时机:调度该 task_id 的任务时
     @classmethod
-    def update_task(cls, log_type, crawler, task_id, next_time, interval_piaoquan, env, machine):
+    def update_task(cls, log_type, crawler, task_id, next_time, interval_piaoquan, env):
         if interval_piaoquan > 0:
             new_next_time = next_time + interval_piaoquan
             update_sql = f""" UPDATE crawler_task SET next_time={new_next_time} WHERE task_id={task_id} """
-            MysqlHelper.update_values(log_type, crawler, update_sql, env, machine)
+            MysqlHelper.update_values(log_type, crawler, update_sql, env)
 
     # 资源分配 / 组装
     @classmethod
-    def write_redis(cls, log_type, crawler, env, machine):
-        pre_task_list = cls.get_task(log_type=log_type, crawler=crawler, env=env, machine=machine)
+    def write_redis(cls, log_type, crawler, env):
+        pre_task_list = cls.get_task(log_type=log_type, crawler=crawler, env=env)
         if len(pre_task_list) == 0:
             Common.logger(log_type, crawler).info("暂无新任务\n")
         else:
             for pre_task in pre_task_list:
+                # machine字段是用来区分海外爬虫和国内爬虫使用的,不涉及任何其他含义
+                machine = pre_task.get('machine', 'dev')
                 if machine == "hk":
                     # 写入 redis
-                    RedisHelper.redis_push(env, machine,str(pre_task))
-                elif machine == "aliyun":
+                    task_key = 'crawler_config_task_queue:hk'
+                    RedisHelper.redis_push(env, task_key, str(pre_task))
+                elif machine == "prod":
                     # 写入 redis
-                    RedisHelper.redis_push(env, machine,str(pre_task))
+                    task_key = 'crawler_config_task_queue:aliyun'
+                    RedisHelper.redis_push(env, task_key, str(pre_task))
                 else:
                     # 写入 redis
-                    RedisHelper.redis_push(env, machine,str(pre_task))
+                    task_key = 'crawler_config_task_queue:dev'
+                    RedisHelper.redis_push(env, task_key, str(pre_task))
 
     @classmethod
-    def get_redis(cls, log_type, crawler, env, machine):
+    def get_redis(cls, log_type, crawler, env):
         while True:
-            redis_data = RedisHelper.redis_pop(env, machine)
+            redis_data = RedisHelper.redis_pop(env)
             if redis_data is None or len(redis_data) == 0:
                 Common.logger(log_type, crawler).info("Redis为空,等待1秒")
                 time.sleep(1)
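In the hunk above, write_redis now reads the machine field off each task row (defaulting to 'dev') instead of taking it as a parameter, and pushes the task onto a per-region Redis queue; per the added comment, machine only distinguishes overseas ("hk") from domestic ("prod"/Aliyun) crawlers. A minimal sketch of that routing in isolation (the queue-key strings and the 'dev' fallback come from the added lines; the helper name and dict below are illustrative assumptions, not part of this change):

# Illustrative sketch only — mirrors the queue-key routing added in write_redis.
TASK_QUEUE_KEYS = {
    "hk": "crawler_config_task_queue:hk",        # overseas crawlers
    "prod": "crawler_config_task_queue:aliyun",  # domestic (Aliyun) crawlers
}

def task_queue_key(pre_task: dict) -> str:
    # machine only marks overseas vs. domestic; anything else falls back to dev
    machine = pre_task.get("machine", "dev")
    return TASK_QUEUE_KEYS.get(machine, "crawler_config_task_queue:dev")

# e.g. task_queue_key({"machine": "hk"}) -> 'crawler_config_task_queue:hk'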
@@ -59,8 +65,8 @@ class Scheduling:
                 return task
 
     @classmethod
-    def scheduling_task(cls, log_type, crawler, env, machine):
-        task = cls.get_redis(log_type, crawler, env, machine)
+    def scheduling_task(cls, log_type, crawler, env):
+        task = cls.get_redis(log_type, crawler, env)
         Common.logger(log_type, crawler).info(f"task: {task}")
         Common.logger(log_type, crawler).info(f"已获取调度任务:{task}")
         task_id = task['task_id']
@@ -68,30 +74,43 @@ class Scheduling:
         next_time = task['next_time']
         interval_piaoquan = task['interval_piaoquan']
         spider_name = task['spider_name']
-        if machine == "aliyun":
+        if env == "aliyun":
             oss_endpoint = "inner"
-        elif machine == "aliyun_hk":
+        elif env == "hk":
             oss_endpoint = "hk"
         else:
             oss_endpoint = "out"
 
         if int(time.time()) >= next_time:
-            cls.update_task(log_type, crawler, task_id, next_time, interval_piaoquan, env, machine)
+            cls.update_task(log_type, crawler, task_id, next_time, interval_piaoquan, env)
             # 正式环境,调度任务
             Common.logger(log_type, crawler).info(f"开始调度任务:{task}\n")
-            task_str = [('task_id', str(task_id)), ('task_name', str(task['task_name'])), ('source', str(task['source'])), ('next_time', str(task['next_time'])), ('interval_piaoquan', str(task['interval_piaoquan'])), ('play_cnt', eval(task['spider_rule'])['play_cnt']),('video_width', eval(task['spider_rule'])['video_width']),('video_height', eval(task['spider_rule'])['video_height']),('video_like', eval(task['spider_rule'])['video_like']),('share_cnt', eval(task['spider_rule'])['share_cnt']),('duration_min', eval(task['spider_rule'])['duration']['min']),('duration_max', eval(task['spider_rule'])['duration']['max']),('task_type', task['task_type']),('spider_link', eval(task['spider_link'])),('spider_name', str(task['spider_name'])),('min_publish_time', str(task['min_publish_time'])),('min_publish_day', str(task['min_publish_day'])),('media_id', str(task['media_id'])),('applets_status', str(task['applets_status'])),('app_status', str(task['app_status'])),('user_tag', str(task['user_tag'])),('user_content_tag',str(task['user_content_tag'])),('machine', str(task['machine']))]
+            task_str = [('task_id', str(task_id)), ('task_name', str(task['task_name'])),
+                        ('source', str(task['source'])), ('next_time', str(task['next_time'])),
+                        ('interval_piaoquan', str(task['interval_piaoquan'])),
+                        ('play_cnt', eval(task['spider_rule'])['play_cnt']),
+                        ('video_width', eval(task['spider_rule'])['video_width']),
+                        ('video_height', eval(task['spider_rule'])['video_height']),
+                        ('video_like', eval(task['spider_rule'])['video_like']),
+                        ('share_cnt', eval(task['spider_rule'])['share_cnt']),
+                        ('duration_min', eval(task['spider_rule'])['duration']['min']),
+                        ('duration_max', eval(task['spider_rule'])['duration']['max']),
+                        ('task_type', task['task_type']), ('spider_link', eval(task['spider_link'])),
+                        ('spider_name', str(task['spider_name'])), ('min_publish_time', str(task['min_publish_time'])),
+                        ('min_publish_day', str(task['min_publish_day'])), ('media_id', str(task['media_id'])),
+                        ('applets_status', str(task['applets_status'])), ('app_status', str(task['app_status'])),
+                        ('user_tag', str(task['user_tag'])), ('user_content_tag', str(task['user_content_tag'])),
+                        ('machine', str(task['machine']))]
             task_str = str(task_str).replace(' ', '')
-            cmd = f"""sh scheduling/scheduling_main/scheduling.sh {source}/{source}_main/{spider_name}_scheduling.py --log_type="{spider_name}" --crawler="{source}" --task="{str(task_str)}" --oss_endpoint="{oss_endpoint}" --env="{env}" --machine="{machine}" {source}/{source}-nohup.log """
+            cmd = f"""sh scheduling/scheduling_main/scheduling.sh {source}/{source}_main/{spider_name}_scheduling.py --log_type="{spider_name}" --crawler="{source}" --task="{str(task_str)}" --oss_endpoint="{oss_endpoint}" --env="{env}" {source}/{source}-nohup.log """
             Common.logger(log_type, crawler).info(f"cmd:{cmd}\n")
             os.system(cmd)
 
 
-
-
 if __name__ == "__main__":
     # print(Scheduling.get_task("scheduling", "scheduling", "dev", "local"))
     # print(Scheduling.get_redis("scheduling", "scheduling", "dev", "local"))
     # Scheduling.write_redis("scheduling", "scheduling", "dev", "local")
-    Scheduling.scheduling_task("scheduling", "scheduling", "dev", "local")
+    Scheduling.scheduling_task("scheduling", "scheduling", "dev")
 
-    pass
+    pass
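With machine removed from the public entry points, a local run only needs log_type, crawler and env: write_redis acts as the producer (MySQL crawler_task → Redis queue) and scheduling_task as the consumer (Redis → shell out to the spider's *_scheduling.py via scheduling.sh). A usage sketch under those assumptions — the import path is inferred from the scheduling/scheduling_main/ layout referenced in cmd and is not stated in this diff:

# Usage sketch, assuming the refactored signatures above.
from scheduling.scheduling_main.scheduling import Scheduling  # assumed module path

# Producer: read crawler_task from MySQL and push due tasks onto the Redis queue.
Scheduling.write_redis(log_type="scheduling", crawler="scheduling", env="dev")

# Consumer: block until a task is available, then launch the spider via scheduling.sh.
Scheduling.scheduling_task(log_type="scheduling", crawler="scheduling", env="dev")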