@@ -40,7 +40,7 @@ class Scheduling:
             machine = task[16]
             insert_time = task[17]
             update_time = task[18]
-            if next_time >= int(time.time()):
+            if int(time.time()) >= next_time:
                 task_dict = {
                     "task_id": task_id,
                     "task_name": task_name,
@@ -63,13 +63,17 @@ class Scheduling:
                     "update_time": update_time,
                 }
                 pre_task_list.append(task_dict)
-                if interval_piaoquan > 0:
-                    new_next_time = next_time + interval_piaoquan
-                    update_sql = f""" UPDATE crawler_task_1 SET next_time={new_next_time} WHERE task_id={task_id} """
-                    MysqlHelper.update_values(log_type, crawler, update_sql, env, machine)
-
         return pre_task_list
 
+    # Update the next start time; called when dispatching the task with this task_id
+    @classmethod
+    def update_task(cls, log_type, crawler, task_id, next_time, interval_piaoquan, env, machine):
+        if interval_piaoquan > 0:
+            new_next_time = next_time + interval_piaoquan
+            update_sql = f""" UPDATE crawler_task_1 SET next_time={new_next_time} WHERE task_id={task_id} """
+            MysqlHelper.update_values(log_type, crawler, update_sql, env, machine)
+
+
     'sh ./main/main.sh' \
     ' ./xigua/xigua_main/run_xigua_follow.py' \
     ' --log_type="follow"' \
@@ -80,9 +84,9 @@ class Scheduling:
     ' --machine="aliyun"' \
     ' xigua/nohup.log'
 
-    # Resource allocation / assembly / task dispatch
+    # Resource allocation / assembly
     @classmethod
-    def main(cls, log_type, crawler, env, machine):
+    def write_redis(cls, log_type, crawler, env, machine):
         pre_task_list = cls.get_task(log_type=log_type, crawler=crawler, env=env, machine=machine)
         if len(pre_task_list) == 0:
             Common.logger(log_type, crawler).info("暂无新任务\n")
@@ -104,6 +108,12 @@ class Scheduling:
         # Write to redis
         pass
 
 
+    @classmethod
+    def main(cls):
+        # When the current time >= next_time, update next_time (via update_task), then start that crawler
+
+        pass
+
 if __name__ == "__main__":
-    Scheduling.main("scheduling", "scheduling", "dev", "local")
+    Scheduling.write_redis("scheduling", "scheduling", "dev", "local")
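
The flipped comparison in the first hunk is the core behavioural fix: the old condition next_time >= int(time.time()) only matched tasks whose next_time was still in the future, so tasks that were actually due were never collected; the new condition picks a task up once the current timestamp has caught up with its next_time. A minimal sketch of that predicate on its own (the helper name is invented for illustration):

import time

def is_due(next_time: int) -> bool:
    # A task is due once "now" has reached or passed its scheduled next_time.
    return int(time.time()) >= next_time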
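
The new update_task pushes next_time forward by interval_piaoquan so a repeating task is not dispatched again on the very next poll, while tasks with a non-positive interval keep their time. A sketch of the same rescheduling arithmetic with the MySQL write left out (the function name is hypothetical):

def next_run(next_time: int, interval_piaoquan: int) -> int:
    # Repeating task: schedule the next run one interval later.
    if interval_piaoquan > 0:
        return next_time + interval_piaoquan
    # Non-repeating task: next_time is left unchanged.
    return next_time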
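
main() is still a stub; its comment says it should compare the current time with each task's next_time, update next_time via update_task, and then start that crawler. A minimal in-memory sketch of that loop, assuming the task dicts built by get_task carry next_time and interval_piaoquan (only some keys are visible in the hunks) and using a placeholder start_crawler that is not part of this diff:

import time

def start_crawler(task_dict):
    # Placeholder for launching the crawler's run script
    # (e.g. the "sh ./main/main.sh ..." command shown above).
    print("starting task", task_dict["task_id"])

def dispatch(pre_task_list):
    now = int(time.time())
    for task_dict in pre_task_list:
        if now >= task_dict["next_time"]:
            # In the real class this step is Scheduling.update_task, which also
            # persists the new next_time to MySQL; here it is done in memory only.
            if task_dict["interval_piaoquan"] > 0:
                task_dict["next_time"] += task_dict["interval_piaoquan"]
            start_crawler(task_dict)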
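
write_redis likewise ends in pass at the "# Write to redis" step, and the diff does not say which redis layout is intended. One plausible shape, assuming the redis-py client and one list key per crawler, both of which are assumptions rather than anything taken from this commit:

import json
import redis

def push_tasks(pre_task_list, crawler, host="127.0.0.1", port=6379):
    # Hypothetical layout: one redis list per crawler, each entry a JSON-encoded task dict.
    r = redis.Redis(host=host, port=port, decode_responses=True)
    for task_dict in pre_task_list:
        r.rpush(f"crawler:{crawler}:tasks", json.dumps(task_dict))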