123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130 |
- import sys
- import time
- import alb_utils
- import gateway_config
- import logging
# Configure the root logger once at import time: INFO and above, with each
# record showing a timestamp, the source file and line, and the severity.
logging.basicConfig(
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S',
    level=logging.INFO,
)
def remove_instances(ecs_client, alb_client, instance_ids, port):
    """Drain, detach, stop and release a batch of instances.

    The steps run in this order:

    1. Set the weight of the instances to 0 in the server groups listed in
       ``gateway_config.server_group_id_list`` and wait 20 seconds.
    2. Remove the instances from those server groups.
    3. Stop the instances.
    4. Ask ``alb_utils.get_instances_status`` for the stopped instances.
    5. Release the instances returned by step 4.

    :param ecs_client: ECS client used to stop, query and release instances
    :param alb_client: ALB client used to update and remove backend servers
    :param instance_ids: list of instance IDs to take out of service
    :param port: port used by the backend servers
    :return: None
    :raises SystemExit: with exit status 1 when the weight update fails
    """
    # Nothing to do for an empty batch; avoid calling the cloud APIs with
    # empty lists and sleeping for no reason.
    if not instance_ids:
        logging.info("No instances to remove, nothing to do.")
        return

    # 1. Take the instances out of traffic.
    weight_list = [(0, 20)]  # set weight to 0, wait 20 seconds
    try:
        alb_utils.update_server_group_servers_attribute(
            alb_client,
            gateway_config.server_group_id_list,
            instance_id_list=instance_ids,
            weight_list=weight_list,
            port=port,
        )
    except Exception as e:
        logging.error(f"Failed to set instance weight: {e}")
        # Exit with a failing status so the caller / scheduler can tell the
        # scale-in did not happen (a bare sys.exit() would report success).
        sys.exit(1)

    # Wait for ALB to apply the new weights.
    time.sleep(20)

    # 2. Remove the instances from the ALB server groups.
    server_group_ids = gateway_config.server_group_id_list
    try:
        alb_utils.remove_servers_from_server_group(
            alb_client=alb_client,
            server_group_id_list=server_group_ids,
            instance_ids=instance_ids,
            port=port,
        )
    except Exception as e:
        # NOTE(review): a failed removal is only logged and the instances are
        # still stopped and released below. This matches the original flow
        # (weight is already 0 at this point) — confirm this is intended.
        logging.error(
            f"Failed to remove count:{len(instance_ids)} instance_ids: {instance_ids} from server group {server_group_ids}: {e}")
    else:
        logging.info(
            f"Successfully removed count:{len(instance_ids)} instance_ids: {instance_ids} from server group {server_group_ids}.")
    logging.info(f"Remove from ALB finished, count:{len(instance_ids)} instances: {instance_ids}")

    # 3. Stop the instances.
    alb_utils.stop_instances(ecs_client=ecs_client, instance_ids=instance_ids)

    # 4. Check which instances have reached the Stopped state.
    # NOTE(review): the result is passed straight to release_instances, so
    # get_instances_status presumably waits for and returns the IDs of the
    # stopped instances — verify against alb_utils.
    stopped_instances = alb_utils.get_instances_status(ecs_client=ecs_client, instance_ids=instance_ids)

    # 5. Release the stopped instances.
    alb_utils.release_instances(ecs_client=ecs_client, instance_ids=stopped_instances)
def main():
    """Scale in the gateway by releasing the first N servers behind the ALB.

    N (``reduce_count``) is read from the first command-line argument. The
    online servers are listed from the first server group in
    ``gateway_config.server_group_id_list``; the first N of them are handed
    to ``remove_instances``. The run is refused when fewer than 10 servers
    would remain afterwards.

    :return: None
    :raises SystemExit: with exit status 1 on invalid arguments, when the
        remaining-server floor would be violated, or on any unexpected error
    """
    # Validate the argument before creating any client so bad input fails
    # fast with a clear message.
    if len(sys.argv) < 2:
        logging.error(f"Usage: {sys.argv[0]} <reduce_count>")
        sys.exit(1)
    try:
        # Number of servers requested to be released.
        reduce_count = int(sys.argv[1])
    except ValueError:
        logging.error(f"reduce_count must be an integer, got: {sys.argv[1]!r}")
        sys.exit(1)
    logging.info(f"reduce_count: {reduce_count}")

    if reduce_count < 0:
        # A negative count would pass the floor check below and turn the
        # slice into "all but the last N servers".
        logging.error(f"reduce_count must not be negative, got: {reduce_count}")
        sys.exit(1)
    if reduce_count == 0:
        logging.info("reduce_count is 0, nothing to release")
        return

    # Minimum number of servers that must stay online after scale-in; the
    # error message below states the same number.
    min_remaining_servers = 10

    try:
        alb_client = alb_utils.connect_alb_client(
            access_key_id=gateway_config.alb_client_params['access_key_id'],
            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
            endpoint=gateway_config.alb_client_params['endpoint'],
        )
        ecs_client = alb_utils.connect_client(
            access_key_id=gateway_config.ecs_client_params['access_key_id'],
            access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
            region_id=gateway_config.ecs_client_params['region_id'],
        )

        # List all servers currently behind the ALB (first server group).
        online_instance_ids = alb_utils.list_server_group_servers(
            alb_client=alb_client,
            server_group_id=gateway_config.server_group_id_list[0],
        )
        online_instance_count = len(online_instance_ids)
        logging.info(f"online instance count: {online_instance_count} instance_ids: {online_instance_ids}")

        if online_instance_count - reduce_count < min_remaining_servers:
            logging.error("缩容后服务器数量不能小于10台")
            sys.exit(1)

        # Take the first reduce_count servers for release.
        reduce_instance_ids = online_instance_ids[:reduce_count]
        logging.info(f"reduce instances count = {len(reduce_instance_ids)} instances: {reduce_instance_ids}")

        # Stop and release the selected servers.
        remove_instances(ecs_client=ecs_client, alb_client=alb_client, instance_ids=reduce_instance_ids,
                         port=gateway_config.port)
        logging.info("stop & release instances end!")
    except Exception as e:
        # Top-level boundary: log with traceback and report failure through
        # the exit status. SystemExit raised above is not caught here.
        logging.exception(f"Scale-in failed: {e}")
        sys.exit(1)
# Run the scale-in only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|