"""Scale in the gateway fleet behind the ALB.

Drains, deregisters, stops and releases a given number of backend instances.

Usage: python <this_script> <reduce_count>
"""
import logging
import sys
import time

import alb_utils
import gateway_config

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%a, %d %b %Y %H:%M:%S')

# Scale-in is refused when fewer than this many servers would stay online.
MIN_REMAINING_INSTANCES = 10

# Seconds to sleep after zeroing the weights so the ALB can apply the change.
ALB_WEIGHT_SETTLE_SECONDS = 20


def remove_instances(ecs_client, alb_client, instance_ids, port) -> None:
    """
    Drain, deregister, stop and release the given instances.

    Steps, in order: set the backend weight to 0, wait, remove the servers
    from every server group in ``gateway_config.server_group_id_list``, stop
    the instances, query their status, then release the instances reported
    by that status query.

    :param ecs_client: ECS client passed through to the ``alb_utils`` helpers
    :param alb_client: ALB client passed through to the ``alb_utils`` helpers
    :param instance_ids: list of instance IDs to remove
    :param port: port used by the backend servers
    :return: None
    :raises SystemExit: with status 1 if the weight update fails
    """
    if not instance_ids:
        # Nothing to do; avoid calling the cloud APIs with empty lists and
        # sleeping for no reason.
        logging.warning("No instances to remove, skipping.")
        return

    # 1. Drain traffic.
    # Per the original note: set weight to 0, wait 20 seconds.
    # NOTE(review): the exact meaning of the tuple is defined by alb_utils - confirm.
    weight_list = [(0, 20)]
    try:
        alb_utils.update_server_group_servers_attribute(alb_client, gateway_config.server_group_id_list,
                                                        instance_id_list=instance_ids,
                                                        weight_list=weight_list, port=port)
    except Exception as e:
        logging.error(f"Failed to set instance weight: {e}")
        # Abort with a failing status: never stop machines that still take traffic.
        sys.exit(1)

    # Wait for the ALB to apply the new weights.
    time.sleep(ALB_WEIGHT_SETTLE_SECONDS)

    # 2. Remove the instances from the ALB server groups.
    # Best effort: a failure here is logged and the flow continues, because the
    # weight is already 0 at this point.
    # NOTE(review): confirm that continuing to stop/release after a failed
    # deregistration is intended.
    try:
        alb_utils.remove_servers_from_server_group(alb_client=alb_client,
                                                   server_group_id_list=gateway_config.server_group_id_list,
                                                   instance_ids=instance_ids, port=port)
        logging.info(
            f"Successfully removed count:{len(instance_ids)} instance_ids: {instance_ids} from server group {gateway_config.server_group_id_list}.")
    except Exception as e:
        logging.error(
            f"Failed to remove count:{len(instance_ids)} instance_ids: {instance_ids} from server group {gateway_config.server_group_id_list}: {e}")
    logging.info(f"Remove from ALB finished, count:{len(instance_ids)} instances: {instance_ids}")

    # 3. Stop the machines.
    alb_utils.stop_instances(ecs_client=ecs_client, instance_ids=instance_ids)

    # 4. Check which machines reached the Stopped state.
    # NOTE(review): presumably returns the IDs of the stopped instances (and may
    # poll until they are stopped) - confirm in alb_utils.
    stopped_instances = alb_utils.get_instances_status(ecs_client=ecs_client, instance_ids=instance_ids)

    # 5. Release the machines reported by the status query.
    alb_utils.release_instances(ecs_client=ecs_client, instance_ids=stopped_instances)


def _parse_reduce_count(argv):
    """Return the positive instance count from ``argv[1]``; log and exit(1) if invalid."""
    if len(argv) < 2:
        logging.error("Missing argument, usage: <script> <reduce_count>")
        sys.exit(1)
    try:
        reduce_count = int(argv[1])
    except ValueError:
        logging.error(f"reduce_count must be an integer, got {argv[1]!r}")
        sys.exit(1)
    if reduce_count <= 0:
        # A negative count would pass the minimum-fleet check and make the
        # slice below select almost every online instance.
        logging.error(f"reduce_count must be a positive integer, got {reduce_count}")
        sys.exit(1)
    return reduce_count


def main() -> None:
    """
    Release the first ``reduce_count`` instances of the first ALB server group.

    ``reduce_count`` is read from ``sys.argv[1]``. The run is refused when
    fewer than ``MIN_REMAINING_INSTANCES`` servers would remain online.
    Every failure path exits with status 1.
    """
    # Validate the argument first so bad input fails before any cloud call.
    reduce_count = _parse_reduce_count(sys.argv)
    logging.info(f"reduce_count: {reduce_count}")

    try:
        alb_client = alb_utils.connect_alb_client(
            access_key_id=gateway_config.alb_client_params['access_key_id'],
            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
            endpoint=gateway_config.alb_client_params['endpoint']
        )
        ecs_client = alb_utils.connect_client(
            access_key_id=gateway_config.ecs_client_params['access_key_id'],
            access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
            region_id=gateway_config.ecs_client_params['region_id']
        )

        # List the machines registered in the first server group of the ALB.
        online_instance_ids = alb_utils.list_server_group_servers(
            alb_client=alb_client,
            server_group_id=gateway_config.server_group_id_list[0])
        online_instance_count = len(online_instance_ids)
        logging.info(f"online instance count: {online_instance_count} instance_ids: {online_instance_ids}")

        if online_instance_count - reduce_count < MIN_REMAINING_INSTANCES:
            logging.error(f"缩容后服务器数量不能小于{MIN_REMAINING_INSTANCES}台")
            sys.exit(1)

        # Take the first reduce_count machines for release.
        reduce_instance_ids = online_instance_ids[:reduce_count]
        logging.info(f"reduce instances count = {len(reduce_instance_ids)} instances: {reduce_instance_ids}")

        # Stop and release the machines.
        remove_instances(ecs_client=ecs_client, alb_client=alb_client, instance_ids=reduce_instance_ids,
                         port=gateway_config.port)
        logging.info("stop & release instances end!")
    except Exception as e:
        # sys.exit() above raises SystemExit, which is not caught here.
        logging.exception(e)
        sys.exit(1)


if __name__ == '__main__':
    main()