9
0

29 Коміти 0f84031d42 ... 86c59ffe42

Автор SHA1 Опис Дата
  xuekailun 86c59ffe42 merge 11 місяців тому
  xuekailun c305f45be6 update requirements.txt 11 місяців тому
  xuekailun 19359c2ac2 update config 11 місяців тому
  xuekailun d45f00dec3 update healthcheck 11 місяців тому
  xuekailun 670d072551 add clb_unittest 11 місяців тому
  xuekailun 9468a50d4d add clb_id_list test 11 місяців тому
  xuekailun 86f20ad82d update image_id 11 місяців тому
  xuekailun 4dac686865 update container_name_list 11 місяців тому
  xuekailun 57fd71ff16 update instance_config_j 11 місяців тому
  xuekailun 12294c5a01 update alb_client_params 11 місяців тому
  xuekailun 1ef77fe12b update server_group_id_list 11 місяців тому
  xuekailun 492a5e9e98 instance_config_h 11 місяців тому
  xuekailun 4a61bed13e add commonapi 11 місяців тому
  xuekailun 0d0332361c add clb_gateway_update_cluster 11 місяців тому
  xuekailun 7a591717c2 update cluster 11 місяців тому
  xuekailun 8ff00fa780 update one 11 місяців тому
  xuekailun 08682cf699 update /clb_gateway_reduce_with_count 11 місяців тому
  xuekailun 8a4eff1d3b update clb_gateway_scaling_j_count 11 місяців тому
  xuekailun 45551122a6 add test_send_file_to_ecs 11 місяців тому
  xuekailun 681a94e29c update alb_gateway_unittest 11 місяців тому
  xuekailun 49e38a51df add alb_gateway_restart 11 місяців тому
  xuekailun ecf7af22ff add clb_reduce 11 місяців тому
  xuekailun 84baba747d add gateway_restart 11 місяців тому
  xuekailun 7b9cebb334 add reduce 11 місяців тому
  xuekailun 1624b1adc6 add scaling 11 місяців тому
  xuekailun 95f10796ff update server_group_id_list test 11 місяців тому
  xuekailun 5c09757e83 add /clb_gateway_scaling_j_count 11 місяців тому
  xuekailun 13d69105e8 add clb_client_params 11 місяців тому
  xuekailun f31e2b86d6 init pre 11 місяців тому

+ 2 - 1
.gitignore

@@ -1,6 +1,7 @@
+.gitignore
 .git
 .idea
-.gitignore
 __pycache__
 __pycache__/*
 README.md
+

+ 130 - 0
gateway/alb_gateway_reduce_with_count.py

@@ -0,0 +1,130 @@
+import sys
+import time
+import alb_utils
+import gateway_config
+import logging
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
+                    datefmt='%a, %d %b %Y %H:%M:%S')
+
+
+def remove_instances(ecs_client, alb_client, instance_ids, port):
+    """
+    停止并释放机器
+    :param ecs_client: 创建客户端
+    :param alb_client: ALB 客户端
+    :param instance_ids: instanceId 类型列表
+    :param port: 后端服务器使用的端口
+    :return: None
+    """
+    # 1. 摘除流量
+    weight_list = [(0, 20)]  # 设置权重为0,等待20秒
+    try:
+        alb_utils.update_server_group_servers_attribute(alb_client,
+                                                    gateway_config.server_group_id_list,
+                                                    instance_id_list=instance_ids,
+                                                    weight_list=weight_list,
+                                                    port=port)
+    except Exception as e:
+        logging.error(f"Failed to set instance weight: {e}")
+        sys.exit()
+
+    # 等待 ALB 更新权重
+    time.sleep(20)
+
+    # 2. 从 ALB 服务器组中移除实例
+    # for server_group_id in gateway_config.server_group_id_list:
+    try:
+        alb_utils.remove_servers_from_server_group(alb_client=alb_client,
+                                               server_group_id_list=gateway_config.server_group_id_list,
+                                               instance_ids=instance_ids, port=port)
+        logging.info(
+            f"Successfully removed count:{len(instance_ids)} instance_ids:  {instance_ids} from server group {gateway_config.server_group_id_list}.")
+    except Exception as e:
+        logging.error(
+            f"Failed to remove count:{len(instance_ids)} instance_ids: {instance_ids} from server group {gateway_config.server_group_id_list}: {e}")
+
+    logging.info(f"Remove from ALB finished, count:{len(instance_ids)} instances: {instance_ids}")
+
+    # 3. 停止机器
+    alb_utils.stop_instances(ecs_client=ecs_client, instance_ids=instance_ids)
+    # stop_response = alb_utils.stop_instances(ecs_client=ecs_client, instance_ids=instance_ids)
+    # if stop_response.get('Code') is None:
+    #     logging.info(f"Instances stop finished, instances: {instance_ids}")
+    # else:
+    #     logging.error(f"Failed to stop instances: {stop_response}")
+    #     sys.exit()
+
+    # 4. 判断机器运行状态是否为 Stopped
+    stopped_instances = alb_utils.get_instances_status(ecs_client=ecs_client, instance_ids=instance_ids)
+    # while True:
+    #     response = alb_utils.get_instances_status(ecs_client=ecs_client, instance_ids=instance_ids)
+    #     if response.get('Code') is None:
+    #         instances_list = response.get('InstanceStatuses').get('InstanceStatus')
+    #         stopped_instances = [instance.get('InstanceId') for instance in instances_list if
+    #                              instance.get('Status') == 'Stopped']
+    #         if len(stopped_instances) == len(instance_ids):
+    #             logging.info(f"Instances stopped status set success, instances: {stopped_instances}")
+    #             break
+    #         else:
+    #             logging.info(f"Stopped instances count = {len(stopped_instances)}, instances: {stopped_instances}")
+    #             time.sleep(5)
+    #     else:
+    #         logging.error(response)
+    #         sys.exit()
+
+    # 5. 释放机器
+    alb_utils.release_instances(ecs_client=ecs_client, instance_ids=stopped_instances)
+    # release_response = alb_utils.release_instances(ecs_client=ecs_client, instance_ids=stopped_instances)
+    # if release_response.get('Code') is None:
+    #     logging.info(f"Release instances finished, instances: {stopped_instances}")
+    # else:
+    #     logging.error(f"Release instances fail!!!")
+    #     sys.exit()
+
+
+def main():
+    try:
+        alb_client = alb_utils.connect_alb_client(
+            access_key_id=gateway_config.alb_client_params['access_key_id'],
+            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
+            endpoint=gateway_config.alb_client_params['endpoint']
+        )
+
+        ecs_client = alb_utils.connect_client(
+            access_key_id=gateway_config.ecs_client_params['access_key_id'],
+            access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+            region_id=gateway_config.ecs_client_params['region_id']
+        )
+
+        # 获取指定释放的机器数量
+        reduce_count = int(sys.argv[1])
+        logging.info(f"reduce_count: {reduce_count}")
+
+        # 获取 ALB 下所有机器
+        online_instance_ids = alb_utils.list_server_group_servers(alb_client=alb_client,
+                                                              server_group_id=gateway_config.server_group_id_list[
+                                                                  0])
+        online_instance_count = len(online_instance_ids)
+        logging.info(f"online instance count: {online_instance_count} instance_ids: {online_instance_ids}")
+
+        if online_instance_count - reduce_count < 10:
+            logging.error("缩容后服务器数量不能小于10台")
+            sys.exit()
+
+        # 获取前 count 台机器进行释放
+        reduce_instance_ids = online_instance_ids[:reduce_count]
+        logging.info(f"reduce instances count = {len(reduce_instance_ids)} instances: {reduce_instance_ids}")
+
+        # 停止并释放机器
+        remove_instances(ecs_client=ecs_client, alb_client=alb_client, instance_ids=reduce_instance_ids,
+                         port=gateway_config.port)
+        logging.info(f"stop & release instances end!")
+    except Exception as e:
+        logging.error(e)
+        sys.exit()
+
+
+if __name__ == '__main__':
+    main()

+ 109 - 0
gateway/alb_gateway_restart.py

@@ -0,0 +1,109 @@
+import logging
+import sys
+import time
+import docker
+import gateway_config
+import alb_utils
+import requests
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
+                    datefmt='%a, %d %b %Y %H:%M:%S')
+
+health_instances = []
+
+def server_health_check(ecs_client, instance_id):
+    """
+    服务健康检查
+    :param ecs_client: 客户端连接
+    :param instance_id: instanceId
+    :return:
+    """
+    global health_instances
+    ip_address = alb_utils.get_ip_address(ecs_client=ecs_client, instance_id=instance_id)
+    while True:
+        health_check_url = f"http://{ip_address}:9000/healthcheck"
+        try:
+            http_code = requests.get(health_check_url).status_code
+        except:
+            logging.info(f"images is downloading ip:{ip_address}")
+            http_code = 0
+
+        if http_code == 200:
+            health_instances.append((instance_id, ip_address))
+            logging.info(f"health check success, instance: {instance_id}/{ip_address}")
+            break
+        else:
+            time.sleep(10)
+
+def server_restart(alb_client, ecs_client, instance_id, image_name, port):
+    try:
+        logging.info(f"Restarting instance: {instance_id}")
+        # 获取ip
+        ip_address = alb_utils.get_ip_address(ecs_client=ecs_client, instance_id=instance_id)
+        logging.info(f"IP address: {instance_id}/{ip_address}")
+
+        # 摘流量
+        alb_utils.update_server_group_servers_attribute(alb_client=alb_client,
+                                                    server_group_id_list=gateway_config.server_group_id_list,
+                                                    instance_id_list=[instance_id],
+                                                    weight_list=[(0, 20)],
+                                                    port=port)
+        logging.info(f"Set weight to 0 for instance: {instance_id}")
+
+        # 连接 Docker 客户端并重启容器
+        docker_client = docker.DockerClient(base_url=f'tcp://{ip_address}:2375', timeout=60)
+        container = docker_client.containers.get(image_name)
+        container.restart()
+        logging.info("Docker restart finished.")
+
+        # 探活
+        server_health_check(ecs_client, instance_id)
+        time.sleep(20)
+        logging.info("Health check finished.")
+
+        # 设置权重
+        add_weight_list = [(10, 5), (20, 5), (40, 5), (60, 5), (80, 5), (100, 5)]
+        alb_utils.update_server_group_servers_attribute(alb_client=alb_client,
+                                                    server_group_id_list=gateway_config.server_group_id_list,
+                                                    instance_id_list=[instance_id],
+                                                    weight_list=add_weight_list,
+                                                    port=port)
+        logging.info(f"Server restart finished, instance: {instance_id}/{ip_address}")
+    except Exception as e:
+        logging.error(f"Server restart failed, instance: {instance_id}")
+        logging.error(e)
+
+
+def main():
+    try:
+        ecs_client = alb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+                                          access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+                                          region_id=gateway_config.ecs_client_params['region_id'])
+
+        alb_client = alb_utils.connect_alb_client(
+            access_key_id=gateway_config.alb_client_params['access_key_id'],
+            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
+            endpoint=gateway_config.alb_client_params['endpoint']
+        )
+
+        # 获取 ALB 下所有机器
+        online_instance_ids = alb_utils.list_server_group_servers(alb_client=alb_client,
+                                                              server_group_id=gateway_config.server_group_id_list[0])
+        online_instance_count = len(online_instance_ids)
+        logging.info(f"Online instance count: {online_instance_count}.")
+        logging.info(f"Online instance ids: {online_instance_ids}")
+
+        # 逐台重启
+        image_name = 'piaoquan-gateway'
+        for i, instance_id in enumerate(online_instance_ids):
+            server_restart(alb_client=alb_client, ecs_client=ecs_client, instance_id=instance_id, image_name=image_name, port=gateway_config.port)
+            logging.info(f"Restart progress: {i + 1}/{online_instance_count}")
+        logging.info("All servers restarted successfully!")
+    except Exception as e:
+        logging.error("An error occurred during the main execution.")
+        logging.error(e)
+
+
+if __name__ == '__main__':
+    main()

+ 131 - 0
gateway/alb_gateway_scaling_j_count.py

@@ -0,0 +1,131 @@
+import sys
+import os
+import asyncio
+import logging
+import time
+import requests
+import alb_utils
+import gateway_config
+from concurrent.futures import ThreadPoolExecutor
+
+health_instances = []
+
+def gateway_health_check(ecs_client, instance_id, max_wait_time=None):
+    """
+    服务健康检查
+    :param ecs_client: 客户端连接
+    :param instance_id: instanceId
+    :param max_wait_time: 最长等待时间,单位:s
+    :return:
+    """
+    global health_instances
+    start_time = time.time()
+    ip_address = alb_utils.get_ip_address(ecs_client=ecs_client, instance_id=instance_id)
+    while True:
+        health_check_url = f"http://{ip_address}:9000/healthcheck"
+        try:
+            http_code = requests.get(health_check_url).status_code
+        except:
+            logging.info(f"images is downloading ip: {ip_address}")
+            http_code = 0
+
+        if http_code == 200:
+            health_instances.append((instance_id, ip_address))
+            logging.info(f"health check success, instance: {instance_id}/{ip_address}")
+            break
+        elif max_wait_time is not None:
+            now = time.time()
+            if (now - start_time) >= max_wait_time:
+                logging.info(f"health check error, instance: {instance_id}/{ip_address}")
+                break
+            else:
+                time.sleep(10)
+        else:
+            time.sleep(10)
+
+
+async def ess_instance(ecs_client, alb_client, ess_count, max_workers, port):
+    """
+    扩容机器并运行新服务
+    :param ecs_client: 购买机器客户端连接
+    :param alb_client: 修改负载均衡权限
+    :param ess_count: 扩容数量
+    :param max_workers: 线程数
+    :param port: 后端服务器使用的端口
+    :return:
+    """
+    # 1. 购买机器并启动
+    ess_instance_ids = alb_utils.create_multiple_instances(
+        amount=ess_count,
+        ecs_client=ecs_client,
+        **gateway_config.instance_config_j,
+    )
+    time.sleep(60)
+
+    # 2. 发送启动脚本到机器上
+    alb_utils.send_file_to_ecs(ecs_client=ecs_client, instance_id_list=ess_instance_ids, **gateway_config.start_sh)
+    logging.info(f"send start shell file finished, count: {len(ess_instance_ids)} instances: {ess_instance_ids}")
+    # 3. 启动服务
+    start_sh_param = "latest"
+    server_start_sh = os.path.join(gateway_config.start_sh['target_dir'], gateway_config.start_sh['name'])
+    server_start_commend = f"sh {server_start_sh} {start_sh_param}"
+    alb_utils.run_command(ecs_client=ecs_client, instance_ids=ess_instance_ids, command=server_start_commend)
+    # 4. 异步探活
+    global health_instances
+    health_instances = []
+    max_wait_time = 360
+    loop = asyncio.get_running_loop()
+    executor = ThreadPoolExecutor(max_workers=max_workers)
+    tasks = [
+        loop.run_in_executor(executor, gateway_health_check, *args) for args in
+        [(ecs_client, instance_id, max_wait_time) for instance_id in ess_instance_ids]
+    ]
+    await asyncio.wait(tasks)
+    logging.info(f"health instances count: {len(health_instances)}, {health_instances}")
+
+    # 5. 挂载流量到ALB
+    if len(health_instances) > 0:
+        # 所有机器探活成功
+        time.sleep(20)
+        # instance_id_list = []
+        # for instance_id, ip in health_instances:
+        #     instance_id_list.append(instance_id)
+        # #     for server_group_id in gateway_config.server_group_id_list:
+        health_instance_ids = [instance_id for instance_id, _ in health_instances]
+        alb_utils.add_servers_to_server_group(alb_client, gateway_config.server_group_id_list, health_instance_ids, weight=100, port=port)
+        logging.info(f"ess count: {ess_count}, "
+                     f"create count: {len(ess_instance_ids)}, "
+                     f"finished count: {len(health_instances)}")
+    else:
+        logging.info(f"ess count: {ess_count}, "
+                     f"create count: {len(ess_instance_ids)}, "
+                     f"health count: {len(health_instances)}")
+        sys.exit()
+
+
+
+def main():
+    try:
+        ecs_client = alb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+                                          access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+                                          region_id=gateway_config.ecs_client_params['region_id'])
+        alb_client = alb_utils.connect_alb_client(
+            access_key_id=gateway_config.alb_client_params['access_key_id'],
+            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
+            endpoint=gateway_config.alb_client_params['endpoint']
+        )
+        # 获取批量创建ECS实例的数量
+        ess_instance_count = int(sys.argv[1])
+        # 扩容机器并启动服务
+        logging.info(f"ess instances start ...")
+        logging.info(f"ess instance count: {ess_instance_count}")
+        asyncio.run(ess_instance(ecs_client=ecs_client,
+                                 alb_client=alb_client,
+                                 ess_count=ess_instance_count, max_workers=2, port=gateway_config.port))
+        logging.info(f"ess instances end!")
+    except Exception as e:
+        logging.error(e)
+
+
+if __name__ == '__main__':
+    main()

+ 105 - 0
gateway/alb_gateway_unittest.py

@@ -0,0 +1,105 @@
+import logging
+import os
+import unittest
+from os import write
+from aliyunsdkecs.request.v20140526.RunInstancesRequest import RunInstancesRequest
+
+from gateway import gateway_config, alb_utils, alb_gateway_scaling_j_count, alb_gateway_update_list
+from longvideoapi.longvideoapi_config import server_group_id_list
+
+
+class MyTestCase(unittest.TestCase):
+
+    def test_send_file_to_ecs(self):
+        ecs_client = alb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+                                              access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+                                              region_id=gateway_config.ecs_client_params['region_id'])
+        instance_ids = ["i-bp19n839usecekzn2ig9"]
+        alb_utils.send_file_to_ecs(ecs_client,
+                                   instance_id_list=instance_ids,
+                                   **gateway_config.start_sh)
+
+    def test_run_command(self):
+        ecs_client = alb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+                                              access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+                                              region_id=gateway_config.ecs_client_params['region_id'])
+        instance_ids = ["i-bp19n839usecekzn2ig9"]
+        start_sh_param = "latest"
+        server_start_sh = os.path.join(gateway_config.start_sh['target_dir'], gateway_config.start_sh['name'])
+        server_start_commend = f"sh {server_start_sh} {start_sh_param}"
+        alb_utils.run_command(ecs_client, instance_ids, command=server_start_commend)
+
+
+    def test_add_servers_to_server_group(self):
+        alb_client = alb_utils.connect_alb_client(
+            access_key_id=gateway_config.alb_client_params['access_key_id'],
+            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
+            endpoint=gateway_config.alb_client_params['endpoint']
+        )
+        instance_ids = ["i-bp19n839usecekzn2ig9"]
+        alb_utils.add_servers_to_server_group(alb_client, gateway_config.server_group_id_list, instance_ids, weight=100, port=gateway_config.port)
+
+
+    def test_list_server_group_servers(self):
+        alb_client = alb_utils.connect_alb_client(
+            access_key_id=gateway_config.alb_client_params['access_key_id'],
+            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
+            endpoint=gateway_config.alb_client_params['endpoint']
+        )
+        online_instance_ids = alb_utils.list_server_group_servers(alb_client, server_group_id=gateway_config.server_group_id_list[0])
+        logging.info(online_instance_ids)
+
+
+    def test_update_server_group_servers_attribute(self):
+        alb_client = alb_utils.connect_alb_client(
+            access_key_id=gateway_config.alb_client_params['access_key_id'],
+            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
+            endpoint=gateway_config.alb_client_params['endpoint']
+        )
+        instance_id = ["i-bp19n839usecekzn2ig9"]
+        add_weight_list = [(10, 1), (20, 1), (40, 1)]
+        port = gateway_config.port
+        alb_utils.update_server_group_servers_attribute(alb_client=alb_client,
+                                                        server_group_id_list=gateway_config.server_group_id_list,
+                                                        instance_id_list=instance_id,
+                                                        weight_list=add_weight_list,
+                                                        port=port)
+
+    def test_remove_servers_from_server_group(self):
+        alb_client = alb_utils.connect_alb_client(
+            access_key_id=gateway_config.alb_client_params['access_key_id'],
+            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
+            endpoint=gateway_config.alb_client_params['endpoint']
+        )
+        instance_id = ["i-bp19n839usecekzn2ig9"]
+        alb_utils.remove_servers_from_server_group(alb_client,
+                                                   server_group_id_list=gateway_config.server_group_id_list,
+                                                   instance_ids=instance_id,
+                                                   port=gateway_config.port)
+
+    def test_create_multiple_instances(self):
+        ecs_client = alb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+                                              access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+                                              region_id=gateway_config.ecs_client_params['region_id'])
+
+        ess_instance_ids = alb_utils.create_multiple_instances(
+            amount=1,
+            ecs_client=ecs_client,
+            **gateway_config.instance_config_j,
+        )
+        logging.info(ess_instance_ids)
+
+
+    def test_remove_container_image(self):
+        ecs_client = alb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+                                          access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+                                          region_id=gateway_config.ecs_client_params['region_id'])
+        instance_id = 'i-bp1933o7phjzrwg1x3kd'
+        container_name_list = ['piaoquan-gateway', 'gateway']
+        alb_gateway_update_cluster.remove_container_image(ecs_client, instance_id, container_name_list)
+
+
+
+
+if __name__ == '__main__':
+    unittest.main()

+ 354 - 0
gateway/alb_gateway_update_list.py

@@ -0,0 +1,354 @@
+import asyncio
+import sys
+import time
+import requests
+import alb_utils
+import logging
+import os
+import docker
+import gateway_config
+
+from concurrent.futures import ThreadPoolExecutor
+
+health_instances = []
+ess_instances = []
+remove_container_instances = []
+
+
+def server_health_check(ecs_client, instance_id):
+    """
+    服务健康检查
+    :param ecs_client: 客户端连接
+    :param instance_id: instanceId
+    :return:
+    """
+    global health_instances
+    ip_address = alb_utils.get_ip_address(ecs_client=ecs_client, instance_id=instance_id)
+    while True:
+        health_check_url = f"http://{ip_address}:9000/healthcheck"
+        try:
+            http_code = requests.get(health_check_url).status_code
+        except:
+            logging.info(f"images is downloading ip:{ip_address}")
+            http_code = 0
+
+        if http_code == 200:
+            health_instances.append((instance_id, ip_address))
+            logging.info(f"health check success, instance: {instance_id}/{ip_address}")
+            break
+        else:
+            time.sleep(10)
+
+
+async def ess_instance(ecs_client, alb_client, ess_count, max_workers, version, port):
+    """
+    扩容机器并运行新服务
+    :param ecs_client: 购买机器客户端连接
+    :param alb_client: 修改负载均衡权限
+    :param ess_count: 扩容数量
+    :param max_workers: 线程数
+    :param version: 版本标记
+    :param port: 后端服务器使用的端口
+    :return:
+    """
+    # 1. 购买机器并启动
+    ess_instance_ids = alb_utils.create_multiple_instances(
+        amount=ess_count,
+        ecs_client=ecs_client,
+        **gateway_config.instance_config_j,
+    )
+    time.sleep(60)
+
+    # 2. 发送启动脚本到机器上
+    alb_utils.send_file_to_ecs(ecs_client=ecs_client, instance_id_list=ess_instance_ids, **gateway_config.start_sh)
+    logging.info(f"send start shell file finished, count: {len(ess_instance_ids)} instances: {ess_instance_ids}")
+    # 3. 启动服务
+    server_start_sh = os.path.join(gateway_config.start_sh['target_dir'], gateway_config.start_sh['name'])
+    server_start_commend = f"sh {server_start_sh} {version}"
+    alb_utils.run_command(ecs_client=ecs_client, instance_ids=ess_instance_ids, command=server_start_commend)
+    logging.info(f"start server finish, count: {len(ess_instance_ids)} instances: {ess_instance_ids}")
+    # 4. 异步探活
+    global health_instances
+    health_instances = []
+    loop = asyncio.get_running_loop()
+    executor = ThreadPoolExecutor(max_workers=max_workers)
+    tasks = [
+        loop.run_in_executor(executor, server_health_check, *args) for args in
+        [(ecs_client, instance_id) for instance_id in ess_instance_ids]
+    ]
+    await asyncio.wait(tasks)
+    logging.info(f"health instances count: {len(health_instances)}, instances: {health_instances}")
+    # 5. 挂载流量
+    if len(health_instances) == len(ess_instance_ids):
+        # 所有机器探活成功
+        time.sleep(20)
+        health_instance_ids = [instance_id for instance_id, _ in health_instances]
+        # for server_group_id in gateway_config.server_group_id_list:
+            # for instance_id in health_instance_ids:
+        alb_utils.add_servers_to_server_group(alb_client, gateway_config.server_group_id_list, health_instance_ids, weight=0, port=port)
+        logging.info(f"Successfully added count: {len(health_instance_ids)} health_instance_ids {health_instance_ids} to server groups {gateway_config.server_group_id_list}.")
+
+        time.sleep(20)
+        logging.info(f"start update weight count: {len(health_instance_ids)} instances: {health_instance_ids} server groups: {gateway_config.server_group_id_list}.")
+        add_weight_list = [(10, 5), (20, 5), (40, 5), (60, 5), (80, 5), (100, 5)]
+        # add_weight_list = [(10, 10), (20, 10), (40, 10), (60, 10), (80, 10), (100, 10)]
+        alb_utils.update_server_group_servers_attribute(alb_client,
+                                                    server_group_id_list=gateway_config.server_group_id_list,
+                                                    instance_id_list=health_instance_ids,
+                                                    weight_list=add_weight_list,
+                                                    port=port)
+        global ess_instances
+        ess_instances.extend(health_instance_ids)
+        logging.info(f"ess count: {ess_count}, "
+                     f"create count: {len(ess_instance_ids)}, "
+                     f"finished count: {len(health_instance_ids)}")
+    else:
+        logging.info(f"ess count: {ess_count}, "
+                     f"create count: {len(ess_instance_ids)}, "
+                     f"health count: {len(health_instances)}")
+        sys.exit()
+
+
+def remove_container_image(ecs_client, instance_id, container_name_list):
+    """
+    移除旧容器并删除旧镜像
+    :param ecs_client: 客户端连接
+    :param instance_id: instanceId type-string
+    :param container_name: 容器名称 type-string
+    :return:
+    """
+    ip_address = alb_utils.get_ip_address(ecs_client=ecs_client, instance_id=instance_id)
+    logging.info(f"服务器信息:{instance_id}/{ip_address}")
+    client = docker.DockerClient(base_url=f'tcp://{ip_address}:2375', timeout=60)
+    # 移除旧的容器
+    container_remove_retry = 3
+    i = 0
+    while True:
+        if i >= container_remove_retry:
+            logging.error(f"容器不存在或者无法删除当前容器, instance = {instance_id}/{ip_address}")
+            sys.exit()
+        try:
+            flag = False
+            for container_name in container_name_list:
+                try:
+                    container_id = client.containers.get(container_name)
+                    container_id.remove(force=True)
+                    flag = True
+                    break
+                except:
+                    continue
+            if flag:
+                break
+        except Exception as e:
+            i += 1
+
+    # 删除旧镜像
+    images_remove_retry = 3
+    j = 0
+    while True:
+        if j >= images_remove_retry:
+            logging.error(f"镜像不存在,无法获取到镜像ID, instance = {instance_id}/{ip_address}")
+            sys.exit()
+        try:
+            images = client.images.list()
+            for image in images:
+                client.images.remove(force=True, image=image.tags[0])
+                time.sleep(2)
+            global remove_container_instances
+            remove_container_instances.append(instance_id)
+            break
+        except Exception as e:
+            i += 1
+
+
+async def update_instance(ecs_client, alb_client, instance_ids, max_workers, version, port):
+    """
+    线上机器更新
+    :param ecs_client:
+    :param alb_client: alb客户端连接
+    :param instance_ids: instanceId type-list
+    :param max_workers:
+    :param version: 版本标记
+    :param port: 后端服务器使用的端口
+    :return:
+    """
+    sub_index = len(instance_ids)//2
+    instance_ids_group = [instance_ids[:sub_index], instance_ids[sub_index:]]
+    update_finished_count = 0
+    for instance_id_list in instance_ids_group:
+        logging.info(f"update instances: {instance_id_list}")
+        # 1. 摘流量
+        alb_utils.update_server_group_servers_attribute(alb_client=alb_client,
+                                                    server_group_id_list=gateway_config.server_group_id_list,
+                                                    instance_id_list=instance_id_list,
+                                                    weight_list=[(0, 20)],
+                                                    port=port)
+        logging.info(f"set weight with 0 finished, count: {len(instance_id_list)} instances: {instance_id_list}")
+        # 2. 异步移除旧容器并删除旧镜像
+        global remove_container_instances
+        remove_container_instances = []
+        # container_name = 'longvideoapi'
+        container_name_list = ['piaoquan-gateway', 'gateway']
+        loop = asyncio.get_running_loop()
+        executor = ThreadPoolExecutor(max_workers=max_workers)
+        tasks = [
+            loop.run_in_executor(executor, remove_container_image, *args) for args in
+            [(ecs_client, instance_id, container_name_list) for instance_id in instance_id_list]
+        ]
+        await asyncio.wait(tasks)
+        logging.info(f"remove container & images finished, instances: {remove_container_instances},"
+                     f" count: {len(remove_container_instances)}")
+        if len(remove_container_instances) < len(instance_id_list):
+            logging.error(f"remove container image failed| "
+                          f"request count: {len(instance_id_list)}, removed count: {len(remove_container_instances)}")
+            sys.exit()
+        # 3. 发送启动脚本到机器上
+        alb_utils.send_file_to_ecs(ecs_client=ecs_client, instance_id_list=instance_id_list, **gateway_config.start_sh)
+        logging.info(f"send start shell file finished, instances: {instance_id_list}, count: {len(instance_id_list)}")
+        # 4. 启动服务
+        server_start_sh = os.path.join(gateway_config.start_sh['target_dir'], gateway_config.start_sh['name'])
+        server_start_commend = f"sh {server_start_sh} {version}"
+        alb_utils.run_command(ecs_client=ecs_client, instance_ids=instance_id_list, command=server_start_commend)
+        # 5. 异步探活
+        global health_instances
+        health_instances = []
+        loop = asyncio.get_running_loop()
+        executor = ThreadPoolExecutor(max_workers=max_workers)
+        tasks = [
+            loop.run_in_executor(executor, server_health_check, *args) for args in
+            [(ecs_client, instance_id) for instance_id in instance_id_list]
+        ]
+        await asyncio.wait(tasks)
+        logging.info(f"health instances: {health_instances}, count: {len(health_instances)}")
+        # 6. 挂载流量
+        if len(health_instances) == len(instance_id_list):
+            # 所有机器探活成功
+            time.sleep(10)
+            health_instance_ids = [instance_id for instance_id, _ in health_instances]
+
+            add_weight_list = [(10, 5), (20, 5), (40, 5), (60, 5), (80, 5), (100, 5)]
+            # add_weight_list = [(10, 10), (20, 10), (40, 10), (60, 10), (80, 10), (100, 10)]
+            alb_utils.update_server_group_servers_attribute(alb_client,
+                                                        server_group_id_list=gateway_config.server_group_id_list,
+                                                        instance_id_list=health_instance_ids,
+                                                        weight_list=add_weight_list,
+                                                        port=port)
+            logging.info(f"finished instances: {health_instances}, count: {len(health_instances)}")
+            update_finished_count += len(health_instances)
+            logging.info(f"update finished: {update_finished_count}/{len(instance_ids)}")
+        else:
+            logging.info(f"health instances: {health_instances}, count: {len(health_instances)}")
+            sys.exit()
+
+
+def remove_instances(ecs_client, alb_client, instance_ids, port):
+    """
+    停止并释放机器
+    :param ecs_client:
+    :param alb_client:
+    :param instance_ids: instanceId type-list
+    :param port: 后端服务器使用的端口
+    :return: None
+    """
+    # 1. 摘流量
+    weight_list = [(0, 20)]  # 设置权重为0,等待20秒
+    try:
+        alb_utils.update_server_group_servers_attribute(alb_client=alb_client,
+                                                    server_group_id_list=gateway_config.server_group_id_list,
+                                                    instance_id_list=instance_ids,
+                                                    weight_list=weight_list,
+                                                    port=port)
+    except Exception as e:
+        logging.error(f"Failed to set instance weight: {e}")
+        sys.exit()
+
+    time.sleep(10)
+    # 2.移除alb
+    # for server_group_id in gateway_config.server_group_id_list:
+    try:
+        alb_utils.remove_servers_from_server_group(alb_client=alb_client, server_group_id_list=gateway_config.server_group_id_list,
+                                               instance_ids=instance_ids, port=port)
+        logging.info(f"Successfully removed instances: {instance_ids} from server group {gateway_config.server_group_id_list}.")
+    except Exception as e:
+        logging.error(f"Failed to remove instances: {instance_ids} from server group {gateway_config.server_group_id_list}: {e}")
+
+    logging.info(f"Remove from ALB finished, instances: {instance_ids}")
+
+    # 3. 停止机器
+    alb_utils.stop_instances(ecs_client=ecs_client, instance_ids=instance_ids)
+    # stop_response = alb_utils.stop_instances(ecs_client=ecs_client, instance_ids=instance_ids)
+    # if stop_response.get('Code') is None:
+    #     logging.info(f"Instances stop finished, instances: {instance_ids}")
+    # else:
+    #     logging.error(f"Failed to stop instances: {stop_response}")
+    #     sys.exit()
+
+    # 4. 判断机器运行状态是否为Stopped
+    # while True:
+    stopped_instances = alb_utils.get_instances_status(ecs_client=ecs_client, instance_ids=instance_ids)
+        # response = alb_utils.get_instances_status(ecs_client=ecs_client, instance_ids=instance_ids)
+        # if response.get('Code') is None:
+        #     instances_list = response.get('InstanceStatuses').get('InstanceStatus')
+        #     stopped_instances = [instance.get('InstanceId') for instance in instances_list if
+        #                          instance.get('Status') == 'Stopped']
+        #     if len(stopped_instances) == len(instance_ids):
+        #         logging.info(f"Instances stopped status set success, instances: {stopped_instances}")
+        #         break
+        #     else:
+        #         logging.info(f"Stopped instances count = {len(stopped_instances)}, instances: {stopped_instances}")
+        #         time.sleep(5)
+        # else:
+        #     logging.error(response)
+        #     sys.exit()
+    # 5. 释放机器
+    alb_utils.release_instances(ecs_client=ecs_client, instance_ids=stopped_instances)
+    # release_response = alb_utils.release_instances(ecs_client=ecs_client, instance_ids=stopped_instances)
+    # if release_response.get('Code') is None:
+    #     logging.info(f"Release instances finished, instances: {stopped_instances}")
+    # else:
+    #     logging.error(f"Release instances fail!!!")
+    #     sys.exit()
+
+
+def main():
+    try:
+        version = sys.argv[1]
+        alb_client = alb_utils.connect_alb_client(
+            access_key_id=gateway_config.alb_client_params['access_key_id'],
+            access_key_secret=gateway_config.alb_client_params['access_key_secret'],
+            endpoint=gateway_config.alb_client_params['endpoint']
+        )
+        ecs_client = alb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+                                          access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+                                          region_id=gateway_config.ecs_client_params['region_id'])
+
+        # 1. 获取alb下所有机器
+        online_instance_ids = alb_utils.list_server_group_servers(alb_client=alb_client, server_group_id=gateway_config.server_group_id_list[0])
+        online_instance_count = len(online_instance_ids)
+        logging.info(f"online instance count: {online_instance_count} instance_ids: {online_instance_ids}")
+
+        # 2. 扩容机器并启动新服务 扩容数量:线上机器数量//2
+        logging.info(f"ess instances start ...")
+        ess_instance_count = online_instance_count // 2
+        logging.info(f"ess instance count: {ess_instance_count}")
+        asyncio.run(ess_instance(ecs_client=ecs_client, alb_client=alb_client,
+                                 ess_count=ess_instance_count, max_workers=2, version=version, port=gateway_config.port))
+        logging.info(f"ess instances end!")
+
+        # 3. 原有机器进行更新
+        logging.info(f"update online instances start ...")
+        asyncio.run(update_instance(ecs_client=ecs_client, alb_client=alb_client,
+                                    instance_ids=online_instance_ids, max_workers=8, version=version, port=gateway_config.port))
+        logging.info(f"update online instances end!")
+
+        # 4. 停止并释放扩容机器
+        logging.info(f"stop & release instances start ...")
+        remove_instances(ecs_client=ecs_client, alb_client=alb_client, instance_ids=ess_instances, port=gateway_config.port)
+        logging.info(f"stop & release instances end!")
+    except Exception as e:
+        logging.error(e)
+        sys.exit()
+
+
+if __name__ == '__main__':
+    main()

+ 23 - 11
gateway/gateway_update.py → gateway/alb_gateway_update_one.py

@@ -7,7 +7,8 @@ import queue
 import threading
 import time
 import gateway_config
-import utils
+import alb_utils
+from longvideoapi.longvideoapi_config import server_group_id_list
 
 # 从配置文件中获取应用程序名称和容器仓库地址
 apps = gateway_config.apps
@@ -43,7 +44,7 @@ def update(instance_id, port):
     time.sleep(10)  # 等待10秒钟
     global success_count
 
-    ipadd = utils.get_ip_address(ecs_client, instance_id)  # 使用 utils 获取实例的 IP 地址
+    ipadd = alb_utils.get_ip_address(ecs_client, instance_id)  # 使用 utils 获取实例的 IP 地址
     print("服务器信息:" + "%s/%s" % (instance_id, ipadd))
     client = docker.DockerClient(base_url='tcp://%s:2375' % (ipadd), timeout=60)
 
@@ -60,14 +61,23 @@ def update(instance_id, port):
         docker_config = gateway_config.docker_config
         client.login(username=docker_config['username'], password=docker_config['password'],
                      registry=docker_config['registry'])
-        client.containers.run(registry.format(apps, version), detach=True, cap_add='SYS_PTRACE', network_mode='host', name=apps,
+        client.containers.run(registry.format(apps, version),
+                              detach=True,
+                              cap_add='SYS_PTRACE',
+                              network_mode='host',
+                              name=apps,
                               volumes={'/datalog/': {'bind': '/datalog/', 'mode': 'rw'}})
         print("开始健康检查")
         checkHealth(ipadd)
         print("%s :权重修改中......" % (ipadd))
         weight_list = [(10, 5), (20, 5), (40, 5), (60, 5), (80, 5), (100, 5)]
         # weight_list = [(10, 10), (20, 10), (40, 10), (60, 10), (80, 10), (100, 10)]
-        utils.update_server_group_servers_attribute(alb_client, gateway_config.server_group_id_list, instance_id_list=[instance_id], weight_list=weight_list, port=port)
+        alb_utils.update_server_group_servers_attribute(alb_client,
+                                                        server_group_id_list=gateway_config.server_group_id_list,
+                                                        instance_id_list=[instance_id],
+                                                        weight_list=weight_list,
+                                                        port=port)
+
         success_count += 1
         print("更新进度" + "%s/%s" % (success_count, total))
     except Exception as e:
@@ -78,7 +88,7 @@ def update(instance_id, port):
 def pull_image():
     """从镜像仓库中拉取指定版本的镜像"""
     instanceId = q1.get()
-    ipaddr = utils.get_ip_address(ecs_client, instanceId)
+    ipaddr = alb_utils.get_ip_address(ecs_client, instanceId)
     cd_url = "tcp://{}:2375".format(ipaddr)
     client = docker.DockerClient(base_url=cd_url, timeout=30)
 
@@ -93,11 +103,11 @@ def pull_image():
 
 if __name__ == '__main__':
     # 初始化 ECS 客户端
-    ecs_client = utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+    ecs_client = alb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
                                       access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
                                       region_id=gateway_config.ecs_client_params['region_id'])
     # 初始化 ALB 客户端
-    alb_client = utils.connect_alb_client(gateway_config.alb_client_params['access_key_id'],
+    alb_client = alb_utils.connect_alb_client(gateway_config.alb_client_params['access_key_id'],
                                           gateway_config.alb_client_params['access_key_secret'],
                                           endpoint=gateway_config.alb_client_params['endpoint']
                                           )
@@ -109,7 +119,7 @@ if __name__ == '__main__':
     q1 = queue.Queue()
 
     # 获取 ALB 下服务器组的实例 ID
-    res = utils.list_server_group_servers(alb_client=alb_client, server_group_id=gateway_config.server_group_id_list[0])
+    res = alb_utils.list_server_group_servers(alb_client=alb_client, server_group_id=gateway_config.server_group_id_list[0])
     total += len(res)
     print(f"获取 ALB 下服务器组的实例 ID = {res} total = {total}")
     InstanceIDs.extend(res)
@@ -130,7 +140,9 @@ if __name__ == '__main__':
 
     # 更新每个实例
     for instanceID in InstanceIDs:
-        for server_group_id in gateway_config.server_group_id_list:
-            utils.update_server_group_server_weight(alb_client, server_group_id, instanceID,
-                                                    weight=0, port=gateway_config.port)  # 设置初始权重为0
+        alb_utils.update_server_group_server_weight(alb_client=alb_client,
+                                                    server_group_id_list=server_group_id_list,
+                                                    instance_id_list=instanceID,
+                                                    weight=0,
+                                                    port=gateway_config.port)
         update(instanceID, port=gateway_config.port)

+ 593 - 0
gateway/alb_utils.py

@@ -0,0 +1,593 @@
+import logging
+import json
+import sys
+import time
+from asyncio import wait_for
+
+import requests
+import asyncio
+import time
+
+from alibabacloud_tea_util.client import Client as UtilClient
+from aliyunsdkcore.client import AcsClient
+from aliyunsdkecs.request.v20140526.RunInstancesRequest import RunInstancesRequest
+from aliyunsdkecs.request.v20140526.DescribeInstancesRequest import DescribeInstancesRequest
+from aliyunsdkecs.request.v20140526.DescribeNetworkInterfacesRequest import DescribeNetworkInterfacesRequest
+from aliyunsdkecs.request.v20140526.RunCommandRequest import RunCommandRequest
+from aliyunsdkecs.request.v20140526.SendFileRequest import SendFileRequest
+from aliyunsdkecs.request.v20140526.StopInstancesRequest import StopInstancesRequest
+from aliyunsdkecs.request.v20140526.DeleteInstancesRequest import DeleteInstancesRequest
+from aliyunsdkecs.request.v20140526.DescribeInstanceStatusRequest import DescribeInstanceStatusRequest
+from aliyunsdkecs.request.v20140526.ModifySecurityGroupRuleRequest import ModifySecurityGroupRuleRequest
+from alibabacloud_alb20200616.client import Client as Alb20200616Client
+from alibabacloud_tea_openapi import models as open_api_models
+from alibabacloud_alb20200616 import models as alb_models
+from alibabacloud_alb20200616 import models as alb_20200616_models
+from alibabacloud_tea_util import models as util_models
+from urllib3 import request
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
+                    datefmt='%a, %d %b %Y %H:%M:%S')
+
+
+def send_msg_to_feishu(webhook, key_word, msg_text):
+    """发送消息到飞书"""
+    headers = {'Content-Type': 'application/json'}
+    payload_message = {
+        "msg_type": "text",
+        "content": {
+            "text": '{}: {}'.format(key_word, msg_text)
+        }
+    }
+    response = requests.request('POST', url=webhook, headers=headers, data=json.dumps(payload_message))
+    logging.info(response.text)
+
+
+def connect_client(access_key_id, access_key_secret, region_id):
+    """
+    初始化账号,连接客户端
+    :param access_key_id: access key Id, type-string
+    :param access_key_secret: access key secret, type-string
+    :param region_id: region_id
+    :return: clt
+    """
+    try:
+        clt = AcsClient(ak=access_key_id, secret=access_key_secret, region_id=region_id)
+        return clt
+    except Exception as e:
+        # 失败,记录报错信息,发送通知,停止并退出
+        logging.error(e)
+        sys.exit()
+
+
+def connect_alb_client(access_key_id, access_key_secret, endpoint):
+    """
+    初始化ALB客户端
+    :param access_key_id: access key Id, type-string
+    :param access_key_secret: access key secret, type-string
+    :return: alb_client
+    """
+    config = open_api_models.Config(
+        access_key_id=access_key_id,
+        access_key_secret=access_key_secret,
+        endpoint=endpoint
+    )
+    alb_client = Alb20200616Client(config)
+    return alb_client
+
+
+def build_create_instances_request(image_id, vswitch_id, security_group_id, zone_id, instance_type, instance_name,
+                                   disk_size, disk_category, key_pair_name, tags):
+    """
+    购买服务器参数配置
+    :param image_id: 使用的镜像信息 type-string
+    :param vswitch_id: 选择的交换机 type-string
+    :param security_group_id: 当前vpc类型的安全组 type-string
+    :param zone_id: 服务器所在区域 type-string
+    :param instance_type: 实例规格 type-string
+    :param instance_name: 实例命名 type-string
+    :param disk_size: 磁盘大小,单位:G,type-string
+    :param disk_category: 磁盘类型 type-string
+    :param key_pair_name: 密钥对名称 type-string
+    :param tags: 标签 type-list, eg: [{"Key": "ecs", "Value": "rov-server.prod"}, ...]
+    :return: request
+    """
+    request = RunInstancesRequest()
+    request.set_ImageId(image_id)
+    request.set_VSwitchId(vswitch_id)
+    request.set_SecurityGroupId(security_group_id)
+    request.set_ZoneId(zone_id)
+    request.set_InstanceType(instance_type)
+    request.set_InstanceName(instance_name)
+    request.set_SystemDiskSize(disk_size)
+    request.set_SystemDiskCategory(disk_category)
+    request.set_KeyPairName(key_pair_name)
+    request.set_Tags(tags)
+    return request
+
+
+def send_req(client, request):
+    """
+    发送API请求
+    :param client: 客户端连接
+    :param request: 请求配置
+    :return: response
+    """
+    request.set_accept_format('json')
+    response = client.do_action_with_exception(request)
+    # print(response)
+    response = json.loads(response)
+    print(response)
+    # logging.info(response)
+    print(response.get('Code'))
+    return response
+    # except Exception as e:
+    # 失败,记录报错信息,发送通知,停止并退出
+    # logging.error(e)
+    # sys.exit()
+
+
+def check_instance_running(ecs_client, instance_ids):
+    """
+    检查服务器运行状态
+    :param ecs_client: 客户端连接
+    :param instance_ids: 实例id列表, type-list
+    :return: running_count,Status为Running的实例数
+    """
+    try:
+        request = DescribeInstancesRequest()
+        request.set_InstanceIds(json.dumps(instance_ids))
+        request.set_PageSize(100)
+        response = send_request(ecs_client=ecs_client, request=request)
+        if response.get('Code') is None:
+            instances_list = response.get('Instances').get('Instance')
+            running_count = 0
+            running_instances = []
+            for instance_detail in instances_list:
+                if instance_detail.get('Status') == "Running":
+                    running_count += 1
+                    running_instances.append(instance_detail.get('InstanceId'))
+            return running_count, running_instances
+        else:
+            # 失败,记录报错信息,发送通知,停止并退出
+            logging.error(response)
+            sys.exit()
+    except Exception as e:
+        # 失败,记录报错信息,发送通知,停止并退出
+        logging.error(e)
+        sys.exit()
+
+
+def get_ip_address(ecs_client, instance_id):
+    """
+    获取实例IP地址
+    :param ecs_client: 客户端连接
+    :param instance_id: 实例id, type-string
+    :return: ip_address, type-string
+    """
+    request = DescribeNetworkInterfacesRequest()
+    request.set_accept_format('json')
+    request.set_InstanceId(instance_id)
+    response = send_request(ecs_client=ecs_client, request=request)
+    ip_address = response['NetworkInterfaceSets']['NetworkInterfaceSet'][0]['PrivateIpAddress']
+    return ip_address
+
+
+def create_multiple_instances(amount, ecs_client,
+                              image_id, vswitch_id, security_group_id, zone_id, instance_type, instance_name,
+                              disk_size, disk_category, key_pair_name, tags):
+    """
+    创建多个ECS实例
+    :param amount: 创建实例数 type-int 取值范围:[1, 100]
+    :param ecs_client: 购买机器客户端连接
+    :param image_id: 使用的镜像信息 type-string
+    :param vswitch_id: 选择的交换机 type-string
+    :param security_group_id: 当前vpc类型的安全组 type-string
+    :param zone_id: 服务器所在区域 type-string
+    :param instance_type: 实例规格 type-string
+    :param instance_name: 实例命名 type-string
+    :param disk_size: 磁盘大小,单位:G,type-string
+    :param disk_category: 磁盘类型 type-string
+    :param key_pair_name: 密钥对名称 type-string
+    :param tags: 标签 type-list, eg: [{"Key": "ecs", "Value": "rov-server.prod"}, ...]
+    :return:
+    """
+    logging.info(f"create instances start, request amount: {amount}.")
+    # 1. 连接客户端
+    # create_instances_clt = connect_client(
+    #     access_key_id=access_key_id, access_key_secret=access_key_secret, region_id=region_id
+    # )
+    # 2. 请求参数配置
+    instance_ids = []
+    remain = amount
+    while True:
+        if remain <= 0:
+            break
+        if remain > 50:
+            sub_amount = 50
+            remain = remain - sub_amount
+        else:
+            sub_amount = remain
+            remain = 0
+        request = build_create_instances_request(
+            image_id=image_id, vswitch_id=vswitch_id, security_group_id=security_group_id, zone_id=zone_id,
+            instance_type=instance_type, instance_name=instance_name, disk_size=disk_size, disk_category=disk_category,
+            key_pair_name=key_pair_name, tags=tags
+        )
+        request.set_Amount(sub_amount)
+        # 3. 发送API请求,购买机器并启动
+        response = send_request(ecs_client=ecs_client, request=request)
+        if response.get('Code') is None:
+            sub_instance_ids = response.get('InstanceIdSets').get('InstanceIdSet')
+            logging.info(f"success amount: {len(sub_instance_ids)}, instance ids: {sub_instance_ids}.")
+            # 获取机器运行状态
+            running_amount = 0
+            while running_amount < sub_amount:
+                time.sleep(20)
+                running_amount, running_instances = check_instance_running(ecs_client=ecs_client,
+                                                                           instance_ids=sub_instance_ids)
+                logging.info(f"running amount: {running_amount}, running instances: {running_instances}.")
+            # return instance_ids
+            instance_ids.extend(sub_instance_ids)
+        else:
+            # 失败,记录报错信息,发送通知,停止并退出
+            logging.error(response)
+            sys.exit()
+    return instance_ids
+
+
+def release_instances(ecs_client, instance_ids, force=False):
+    """
+    释放实例
+    :param ecs_client:
+    :param instance_ids: instance_id, type-list
+    :param force: 是否强制释放, True-强制释放, False-正常释放, type-bool
+    :return:
+    """
+    instance_id_list_array = [instance_ids[i:i + 100] for i in range(0, len(instance_ids), 100)]
+    for instance_id_sub_array in instance_id_list_array:
+        request = DeleteInstancesRequest()
+        request.set_InstanceIds(instance_id_sub_array)
+        request.set_Force(force)
+        response = send_request(ecs_client=ecs_client, request=request)
+        # return response
+        if response.get('Code') is None:
+            logging.info(f"Release instances finished, count = {len(instance_id_sub_array)} instances: {instance_id_sub_array}")
+        else:
+            logging.error(f"Release instances fail!!!")
+            sys.exit()
+        time.sleep(3)
+
+
+def get_instances_status(ecs_client, instance_ids):
+    """
+    获取实例运行状态
+    :param ecs_client:
+    :param instance_ids: instance_id, type-list
+    :return:
+    """
+    stopped_instances = []
+    instance_id_list_array = [instance_ids[i:i + 50] for i in range(0, len(instance_ids), 50)]
+    for instance_id_sub_array in instance_id_list_array:
+        while True:
+            request = DescribeInstanceStatusRequest()
+            request.set_InstanceIds(instance_id_sub_array)
+            request.set_PageSize(50)
+            response = send_request(ecs_client=ecs_client, request=request)
+            # return response
+            if response.get('Code') is None:
+                instances_list = response.get('InstanceStatuses').get('InstanceStatus')
+                stopped_instances_sub = [instance.get('InstanceId') for instance in instances_list if
+                                     instance.get('Status') == 'Stopped']
+                if len(stopped_instances_sub) == len(instance_id_sub_array):
+                    logging.info(f"Instances stopped status set success, count:{len(stopped_instances_sub)} instances: {stopped_instances_sub}")
+                    stopped_instances.extend(stopped_instances_sub)
+                    break
+                else:
+                    logging.info(f"Stopped instances count = {len(stopped_instances_sub)}, instances: {stopped_instances_sub}")
+                    time.sleep(5)
+            else:
+                logging.error(response)
+                sys.exit()
+            time.sleep(3)
+    return stopped_instances
+
+def stop_instances(ecs_client, instance_ids, force_stop=False):
+    """
+    停止实例
+    :param ecs_client:
+    :param instance_ids: 实例ID, type-list
+    :param force_stop: 是否强制关机, True-强制关机, False-正常关机, type-bool
+    :return:
+    """
+    instance_id_list_array = [instance_ids[i:i + 100] for i in range(0, len(instance_ids), 100)]
+    for instance_id_sub_array in instance_id_list_array:
+        request = StopInstancesRequest()
+        request.set_InstanceIds(instance_id_sub_array)
+        request.set_ForceStop(force_stop)
+        response = send_request(ecs_client=ecs_client, request=request)
+        # return response
+        if response.get('Code') is None:
+            logging.info(f"Instances stop finished, count:{len(instance_id_sub_array)} instances: {instance_id_sub_array}")
+        else:
+            logging.error(f"Failed to stop instances: {response}")
+            sys.exit()
+        time.sleep(3)
+
+
+def send_request(ecs_client, request):
+    """
+    发送API请求
+    :param ecs_client: 客户端连接
+    :param request: 请求配置
+    :return: response
+    """
+    request.set_accept_format('json')
+    try:
+        response = ecs_client.do_action_with_exception(request)
+        response = json.loads(response)
+        # logging.info(response)
+        return response
+    except Exception as e:
+        # 失败,记录报错信息,发送通知,停止并退出
+        logging.error(e)
+        sys.exit()
+
+
+def run_command(ecs_client, instance_ids, command):
+    """
+    批量执行命令
+    :param ecs_client: 客户端连接
+    :param instance_ids: 实例id列表, type-list, 最多能指定50台ECS实例ID
+    :param command: 命令 type-string
+    :return:
+    """
+    for i in range(len(instance_ids) // 50 + 1):
+        instance_id_list = instance_ids[i * 50:(i + 1) * 50]
+        if len(instance_id_list) == 0:
+            return
+        request = RunCommandRequest()
+        request.set_accept_format('json')
+        request.set_Type("RunShellScript")
+        request.set_CommandContent(command)
+        request.set_InstanceIds(instance_id_list)
+        request.set_Timeout(180)
+        response = send_request(ecs_client=ecs_client, request=request)
+        logging.info(f"run_command count:{len(instance_id_list)} instance_id_list:{instance_id_list} response:{response}")
+
+
+def send_file_to_ecs(ecs_client, instance_id_list, target_dir, name, content):
+    """
+    发送文件到ecs;alb应用,区分上方clb
+    :param ecs_client:
+    :param instance_id_list: 最多能指定50台ECS实例ID
+    :param target_dir: 文件存放目录 type-string
+    :param name: 文件名 type-string
+    :param content: 文件内容 type-string
+    :return:
+    """
+    if not instance_id_list:
+        logging.warning("实例ID列表为空,无法发送文件。")
+        return
+
+    for i in range(len(instance_id_list) // 50 + 1):
+        instance_ids = instance_id_list[i * 50:(i + 1) * 50]
+        if len(instance_ids) == 0:
+            logging.info("没有更多的实例ID需要发送文件,退出。")
+            return
+        request = SendFileRequest()
+        request.set_Content(content)
+        request.set_TargetDir(target_dir)
+        request.set_Name(name)
+        request.set_Overwrite(True)
+        request.set_InstanceIds(instance_ids)
+        try:
+            logging.info(f"正在向实例 {instance_ids} 发送文件 '{name}' 到目录 '{target_dir}'")
+            response = send_request(ecs_client=ecs_client, request=request)
+            logging.info(f"成功发送文件到实例 {instance_ids},响应: {response}")
+        except Exception as e:
+            logging.error(f"发送文件到实例 {instance_ids} 失败,错误: {str(e)}")
+
+
+def add_servers_to_server_group(alb_client, server_group_ids, instance_ids, weight, port):
+    """
+    添加服务器到ALB服务器组
+    :param alb_client: ALB客户端连接
+    :param server_group_ids: 服务器组ID
+    :param instance_ids: 实例ID
+    :param weight: 权重
+    :param port: 后端服务器使用的端口
+    """
+    instance_ids_array = [instance_ids[i:i + 200] for i in range(0, len(instance_ids), 200)]
+    for instance_ids_sub_array in instance_ids_array:
+        servers = []
+        for i in range(len(instance_ids_sub_array)):
+            server = alb_models.AddServersToServerGroupRequestServers(
+                server_id=instance_ids_sub_array[i],
+                server_type='ecs',
+                weight=weight,
+                port=port
+            )
+            servers.append(server)
+
+        # server = alb_models.AddServersToServerGroupRequestServers(
+        #     server_id=instance_id,
+        #     server_type='ecs',
+        #     weight=weight,
+        #     port=port
+        # )
+        for server_group_id in server_group_ids:
+            request = alb_models.AddServersToServerGroupRequest(
+                server_group_id=server_group_id,
+                servers=servers
+            )
+            runtime = util_models.RuntimeOptions(
+                connect_timeout=5000,
+                read_timeout=60000
+            )
+            try:
+                alb_client.add_servers_to_server_group_with_options(request, runtime)
+                logging.info(
+                    f"Successfully added count:{len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} to server group {server_group_id} with weight {weight}.")
+            except Exception as e:
+                logging.error(f"Failed to add count:{len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} to server group {server_group_id}: {str(e)}")
+                sys.exit()
+        time.sleep(5)
+
+def remove_servers_from_server_group(alb_client, server_group_id_list, instance_ids, port):
+    """
+    从ALB服务器组中移除服务器
+    :param alb_client: ALB客户端连接
+    :param server_group_id_list: 服务器组ID list
+    :param instance_ids: 实例ID list
+    :param port: 后端服务器使用的端口
+    """
+    instance_ids_array = [instance_ids[i:i + 200] for i in range(0, len(instance_ids), 200)]
+    for instance_ids_sub_array in instance_ids_array:
+        servers = []
+        for instance_id in instance_ids_sub_array:
+            server = alb_models.RemoveServersFromServerGroupRequestServers(
+                port=port,
+                server_id=instance_id,
+                server_type='ecs'
+            )
+            servers.append(server)
+        for server_group_id in server_group_id_list:
+            request = alb_models.RemoveServersFromServerGroupRequest(
+                server_group_id=server_group_id,
+                servers=servers
+            )
+            runtime = util_models.RuntimeOptions(
+                connect_timeout=5000,
+                read_timeout=60000
+            )
+            try:
+                alb_client.remove_servers_from_server_group_with_options(request, runtime)
+                logging.info(f"Successfully removed count: {len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} from server group {server_group_id}.")
+            except Exception as e:
+                logging.error(f"Failed to remove count: {len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} from server group {server_group_id}: {str(e)}")
+                sys.exit()
+        time.sleep(5)
+
+
+def list_server_group_servers(alb_client, server_group_id):
+    """
+    列出服务器组中的服务器并返回实例ID列表
+    @param alb_client: ALB客户端
+    @param server_group_id: 服务器组ID
+    @return: 实例ID列表
+    """
+    instance_ids = []
+    next_token = None
+    while True:
+        try:
+            list_server_group_servers_request = alb_20200616_models.ListServerGroupServersRequest(
+                server_group_id=server_group_id,
+                max_results=100,
+                next_token=next_token
+            )
+            runtime = util_models.RuntimeOptions(
+                connect_timeout=5000,
+                read_timeout=60000
+            )
+            response = alb_client.list_server_group_servers_with_options(list_server_group_servers_request, runtime)
+            next_token = UtilClient.to_map(response.body).get('NextToken')
+            sub_instance_ids = [server.server_id for server in response.body.servers]
+            if len(sub_instance_ids) > 0:
+                instance_ids.extend(sub_instance_ids)
+            if next_token is None:
+                break
+        except Exception as error:
+            logging.error(error)
+        time.sleep(3)
+    return instance_ids
+
+
+def update_server_group_server_weight(alb_client, server_group_id_list, instance_id_list, weight, port):
+    """
+    更指定服务器在服务器组中的权重
+    :param alb_client: ALB客户端
+    :param server_group_id_list: 服务器组ID list
+    :param instance_id_list: 实例ID list
+    :param weight: 权重值
+    :param port: 后端服务器使用的端口
+    """
+    instance_id_list_array = [instance_id_list[i:i + 40] for i in range(0, len(instance_id_list), 40)]
+    for instance_ids_sub_array in instance_id_list_array:
+        servers = []
+        for i in range(len(instance_ids_sub_array)):
+            server = alb_20200616_models.UpdateServerGroupServersAttributeRequestServers(
+                server_type='Ecs',
+                server_id=instance_ids_sub_array[i],
+                weight=weight,
+                port=port
+            )
+            servers.append(server)
+        for server_group_id in server_group_id_list:
+            request = alb_20200616_models.UpdateServerGroupServersAttributeRequest(
+                servers=servers,
+                server_group_id=server_group_id
+            )
+            # logging.info(f"servers = {servers}")
+            runtime = util_models.RuntimeOptions(
+                connect_timeout=5000,
+                read_timeout=60000
+            )
+            try:
+                # logging.info(f"instance_id_list = {instance_id_list} request = {request}")
+                alb_client.update_server_group_servers_attribute_with_options(request, runtime)
+                logging.info(
+                    f"Successfully updated count = {len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} in group {server_group_id} to weight {weight}.")
+            except Exception as e:
+                logging.error(e)
+                sys.exit()
+        time.sleep(5)
+
+
+def update_server_group_servers_attribute(alb_client, server_group_id_list, instance_id_list, weight_list, port):
+    """
+    更新服务器组中的服务器权重
+    :param alb_client: ALB客户端
+    :param server_group_id_list: 服务器组ID列表
+    :param instance_id_list: 实例ID列表
+    :param weight_list: 权重修改列表 type-list [(weight, sleep_time), ...]
+    :param port: 后端服务器使用的端口
+    """
+    # for server_group_id in server_group_id_list:
+    #     for instance_id in instance_id_list:
+    for weight, sleep_time in weight_list:
+        update_server_group_server_weight(alb_client, server_group_id_list, instance_id_list, weight, port)
+        time.sleep(sleep_time)
+        # check_server_group_status(alb_client, server_group_id_list)
+
+
+def check_server_group_status(alb_client, server_group_id_list):
+    list_server_groups_request = alb_20200616_models.ListServerGroupsRequest(
+        server_group_ids=server_group_id_list,
+        max_results=100
+    )
+    flag = False
+    runtime = util_models.RuntimeOptions(
+        connect_timeout=5000,
+        read_timeout=60000
+    )
+    for i in range(10):
+        try:
+            response = alb_client.list_server_groups_with_options(list_server_groups_request, runtime)
+            count = 0
+            if response.body:
+                server_groups = UtilClient.to_map(response.body).get("ServerGroups")
+                if server_groups:
+                    for server_group in server_groups:
+                        if server_group.get("ServerGroupStatus") == "Available":
+                            logging.info(f"Server group {server_group} is available.")
+                            count += 1
+            if count == len(server_group_id_list):
+                flag = True
+                break
+            time.sleep(2)
+        except Exception as e:
+            logging.error(e)
+    if not flag:
+        sys.exit()

+ 96 - 0
gateway/clb_gateway_reduce_with_count.py

@@ -0,0 +1,96 @@
+import sys
+import time
+import clb_utils
+import gateway_config
+import logging
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
+                    datefmt='%a, %d %b %Y %H:%M:%S')
+
+
+def remove_instances(ecs_client, clb_client, instance_ids):
+    """
+    停止并释放机器
+    :param ecs_client:
+    :param clb_client:
+    :param instance_ids: instanceId type-list
+    :return: None
+    """
+    # 1. 摘流量
+    clb_utils.set_instance_weight_process_with_clbs(client=clb_client,
+                                                clb_id_list=gateway_config.clb_id_list,
+                                                instance_id_list=instance_ids,
+                                                weight_list=[(0, 20)])
+    logging.info(f"set weight = 0 finished, instances: {instance_ids}")
+    time.sleep(10)
+    # 2.移除slb
+    clb_utils.remove_backend_servers_with_clbs(client=clb_client,
+                                           clb_id_list=gateway_config.clb_id_list,
+                                           instances=instance_ids)
+    # 3. 停止机器
+    clb_utils.stop_instances(client=ecs_client, instance_ids=instance_ids)
+    logging.info(f"instances stop finished, instances: {instance_ids}")
+    # 4. 判断机器运行状态是否为Stopped
+    while True:
+        response = clb_utils.get_instances_status(client=ecs_client, instance_ids=instance_ids)
+        if response.get('Code') is None:
+            instances_list = response.get('InstanceStatuses').get('InstanceStatus')
+            # logging.info(instances_list)
+            stopped_instances = [instance.get('InstanceId') for instance in instances_list
+                                 if instance.get('Status') == 'Stopped']
+            if len(stopped_instances) == len(instance_ids):
+                logging.info(f"instances stopped status set success, instances: {stopped_instances}")
+                break
+            else:
+                logging.info(f"stopped instances count = {len(stopped_instances)}, instances: {stopped_instances}")
+                time.sleep(5)
+        else:
+            logging.error(response)
+            sys.exit()
+    # 5. 释放机器
+    response = clb_utils.release_instances(client=ecs_client, instance_ids=stopped_instances)
+    if response.get('Code') is None:
+        logging.info(f"release instances finished, instances: {stopped_instances}")
+    else:
+        logging.error(f"release instances fail!!!")
+        sys.exit()
+
+
+def main():
+    try:
+        clb_client = clb_utils.connect_client(
+            access_key_id=gateway_config.clb_client_params['access_key_id'],
+            access_key_secret=gateway_config.clb_client_params['access_key_secret'],
+            region_id=gateway_config.clb_client_params['region_id']
+        )
+        ecs_client = clb_utils.connect_client(
+            access_key_id=gateway_config.ecs_client_params['access_key_id'],
+            access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+            region_id=gateway_config.ecs_client_params['region_id']
+        )
+
+        # 获取指定释放的机器数量
+        reduce_count = int(sys.argv[1])
+        logging.info(f"reduce instances count: {reduce_count}")
+
+        # 获取clb下所有机器
+        online_instance_ids = clb_utils.get_instance_ids(client=clb_client, clb_id=gateway_config.clb_id_list[0])
+        online_instance_count = len(online_instance_ids)
+        logging.info(f"online instance count: {online_instance_count}.")
+        logging.info(f"online instance ids: {online_instance_ids}")
+
+        # 获取前count台机器进行释放
+        reduce_instance_ids = online_instance_ids[:reduce_count]
+        logging.info(f"reduce instances: {reduce_instance_ids}")
+
+        # 停止并释放机器
+        remove_instances(ecs_client=ecs_client, clb_client=clb_client, instance_ids=reduce_instance_ids)
+        logging.info(f"stop & release instances end!")
+    except Exception as e:
+        logging.error(e)
+        sys.exit()
+
+
+if __name__ == '__main__':
+    main()

+ 132 - 0
gateway/clb_gateway_scaling_j_count.py

@@ -0,0 +1,132 @@
+import sys
+import os
+import asyncio
+import logging
+import time
+import requests
+
+import clb_utils
+import gateway_config
+
+from concurrent.futures import ThreadPoolExecutor
+
+
+health_instances = []
+
+
+def gateway_health_check(client, instance_id, max_wait_time=None):
+    """
+    服务健康检查
+    :param client: 客户端连接
+    :param instance_id: instanceId
+    :param max_wait_time: 最长等待时间,单位:s
+    :return:
+    """
+    global health_instances
+    start_time = time.time()
+    ip_address = clb_utils.get_ip_address(client=client, instance_id=instance_id)
+    while True:
+        health_check_url = f"http://{ip_address}:9000/healthcheck"
+        try:
+            http_code = requests.get(health_check_url).status_code
+        except:
+            logging.info(f"images is downloading ip: {ip_address}")
+            http_code = 0
+
+        if http_code == 200:
+            health_instances.append((instance_id, ip_address))
+            logging.info(f"health check success, instance: {instance_id}/{ip_address}")
+            break
+        elif max_wait_time is not None:
+            now = time.time()
+            if (now - start_time) >= max_wait_time:
+                logging.info(f"health check error, instance: {instance_id}/{ip_address}")
+                break
+            else:
+                time.sleep(10)
+        else:
+            time.sleep(10)
+
+
+async def ess_instance(ecs_client, clb_client, ess_count, max_workers):
+    """
+    扩容机器并运行新服务
+    :param ecs_client: 购买机器客户端连接
+    :param clb_client: 修改负载均衡权限
+    :param ess_count: 扩容数量
+    :param max_workers: 线程数
+    :return:
+    """
+    # 1. 购买机器并启动
+    ess_instance_ids = clb_utils.create_multiple_instances(
+        amount=ess_count,
+        client=ecs_client,
+        **gateway_config.instance_config_j,
+    )
+    time.sleep(60)
+
+    # 2. 发送启动脚本到机器上
+    clb_utils.send_file_to_ecs(client=ecs_client, instance_id_list=ess_instance_ids, **gateway_config.start_sh)
+    logging.info(f"send start shell file finished, instances: {ess_instance_ids}")
+    # 3. 启动服务
+    start_sh_param = "latest"
+    server_start_sh = os.path.join(gateway_config.start_sh['target_dir'], gateway_config.start_sh['name'])
+    server_start_commend = f"sh {server_start_sh} {start_sh_param}"
+    clb_utils.run_command(client=ecs_client, instance_ids=ess_instance_ids, command=server_start_commend)
+    # 4. 异步探活
+    global health_instances
+    health_instances = []
+    max_wait_time = 180
+    loop = asyncio.get_running_loop()
+    executor = ThreadPoolExecutor(max_workers=max_workers)
+    tasks = [
+        loop.run_in_executor(executor, gateway_health_check, *args) for args in
+        [(clb_client, instance_id, max_wait_time) for instance_id in ess_instance_ids]
+    ]
+    await asyncio.wait(tasks)
+    logging.info(f"health instances count: {len(health_instances)}, {health_instances}")
+    # 5. 挂载流量
+    if len(health_instances) > 0:
+        # 所有机器探活成功
+        time.sleep(20)
+        clb_utils.add_backend_servers_with_clbs(client=clb_client,
+                                            clb_id_list=gateway_config.clb_id_list,
+                                            instances=health_instances)
+        add_weight_list = [(10, 5), (20, 5), (40, 5), (60, 5), (80, 5), (100, 5)]
+        health_instance_ids = [instance_id for instance_id, _ in health_instances]
+        clb_utils.set_instance_weight_process_with_clbs(client=clb_client,
+                                                    clb_id_list=gateway_config.clb_id_list,
+                                                    instance_id_list=health_instance_ids,
+                                                    weight_list=add_weight_list)
+        logging.info(f"ess count: {ess_count}, "
+                     f"create count: {len(ess_instance_ids)}, "
+                     f"finished count: {len(health_instance_ids)}")
+    else:
+        logging.info(f"ess count: {ess_count}, "
+                     f"create count: {len(ess_instance_ids)}, "
+                     f"health count: {len(health_instances)}")
+        sys.exit()
+
+
+def main():
+    try:
+        clb_client = clb_utils.connect_client(access_key_id=gateway_config.clb_client_params['access_key_id'],
+                                          access_key_secret=gateway_config.clb_client_params['access_key_secret'],
+                                          region_id=gateway_config.clb_client_params['region_id'])
+        ecs_client = clb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+                                             access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+                                             region_id=gateway_config.ecs_client_params['region_id'])
+        # 获取批量创建ECS实例的数量
+        ess_instance_count = int(sys.argv[1])
+        # 扩容机器并启动服务
+        logging.info(f"ess instances start ...")
+        logging.info(f"ess instance count: {ess_instance_count}")
+        asyncio.run(ess_instance(ecs_client=ecs_client, clb_client=clb_client,
+                                 ess_count=ess_instance_count, max_workers=2))
+        logging.info(f"ess instances end!")
+    except Exception as e:
+        logging.error(e)
+
+
+if __name__ == '__main__':
+    main()

+ 48 - 0
gateway/clb_gateway_unittest.py

@@ -0,0 +1,48 @@
+import logging
+import unittest
+import clb_utils
+import gateway_config
+
+
+class MyTestCase(unittest.TestCase):
+    def test_add_backend_servers_with_clbs(self):
+        clb_client = clb_utils.connect_client(access_key_id=gateway_config.clb_client_params['access_key_id'],
+                                              access_key_secret=gateway_config.clb_client_params['access_key_secret'],
+                                              region_id=gateway_config.clb_client_params['region_id'])
+        health_instances = [('i-bp10y6fsz6obfw5uqa7z', '192.168.207.72')]
+        clb_utils.add_backend_servers_with_clbs(client=clb_client,
+                                                clb_id_list=gateway_config.clb_id_list,
+                                                instances=health_instances)
+
+    def test_set_instance_weight_process_with_clbs(self):
+        clb_client = clb_utils.connect_client(access_key_id=gateway_config.clb_client_params['access_key_id'],
+                                              access_key_secret=gateway_config.clb_client_params['access_key_secret'],
+                                              region_id=gateway_config.clb_client_params['region_id'])
+        health_instances = ['i-bp10y6fsz6obfw5uqa7z']
+        add_weight_list = [(10, 5), (20, 5), (40, 5), (60, 5), (80, 5), (95, 5)]
+        clb_utils.set_instance_weight_process_with_clbs(client=clb_client,
+                                                        clb_id_list=gateway_config.clb_id_list,
+                                                        instance_id_list=health_instances,
+                                                        weight_list=add_weight_list)
+
+
+    def test_get_instance_ids(self):
+        clb_client = clb_utils.connect_client(access_key_id=gateway_config.clb_client_params['access_key_id'],
+                                              access_key_secret=gateway_config.clb_client_params['access_key_secret'],
+                                              region_id=gateway_config.clb_client_params['region_id'])
+        online_instance_ids = clb_utils.get_instance_ids(client=clb_client,
+                                   clb_id=gateway_config.clb_id_list[0])
+        logging.info(online_instance_ids)
+
+    def test_remove_backend_servers_with_clbs(self):
+        clb_client = clb_utils.connect_client(access_key_id=gateway_config.clb_client_params['access_key_id'],
+                                              access_key_secret=gateway_config.clb_client_params['access_key_secret'],
+                                              region_id=gateway_config.clb_client_params['region_id'])
+        instance_ids = ['i-bp10y6fsz6obfw5uqa7z']
+        clb_utils.remove_backend_servers_with_clbs(client=clb_client,
+                                                   clb_id_list=gateway_config.clb_id_list,
+                                                   instances=instance_ids)
+
+
+if __name__ == '__main__':
+    unittest.main()

+ 322 - 0
gateway/clb_gateway_update_list.py

@@ -0,0 +1,322 @@
+import asyncio
+import sys
+import time
+import requests
+import clb_utils
+import logging
+import os
+import docker
+import gateway_config
+
+from concurrent.futures import ThreadPoolExecutor
+
+
+health_instances = []
+ess_instances = []
+remove_container_instances = []
+
+
+def server_health_check(client, instance_id):
+    """
+    服务健康检查
+    :param client: 客户端连接
+    :param instance_id: instanceId
+    :return:
+    """
+    global health_instances
+    ip_address = clb_utils.get_ip_address(client=client, instance_id=instance_id)
+    while True:
+        health_check_url = f"http://{ip_address}:9000/healthcheck"
+        try:
+            http_code = requests.get(health_check_url).status_code
+        except:
+            logging.info(f"images is downloading ip:{ip_address}")
+            http_code = 0
+
+        if http_code == 200:
+            health_instances.append((instance_id, ip_address))
+            logging.info(f"health check success, instance: {instance_id}/{ip_address}")
+            break
+        else:
+            time.sleep(10)
+
+
+async def ess_instance(ecs_client, clb_client, ess_count, max_workers, version):
+    """
+    扩容机器并运行新服务
+    :param ecs_client: 购买机器客户端连接
+    :param clb_client: 修改负载均衡权限
+    :param ess_count: 扩容数量
+    :param max_workers: 线程数
+    :param version: 版本标记
+    :return:
+    """
+    # 1. 购买机器并启动
+    ess_instance_ids = clb_utils.create_multiple_instances(
+        amount=ess_count,
+        client=ecs_client,
+        **gateway_config.instance_config_j,
+    )
+    time.sleep(60)
+
+    # 2. 发送启动脚本到机器上
+    clb_utils.send_file_to_ecs(client=ecs_client, instance_id_list=ess_instance_ids, **gateway_config.start_sh)
+    logging.info(f"send start shell file finished, instances: {ess_instance_ids}")
+    # 3. 启动服务
+    server_start_sh = os.path.join(gateway_config.start_sh['target_dir'], gateway_config.start_sh['name'])
+    server_start_commend = f"sh {server_start_sh} {version}"
+    clb_utils.run_command(client=ecs_client, instance_ids=ess_instance_ids, command=server_start_commend)
+    # 4. 异步探活
+    global health_instances
+    health_instances = []
+    loop = asyncio.get_running_loop()
+    executor = ThreadPoolExecutor(max_workers=max_workers)
+    tasks = [
+        loop.run_in_executor(executor, server_health_check, *args) for args in
+        [(clb_client, instance_id) for instance_id in ess_instance_ids]
+    ]
+    await asyncio.wait(tasks)
+    logging.info(f"health instances count: {len(health_instances)}, {health_instances}")
+    # 5. 挂载流量
+    if len(health_instances) == len(ess_instance_ids):
+        # 所有机器探活成功
+        time.sleep(10)
+        clb_utils.add_backend_servers_with_clbs(client=clb_client,
+                                            clb_id_list=gateway_config.clb_id_list,
+                                            instances=health_instances)
+        health_instance_ids = [instance_id for instance_id, _ in health_instances]
+        add_weight_list = [(10, 5), (20, 5), (40, 5), (60, 5), (80, 5), (100, 5)]
+        clb_utils.set_instance_weight_process_with_clbs(client=clb_client,
+                                                    clb_id_list=gateway_config.clb_id_list,
+                                                    instance_id_list=health_instance_ids,
+                                                    weight_list=add_weight_list)
+        global ess_instances
+        ess_instances.extend(health_instance_ids)
+        logging.info(f"ess count: {ess_count}, "
+                     f"create count: {len(ess_instance_ids)}, "
+                     f"finished count: {len(health_instance_ids)}")
+    else:
+        logging.info(f"ess count: {ess_count}, "
+                     f"create count: {len(ess_instance_ids)}, "
+                     f"health count: {len(health_instances)}")
+        sys.exit()
+
+
+def remove_container_image(client, instance_id, container_name_list):
+    """
+    移除旧容器并删除旧镜像
+    :param client: 客户端连接
+    :param instance_id: instanceId type-string
+    :param container_name_list: 容器名称 type-string
+    :return:
+    """
+    ip_address = clb_utils.get_ip_address(client=client, instance_id=instance_id)
+    logging.info(f"服务器信息:{instance_id}/{ip_address}")
+    client = docker.DockerClient(base_url=f'tcp://{ip_address}:2375', timeout=60)
+    # 移除旧的容器
+    container_remove_retry = 3
+    i = 0
+    while True:
+        if i >= container_remove_retry:
+            logging.error(f"容器不存在或者无法删除当前容器, instance = {instance_id}/{ip_address}")
+            sys.exit()
+        try:
+            flag = False
+            for container_name in container_name_list:
+                try:
+                    container_id = client.containers.get(container_name)
+                    container_id.remove(force=True)
+                    flag = True
+                    break
+                except:
+                    continue
+            if flag:
+                break
+        except Exception as e:
+            i += 1
+
+    # 删除旧镜像
+    images_remove_retry = 3
+    j = 0
+    while True:
+        if j >= images_remove_retry:
+            logging.error(f"镜像不存在,无法获取到镜像ID, instance = {instance_id}/{ip_address}")
+            sys.exit()
+        try:
+            images = client.images.list()
+            for image in images:
+                client.images.remove(force=True, image=image.tags[0])
+                time.sleep(2)
+            global remove_container_instances
+            remove_container_instances.append(instance_id)
+            break
+        except Exception as e:
+            i += 1
+
+
+async def update_instance(ecs_client, clb_client, instance_ids, max_workers, version):
+    """
+    线上机器更新
+    :param ecs_client:
+    :param clb_client: slb客户端连接
+    :param instance_ids: instanceId type-list
+    :param max_workers:
+    :param version: 版本标记
+    :return:
+    """
+    media_index = len(instance_ids)//2
+    instance_ids_group = [instance_ids[:media_index], instance_ids[media_index:]]
+    update_finished_count = 0
+    for instance_id_list in instance_ids_group:
+        logging.info(f"update instances: {instance_id_list}")
+        # 1. 摘流量
+        clb_utils.set_instance_weight_process_with_clbs(client=clb_client,
+                                                    clb_id_list=gateway_config.clb_id_list,
+                                                    instance_id_list=instance_id_list,
+                                                    weight_list=[(0, 15)])
+        logging.info(f"set weight with 0 finished, instances: {instance_id_list}")
+        # 2. 异步移除旧容器并删除旧镜像
+        global remove_container_instances
+        remove_container_instances = []
+        container_name_list = ['piaoquan-gateway', 'gateway']
+        loop = asyncio.get_running_loop()
+        executor = ThreadPoolExecutor(max_workers=max_workers)
+        tasks = [
+            loop.run_in_executor(executor, remove_container_image, *args) for args in
+            [(clb_client, instance_id, container_name_list) for instance_id in instance_id_list]
+        ]
+        await asyncio.wait(tasks)
+        logging.info(f"remove container & images finished, instances: {remove_container_instances},"
+                     f" count: {len(remove_container_instances)}")
+        if len(remove_container_instances) < len(instance_id_list):
+            logging.error(f"remove container image failed| "
+                          f"request count: {len(instance_id_list)}, removed count: {len(remove_container_instances)}")
+            sys.exit()
+        # 3. 发送启动脚本到机器上
+        clb_utils.send_file_to_ecs(client=ecs_client, instance_id_list=instance_id_list, **gateway_config.start_sh)
+        logging.info(f"send start shell file finished, instances: {instance_id_list}, count: {len(instance_id_list)}")
+        # 4. 启动服务
+        server_start_sh = os.path.join(gateway_config.start_sh['target_dir'], gateway_config.start_sh['name'])
+        server_start_commend = f"sh {server_start_sh} {version}"
+        clb_utils.run_command(client=ecs_client, instance_ids=instance_id_list, command=server_start_commend)
+        # 5. 异步探活
+        global health_instances
+        health_instances = []
+        loop = asyncio.get_running_loop()
+        executor = ThreadPoolExecutor(max_workers=max_workers)
+        tasks = [
+            loop.run_in_executor(executor, server_health_check, *args) for args in
+            [(clb_client, instance_id) for instance_id in instance_id_list]
+        ]
+        await asyncio.wait(tasks)
+        logging.info(f"health instances: {health_instances}, count: {len(health_instances)}")
+        # 6. 挂载流量
+        if len(health_instances) == len(instance_id_list):
+            # 所有机器探活成功
+            time.sleep(10)
+            clb_utils.add_backend_servers_with_clbs(client=clb_client,
+                                                clb_id_list=gateway_config.clb_id_list,
+                                                instances=health_instances)
+            health_instance_ids = [instance_id for instance_id, _ in health_instances]
+            add_weight_list = [(10, 5), (20, 5), (40, 5), (60, 5), (80, 5), (100, 5)]
+            clb_utils.set_instance_weight_process_with_clbs(client=clb_client,
+                                                        clb_id_list=gateway_config.clb_id_list,
+                                                        instance_id_list=health_instance_ids,
+                                                        weight_list=add_weight_list)
+            logging.info(f"finished instances: {health_instances}, count: {len(health_instances)}")
+            update_finished_count += len(health_instances)
+            logging.info(f"update finished: {update_finished_count}/{len(instance_ids)}")
+        else:
+            logging.info(f"health instances: {health_instances}, count: {len(health_instances)}")
+            sys.exit()
+
+
+def remove_instances(ecs_client, clb_client, instance_ids):
+    """
+    停止并释放机器
+    :param ecs_client:
+    :param clb_client:
+    :param instance_ids: instanceId type-list
+    :return: None
+    """
+    # 1. 摘流量
+    clb_utils.set_instance_weight_process_with_clbs(client=clb_client,
+                                                clb_id_list=gateway_config.clb_id_list,
+                                                instance_id_list=instance_ids,
+                                                weight_list=[(0, 20)])
+    logging.info(f"set weight = 0 finished, instances: {instance_ids}")
+    time.sleep(10)
+    # 2.移除slb
+    clb_utils.remove_backend_servers_with_clbs(client=clb_client,
+                                           clb_id_list=gateway_config.clb_id_list,
+                                           instances=instance_ids)
+    # 3. 停止机器
+    clb_utils.stop_instances(client=ecs_client, instance_ids=instance_ids)
+    logging.info(f"instances stop finished, instances: {instance_ids}")
+    # 4. 判断机器运行状态是否为Stopped
+    while True:
+        response = clb_utils.get_instances_status(client=ecs_client, instance_ids=instance_ids)
+        if response.get('Code') is None:
+            instances_list = response.get('InstanceStatuses').get('InstanceStatus')
+            # logging.info(instances_list)
+            stopped_instances = [instance.get('InstanceId') for instance in instances_list
+                                 if instance.get('Status') == 'Stopped']
+            if len(stopped_instances) == len(instance_ids):
+                logging.info(f"instances stopped status set success, instances: {stopped_instances}")
+                break
+            else:
+                logging.info(f"stopped instances count = {len(stopped_instances)}, instances: {stopped_instances}")
+                time.sleep(5)
+        else:
+            logging.error(response)
+            sys.exit()
+    # 5. 释放机器
+    response = clb_utils.release_instances(client=ecs_client, instance_ids=stopped_instances)
+    if response.get('Code') is None:
+        logging.info(f"release instances finished, instances: {stopped_instances}")
+    else:
+        logging.error(f"release instances fail!!!")
+        sys.exit()
+
+
+def main():
+    try:
+        version = sys.argv[1]
+        clb_client = clb_utils.connect_client(access_key_id=gateway_config.clb_client_params['access_key_id'],
+                                          access_key_secret=gateway_config.clb_client_params['access_key_secret'],
+                                          region_id=gateway_config.clb_client_params['region_id'])
+        ecs_client = clb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
+                                             access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
+                                             region_id=gateway_config.ecs_client_params['region_id'])
+
+        # 1. 获取clb下所有机器
+        online_instance_ids = clb_utils.get_instance_ids(client=clb_client, clb_id=gateway_config.clb_id_list[0])
+        online_instance_count = len(online_instance_ids)
+        logging.info(f"online instance count: {online_instance_count}.")
+        logging.info(f"online instance ids: {online_instance_ids}")
+
+        # 2. 扩容机器并启动新服务 扩容数量:线上机器数量//2
+        logging.info(f"ess instances start ...")
+        ess_instance_count = online_instance_count // 2
+        logging.info(f"ess instance count: {ess_instance_count}")
+        asyncio.run(ess_instance(ecs_client=ecs_client, clb_client=clb_client,
+                                 ess_count=ess_instance_count, max_workers=2, version=version))
+        logging.info(f"ess instances end!")
+
+        # 3. 原有机器进行更新
+        logging.info(f"update online instances start ...")
+        asyncio.run(update_instance(ecs_client=ecs_client, clb_client=clb_client,
+                                    instance_ids=online_instance_ids, max_workers=8, version=version))
+        logging.info(f"update online instances end!")
+
+        # 4. 停止并释放扩容机器
+        logging.info(f"stop & release instances start ...")
+        remove_instances(ecs_client=ecs_client, clb_client=clb_client, instance_ids=ess_instances)
+        logging.info(f"stop & release instances end!")
+    except Exception as e:
+        logging.error(e)
+        sys.exit()
+
+
+if __name__ == '__main__':
+    main()

+ 179 - 0
gateway/clb_gateway_update_one.py

@@ -0,0 +1,179 @@
+#!/bin/env python
+#coding=utf-8
+#edite  panwang
+import docker
+import sys
+import  requests
+import json
+import queue
+import threading
+from aliyunsdkcore import client
+from aliyunsdkecs.request.v20140526.DescribeNetworkInterfacesRequest import DescribeNetworkInterfacesRequest
+from aliyunsdkcore.request import CommonRequest
+from aliyunsdkslb.request.v20140515.DescribeLoadBalancerAttributeRequest import DescribeLoadBalancerAttributeRequest
+import time
+
+
+AccessKey = 'LTAIuPbTPL3LDDKN'
+AccessSecret = 'ORcNedKwWuwVtcq4IRFtUDZgS0b1le'
+RegionId = 'cn-hangzhou'
+version = sys.argv[1]
+slbIDs = ['lb-bp1jtzhp9krunyv3mim2q','lb-bp1mfk1gmd47twfh6bgwv']
+#slbIDs = ['lb-bp136gme0r0n02aew4vjc','lb-bp1mfk1gmd47twfh6bgwv']
+apps = 'piaoquan-gateway'
+repository = 'registry-vpc.cn-hangzhou.aliyuncs.com/stuuudy/{}'.format(apps)
+registry = 'registry-vpc.cn-hangzhou.aliyuncs.com/stuuudy/{}:{}'.format(apps, version)
+
+
+
+clt = client.AcsClient (AccessKey, AccessSecret, RegionId)
+
+class MyThread (threading.Thread):
+    def __init__(self, func):
+        threading.Thread.__init__ (self)
+
+        self.func = func
+
+    def run(self):
+        self.func ()
+
+def  checkHealth(ipadd):
+    while  True:
+        health_url = 'http://%s:9000/healthcheck' %(ipadd)
+        header = {"Content-Type":"application/json"}
+        try:
+            health_code = requests.get(health_url).status_code
+        except  Exception  as  e:
+           continue
+        if  health_code == 200:
+            print("httpcode 200,开始挂载流量")
+            return  False
+#服务更新完之后逐步修改服务器的权重值,直接加载100会出现502。权重值每次增加10,每2s修改一次
+def setInstanceWeightProcess(slb_id, instance_id):
+    for  i in range(1,6):
+        weight = i*20
+        setWeight(slb_id,instance_id,weight)
+        print("当前权重: ", weight)
+        time.sleep(5)
+
+#设置权重 instance_id :服务器id,weight:权重值
+def setWeight(slb_id,instance_id ,weight):
+
+    BackendServers = [{"ServerId": instance_id, "Weight": weight}]
+
+    request = CommonRequest ()
+    request.set_accept_format ('json')
+    request.set_domain ('slb.aliyuncs.com')
+    request.set_version ('2014-05-15')
+    request.set_method ('POST')
+    request.set_action_name ('SetBackendServers')
+    request.add_query_param ('BackendServers', BackendServers)
+    request.add_query_param ('LoadBalancerId', slb_id)
+    try:
+        response = clt.do_action (request)
+    except Exception as e:
+        print (e)
+
+
+def  getInstanceId(slb_id):
+    request = DescribeLoadBalancerAttributeRequest()
+    request.set_accept_format('json')
+    request.set_LoadBalancerId(slb_id)
+    response = clt.do_action_with_exception(request)
+    return json.loads (response)
+
+
+#获取实例IP地址
+def  getIpadd(instance_id):
+    request = DescribeNetworkInterfacesRequest()
+    request.set_accept_format('json')
+    request.set_InstanceId(instance_id)
+    response = clt.do_action_with_exception(request)
+    request_content = json.loads(response)
+    IpAddr  = request_content['NetworkInterfaceSets']['NetworkInterfaceSet'][0]['PrivateIpAddress']
+    return  IpAddr
+
+
+#更新服务
+def  update(instance_id):
+    time.sleep(10)
+    global success_count
+
+    ipadd = getIpadd(instance_id)
+    print("服务器信息:" + "%s/%s" %(instance_id, ipadd))
+    client = docker.DockerClient(base_url='tcp://%s:2375'  %(ipadd),timeout=60 )
+    try:
+    #更新前移除旧的容器
+        id = client.containers.get(apps)
+        id.remove(force = True)
+
+    except Exception as e:
+        print("容器不存在或者无法删除当前容器")
+
+    try:
+        #登录镜像仓库
+        client.login(username='stuuudys' ,password='Qingqu@2019', registry='registry-vpc.cn-hangzhou.aliyuncs.com')
+        #启动一个容器
+        client.containers.run(registry, detach = True, cap_add = 'SYS_PTRACE', network_mode = 'host', name = apps,volumes={'/datalog/': {'bind': '/datalog/', 'mode': 'rw'}})
+        print("开始健康检查")
+        checkHealth(ipadd)
+        print("%s :权重修改中......" %(ipadd))
+
+        for slbID in slbIDs:
+            setInstanceWeightProcess(slbID, instance_id)
+        success_count = success_count + 1
+        print("更新进度" + "%s/%s"  %(success_count, total))
+    except Exception as e:
+        print(e)
+        sys.exit()
+        #容器启动失败立即退出更新
+
+
+def  pull_image():
+    instanceId = q1.get()
+    ipaddr = getIpadd(instanceId)
+    cd_url = "tcp://{}:2375".format(ipaddr)
+    client = docker.DockerClient(base_url=cd_url, timeout=30)
+
+    try:
+        client.images.pull(repository, tag=version)
+        print(ipaddr, "pull  images success ")
+
+        return  True
+    except Exception as e:
+        print(e, "images pull  fail")
+        return  False
+
+
+if __name__ == '__main__':
+
+
+    #更新完成计数
+    success_count = 0
+    threads = []
+    res = getInstanceId (slbIDs[0])
+    #slb下服务器总数
+    total = len(res["BackendServers"]["BackendServer"])
+    InstanceIDs = []
+    q1 = queue.Queue()
+    if res["BackendServers"]["BackendServer"]:
+        for i in range ((len (res["BackendServers"]["BackendServer"]))):
+            InstanceID = res["BackendServers"]["BackendServer"][i]["ServerId"]
+            InstanceIDs.append(InstanceID)
+            q1.put(InstanceID)
+    print(InstanceIDs)
+    #多线程预先pull images
+    for i in range (len(InstanceIDs)):
+        thread = MyThread (pull_image)
+        thread.start ()
+        threads.append(thread)
+    for thread in threads:
+            thread.join ()
+
+    #单线程更新
+    for instanceID in InstanceIDs:
+        for slbID in slbIDs:
+            setWeight(slbID,instanceID,0)
+        update(instanceID)
+
+

+ 524 - 0
gateway/clb_utils.py

@@ -0,0 +1,524 @@
+import logging
+import json
+import sys
+import time
+import requests
+import asyncio
+
+from aliyunsdkcore.client import AcsClient
+from aliyunsdkslb.request.v20140515.AddBackendServersRequest import AddBackendServersRequest
+from aliyunsdkslb.request.v20140515.RemoveBackendServersRequest import RemoveBackendServersRequest
+from aliyunsdkecs.request.v20140526.RunInstancesRequest import RunInstancesRequest
+from aliyunsdkecs.request.v20140526.DescribeInstancesRequest import DescribeInstancesRequest
+from aliyunsdkecs.request.v20140526.DescribeNetworkInterfacesRequest import DescribeNetworkInterfacesRequest
+from aliyunsdkslb.request.v20140515.DescribeLoadBalancerAttributeRequest import DescribeLoadBalancerAttributeRequest
+from aliyunsdkecs.request.v20140526.RunCommandRequest import RunCommandRequest
+from aliyunsdkecs.request.v20140526.SendFileRequest import SendFileRequest
+from aliyunsdkecs.request.v20140526.StopInstancesRequest import StopInstancesRequest
+from aliyunsdkecs.request.v20140526.DeleteInstancesRequest import DeleteInstancesRequest
+from aliyunsdkecs.request.v20140526.DescribeInstanceStatusRequest import DescribeInstanceStatusRequest
+from aliyunsdkcore.request import CommonRequest
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
+                    datefmt='%a, %d %b %Y %H:%M:%S')
+
+
+def send_msg_to_feishu(webhook, key_word, msg_text):
+    """发送消息到飞书"""
+    headers = {'Content-Type': 'application/json'}
+    payload_message = {
+        "msg_type": "text",
+        "content": {
+            "text": '{}: {}'.format(key_word, msg_text)
+        }
+    }
+    response = requests.request('POST', url=webhook, headers=headers, data=json.dumps(payload_message))
+    logging.info(response.text)
+
+
+def connect_client(access_key_id, access_key_secret, region_id):
+    """
+    初始化账号,连接客户端
+    :param access_key_id: access key Id, type-string
+    :param access_key_secret: access key secret, type-string
+    :param region_id: region_id
+    :return: clt
+    """
+    try:
+        clt = AcsClient(ak=access_key_id, secret=access_key_secret, region_id=region_id)
+        return clt
+    except Exception as e:
+        # 失败,记录报错信息,发送通知,停止并退出
+        logging.error(e)
+        sys.exit()
+
+
+def build_create_instances_request(image_id, vswitch_id, security_group_id, zone_id, instance_type, instance_name,
+                                   disk_size, disk_category, key_pair_name, tags):
+    """
+    购买服务器参数配置
+    :param image_id: 使用的镜像信息 type-string
+    :param vswitch_id: 选择的交换机 type-string
+    :param security_group_id: 当前vpc类型的安全组 type-string
+    :param zone_id: 服务器所在区域 type-string
+    :param instance_type: 实例规格 type-string
+    :param instance_name: 实例命名 type-string
+    :param disk_size: 磁盘大小,单位:G,type-string
+    :param disk_category: 磁盘类型 type-string
+    :param key_pair_name: 密钥对名称 type-string
+    :param tags: 标签 type-list, eg: [{"Key": "ecs", "Value": "rov-server.prod"}, ...]
+    :return: request
+    """
+    request = RunInstancesRequest()
+    request.set_ImageId(image_id)
+    request.set_VSwitchId(vswitch_id)
+    request.set_SecurityGroupId(security_group_id)
+    request.set_ZoneId(zone_id)
+    request.set_InstanceType(instance_type)
+    request.set_InstanceName(instance_name)
+    request.set_SystemDiskSize(disk_size)
+    request.set_SystemDiskCategory(disk_category)
+    request.set_KeyPairName(key_pair_name)
+    request.set_Tags(tags)
+    return request
+
+
+def send_request(client, request):
+    """
+    发送API请求
+    :param client: 客户端连接
+    :param request: 请求配置
+    :return: response
+    """
+    request.set_accept_format('json')
+    try:
+        response = client.do_action_with_exception(request)
+        response = json.loads(response)
+        # logging.info(response)
+        return response
+    except Exception as e:
+        # 失败,记录报错信息,发送通知,停止并退出
+        logging.error(e)
+        sys.exit()
+
+
+def send_req(client, request):
+    """
+    发送API请求
+    :param client: 客户端连接
+    :param request: 请求配置
+    :return: response
+    """
+    request.set_accept_format('json')
+    response = client.do_action_with_exception(request)
+    #print(response)
+    response = json.loads(response)
+    print(response)
+        # logging.info(response)
+    print(response.get('Code'))
+    return response
+    #except Exception as e:
+        # 失败,记录报错信息,发送通知,停止并退出
+    #logging.error(e)
+    #sys.exit()
+
+def check_instance_running(client, instance_ids):
+    """
+    检查服务器运行状态
+    :param client: 客户端连接
+    :param instance_ids: 实例id列表, type-list
+    :return: running_count,Status为Running的实例数
+    """
+    try:
+        request = DescribeInstancesRequest()
+        request.set_InstanceIds(json.dumps(instance_ids))
+        request.set_PageSize(100)
+        response = send_request(client=client, request=request)
+        if response.get('Code') is None:
+            instances_list = response.get('Instances').get('Instance')
+            running_count = 0
+            running_instances = []
+            for instance_detail in instances_list:
+                if instance_detail.get('Status') == "Running":
+                    running_count += 1
+                    running_instances.append(instance_detail.get('InstanceId'))
+            return running_count, running_instances
+        else:
+            # 失败,记录报错信息,发送通知,停止并退出
+            logging.error(response)
+            sys.exit()
+    except Exception as e:
+        # 失败,记录报错信息,发送通知,停止并退出
+        logging.error(e)
+        sys.exit()
+
+
+def create_multiple_instances(amount, client,
+                              image_id, vswitch_id, security_group_id, zone_id, instance_type, instance_name,
+                              disk_size, disk_category, key_pair_name, tags):
+    """
+    创建多个ECS实例
+    :param amount: 创建实例数 type-int 取值范围:[1, 100]
+    :param client: 购买机器客户端连接
+    :param image_id: 使用的镜像信息 type-string
+    :param vswitch_id: 选择的交换机 type-string
+    :param security_group_id: 当前vpc类型的安全组 type-string
+    :param zone_id: 服务器所在区域 type-string
+    :param instance_type: 实例规格 type-string
+    :param instance_name: 实例命名 type-string
+    :param disk_size: 磁盘大小,单位:G,type-string
+    :param disk_category: 磁盘类型 type-string
+    :param key_pair_name: 密钥对名称 type-string
+    :param tags: 标签 type-list, eg: [{"Key": "ecs", "Value": "rov-server.prod"}, ...]
+    :return:
+    """
+    logging.info(f"create instances start, request amount: {amount}.")
+    # 1. 连接客户端
+    # create_instances_clt = connect_client(
+    #     access_key_id=access_key_id, access_key_secret=access_key_secret, region_id=region_id
+    # )
+    # 2. 请求参数配置
+    request = build_create_instances_request(
+        image_id=image_id, vswitch_id=vswitch_id, security_group_id=security_group_id, zone_id=zone_id,
+        instance_type=instance_type, instance_name=instance_name, disk_size=disk_size, disk_category=disk_category,
+        key_pair_name=key_pair_name, tags=tags
+    )
+    request.set_Amount(amount)
+    # 3. 发送API请求,购买机器并启动
+    response = send_request(client=client, request=request)
+    if response.get('Code') is None:
+        instance_ids = response.get('InstanceIdSets').get('InstanceIdSet')
+        logging.info(f"success amount: {len(instance_ids)}, instance ids: {instance_ids}.")
+        # 获取机器运行状态
+        running_amount = 0
+        while running_amount < amount:
+            time.sleep(10)
+            running_amount, running_instances = check_instance_running(client=client, instance_ids=instance_ids)
+            logging.info(f"running amount: {running_amount}, running instances: {running_instances}.")
+        return instance_ids
+    else:
+        # 失败,记录报错信息,发送通知,停止并退出
+        logging.error(response)
+        sys.exit()
+
+
+def run_command(client, instance_ids, command):
+    """
+    批量执行命令
+    :param client: 客户端连接
+    :param instance_ids: 实例id列表, type-list, 最多能指定50台ECS实例ID
+    :param command: 命令 type-string
+    :return:
+    """
+    for i in range(len(instance_ids) // 50 + 1):
+        instance_id_list = instance_ids[i * 50:(i + 1) * 50]
+        if len(instance_id_list) == 0:
+            return
+        request = RunCommandRequest()
+        request.set_accept_format('json')
+        request.set_Type("RunShellScript")
+        request.set_CommandContent(command)
+        request.set_InstanceIds(instance_id_list)
+        response = send_request(client=client, request=request)
+        logging.info(response)
+
+# def run_per_command(client, instance, command):
+#     """
+#     批量执行命令
+#     :param client: 客户端连接
+#     :param instance_ids: 实例id列表, type-list, 最多能指定50台ECS实例ID
+#     :param command: 命令 type-string
+#     :return:
+#     """
+#     #for i in range(len(instance_ids) // 50 + 1)
+#     request = RunCommandRequest()
+#     request.set_accept_format('json')
+#     request.set_Type("RunShellScript")
+#     request.set_CommandContent(command)
+#     request.set_InstanceIds([instance])
+#     response = send_req(client=client, request=request)
+#     logging.info(response)
+#     return response
+
+
+def get_instance_ids(client, clb_id):
+    """
+    获取slb下所有服务器instanceId
+    :param client: 客户端连接
+    :param clb_id: 负载均衡id type-string
+    :return: instance_ids type-list
+    """
+    request = DescribeLoadBalancerAttributeRequest()
+    request.set_accept_format('json')
+    request.set_LoadBalancerId(clb_id)
+    response = send_request(client=client, request=request)
+    instance_ids = [instance["ServerId"] for instance in response["BackendServers"]["BackendServer"]]
+    return instance_ids
+
+
+def get_ip_address(client, instance_id):
+    """
+    获取实例IP地址
+    :param client: 客户端连接
+    :param instance_id: 实例id, type-string
+    :return: ip_address, type-string
+    """
+    request = DescribeNetworkInterfacesRequest()
+    request.set_accept_format('json')
+    request.set_InstanceId(instance_id)
+    response = send_request(client=client, request=request)
+    ip_address = response['NetworkInterfaceSets']['NetworkInterfaceSet'][0]['PrivateIpAddress']
+    return ip_address
+
+
+def set_weight_for_instances(client, clb_id, instance_id_list, weight):
+    """
+    同时设置多台服务器的slb权重,权重一样
+    :param client: 客户端连接
+    :param clb_id: clb_id
+    :param instance_id_list: 服务器id list
+    :param weight: 权重值
+    :return: None
+    """
+    for i in range(len(instance_id_list) // 20 + 1):
+        instances_list = instance_id_list[i * 20:(i + 1) * 20]
+        if len(instances_list) == 0:
+            return
+        BackendServers = [{"ServerId": instance_id, "Weight": weight} for instance_id in instances_list]
+        request = CommonRequest()
+        request.set_accept_format('json')
+        request.set_domain('slb.aliyuncs.com')
+        request.set_version('2014-05-15')
+        request.set_method('POST')
+        request.set_action_name('SetBackendServers')
+        request.add_query_param('BackendServers', BackendServers)
+        request.add_query_param('LoadBalancerId', clb_id)
+        response = send_request(client=client, request=request)
+
+
+def send_file_to_ecs(client, instance_id_list, target_dir, name, content):
+    """
+    发送文件到ecs
+    :param client:
+    :param instance_id_list: 最多能指定50台ECS实例ID
+    :param target_dir: 文件存放目录 type-string
+    :param name: 文件名 type-string
+    :param content: 文件内容 type-string
+    :return:
+    """
+    for i in range(len(instance_id_list) // 50 + 1):
+        instance_ids = instance_id_list[i * 50:(i + 1) * 50]
+        if len(instance_ids) == 0:
+            return
+        request = SendFileRequest()
+        request.set_Content(content)
+        request.set_TargetDir(target_dir)
+        request.set_Name(name)
+        request.set_Overwrite(True)
+        request.set_InstanceIds(instance_ids)
+        response = send_request(client=client, request=request)
+
+
+def stop_instances(client, instance_ids, force_stop=False):
+    """
+    停止实例
+    :param client:
+    :param instance_ids: 实例ID, type-list
+    :param force_stop: 是否强制关机, True-强制关机, False-正常关机, type-bool
+    :return:
+    """
+    request = StopInstancesRequest()
+    request.set_InstanceIds(instance_ids)
+    request.set_ForceStop(force_stop)
+    response = send_request(client=client, request=request)
+    return response
+
+
+def release_instances(client, instance_ids, force=False):
+    """
+    释放实例
+    :param client:
+    :param instance_ids: instance_id, type-list
+    :param force: 是否强制释放, True-强制释放, False-正常释放, type-bool
+    :return:
+    """
+    request = DeleteInstancesRequest()
+    request.set_InstanceIds(instance_ids)
+    request.set_Force(force)
+    response = send_request(client=client, request=request)
+    return response
+
+
+def get_instances_status(client, instance_ids):
+    """
+    获取实例运行状态
+    :param client:
+    :param instance_ids: instance_id, type-liist
+    :return:
+    """
+    request = DescribeInstanceStatusRequest()
+    request.set_InstanceIds(instance_ids)
+    request.set_PageSize(50)
+    response = send_request(client=client, request=request)
+    return response
+
+
+def set_instance_weight_process(client, clb_id, instance_id_list, weight_list):
+    """
+    修改服务器的权重值
+    :param client: clb客户端连接
+    :param clb_id: clb id
+    :param instance_id_list: instance id list
+    :param weight_list: 权重修改列表 type-list [(weight, sleep_time), ...]
+    :return:
+    """
+    for weight, sleep_time in weight_list:
+        logging.info(f"weight = {weight}")
+        flag = True
+        while flag:
+            try:
+                set_weight_for_instances(client=client, clb_id=clb_id, instance_id_list=instance_id_list, weight=weight)
+                time.sleep(sleep_time)
+                flag = False
+            except Exception as e:
+                time.sleep(10)
+                continue
+
+
+def add_backend_servers(client, slb_id, instances):
+    """
+    服务器挂载到负载均衡(必须是状态为运行中的后端服务器才可以加入负载均衡实例,每次调用最多可添加20个后端服务器)
+    :param client:
+    :param slb_id:
+    :param instances: 实例列表 [(instance_id, ip), ...]
+    :return:
+    """
+    try:
+        for i in range(len(instances) // 20 + 1):
+            instances_list = instances[i * 20:(i + 1) * 20]
+            if len(instances_list) == 0:
+                return
+            request = AddBackendServersRequest()
+            request.set_accept_format('json')
+            request.set_LoadBalancerId(slb_id)
+            backend_servers = [
+                {"ServerId": instance_id, "Weight": "0", "Type": "ecs", "ServerIp": ip_address}
+                for instance_id, ip_address in instances_list]
+            request.set_BackendServers(backend_servers)
+            response = client.do_action_with_exception(request)
+            return response
+    except Exception as e:
+        logging.error(e)
+        sys.exit()
+
+
+def remove_backend_servers(client, slb_id, instances):
+    """
+    服务器从负载均衡移除(一次调用最多可以移除20个后端服务器)
+    :param client:
+    :param slb_id:
+    :param instances: 实例列表 [instance_id, ...]
+    :return:
+    """
+    try:
+        for i in range(len(instances) // 20 + 1):
+            instances_list = instances[i * 20:(i + 1) * 20]
+            if len(instances_list) == 0:
+                return
+            request = RemoveBackendServersRequest()
+            request.set_accept_format('json')
+            request.set_LoadBalancerId(slb_id)
+            backend_servers = [
+                {"ServerId": instance_id, "Weight": "0", "Type": "ecs"}
+                for instance_id in instances_list]
+            request.set_BackendServers(backend_servers)
+            response = client.do_action_with_exception(request)
+            return response
+    except Exception as e:
+        logging.error(e)
+        sys.exit()
+
+
+def set_instance_weight_process_with_clbs(client, clb_id_list, instance_id_list, weight_list):
+    """
+    修改服务器的权重值
+    :param client: clb客户端连接
+    :param clb_id_list: clb id list
+    :param instance_id_list: instance id list
+    :param weight_list: 权重修改列表 type-list [(weight, sleep_time), ...]
+    :return:
+    """
+    for weight, sleep_time in weight_list:
+        logging.info(f"修改权重中: weight = {weight}")
+        for clb_id in clb_id_list:
+            flag = True
+            while flag:
+                try:
+                    set_weight_for_instances(client=client, clb_id=clb_id, instance_id_list=instance_id_list, weight=weight)
+                    logging.info(f"slb: {clb_id} finished!")
+                    flag = False
+                except Exception as e:
+                    time.sleep(10)
+                    continue
+        time.sleep(sleep_time)
+
+
+def add_backend_servers_with_clbs(client, clb_id_list, instances):
+    """
+    服务器挂载到负载均衡(必须是状态为运行中的后端服务器才可以加入负载均衡实例,每次调用最多可添加20个后端服务器)
+    :param client:
+    :param clb_id_list:
+    :param instances: 实例列表 [(instance_id, ip), ...]
+    :return:
+    """
+    try:
+        for i in range(len(instances)//20 + 1):
+            instances_list = instances[i*20:(i+1)*20]
+            if len(instances_list) == 0:
+                return
+            for clb_id in clb_id_list:
+                request = AddBackendServersRequest()
+                request.set_accept_format('json')
+                request.set_LoadBalancerId(clb_id)
+                backend_servers = [
+                    {"ServerId": instance_id, "Weight": "0", "Type": "ecs", "ServerIp": ip_address}
+                    for instance_id, ip_address in instances_list]
+                request.set_BackendServers(backend_servers)
+                response = client.do_action_with_exception(request)
+                logging.info(f"slb: {clb_id} add backend servers finished!")
+            logging.info(f"i: {i}, count: {len(instances_list)}, instances: {instances_list} "
+                         f"add backend servers finished!")
+    except Exception as e:
+        logging.error(e)
+        sys.exit()
+
+
+def remove_backend_servers_with_clbs(client, clb_id_list, instances):
+    """
+    服务器从负载均衡移除(一次调用最多可以移除20个后端服务器)
+    :param client:
+    :param clb_id_list:
+    :param instances: 实例列表 [instance_id, ...]
+    :return:
+    """
+    try:
+        for i in range(len(instances)//20 + 1):
+            instances_list = instances[i*20:(i+1)*20]
+            if len(instances_list) == 0:
+                return
+            for slb_id in clb_id_list:
+                request = RemoveBackendServersRequest()
+                request.set_accept_format('json')
+                request.set_LoadBalancerId(slb_id)
+                backend_servers = [
+                    {"ServerId": instance_id, "Weight": "0", "Type": "ecs"}
+                    for instance_id in instances_list]
+                request.set_BackendServers(backend_servers)
+                response = client.do_action_with_exception(request)
+                logging.info(f"slb: {slb_id} remove backend servers finished!")
+            logging.info(f"i: {i}, count: {len(instances_list)}, instances: {instances_list} "
+                         f"remove backend servers finished!")
+    except Exception as e:
+        logging.error(e)
+        sys.exit()

+ 33 - 124
gateway/gateway_config.py

@@ -5,12 +5,21 @@ logging.basicConfig(level=logging.INFO,
                     format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                     datefmt='%a, %d %b %Y %H:%M:%S')
 
-# gateway-alb-ecs组-生产环境
-server_group_id_list = ["sgp-l2k0p33e470vfmj538"]
 
 # alb后端服务器_调试使用组
 # server_group_id_list = ["sgp-ec4gopoclruofsfmxu"]
 
+# clb 测试环境_临时调试使用
+# clb_id_list = ["lb-bp1i49h7ncw2c9nl3kp6u"]
+
+
+# gateway-alb-ecs组-生产环境
+server_group_id_list = ["sgp-l2k0p33e470vfmj538"]
+
+# gateway clb 生产环境
+clb_id_list = ["lb-bp1mfk1gmd47twfh6bgwv", "lb-bp1jtzhp9krunyv3mim2q"]
+
+
 apps = 'piaoquan-gateway'
 repository = 'registry-vpc.cn-hangzhou.aliyuncs.com/stuuudy/{}'
 registry = 'registry-vpc.cn-hangzhou.aliyuncs.com/stuuudy/{}:{}'
@@ -19,6 +28,12 @@ registry = 'registry-vpc.cn-hangzhou.aliyuncs.com/stuuudy/{}:{}'
 port = "9000"
 
 # 修改负载均衡权限
+clb_client_params = {
+    'access_key_id': 'LTAIuPbTPL3LDDKN',
+    'access_key_secret': 'ORcNedKwWuwVtcq4IRFtUDZgS0b1le',
+    'region_id': 'cn-hangzhou'
+}
+
 alb_client_params = {
     'access_key_id': 'LTAI5tASD5yEZLeC8ffmNebY',
     'access_key_secret': '1PtsFRdp8viJmI78lEhNZR8MezWZBq',
@@ -37,84 +52,14 @@ docker_config = {
     'password': 'Qingqu@2019',
     'registry': 'registry-vpc.cn-hangzhou.aliyuncs.com'
 }
-# 机器配置
-instance_config = {
-    # 使用的镜像信息
-    'image_id': 'm-bp12bkuvg20k6ueqmb4v',
-    # 设置实例规格
-    'instance_type': 'ecs.c6.2xlarge',
-    # 选择的交换机
-    'vswitch_id': 'vsw-bp19lpjwtc6j0p0m9mdc2',
-    # 当前VPC类型的安全组
-    'security_group_id': 'sg-bp1irhrkr4vfj272hk4y',
-    # 硬盘的大小,单位:G
-    'disk_size': '200',
-    # 服务器命名
-    'instance_name': 'ESS-vlogapi-[1,2]',
-    # 服务器所在区域
-    'zone_id': 'cn-hangzhou-h',
-    # 磁盘类型:云盘
-    'disk_category': 'cloud_efficiency',
-    # 密钥
-    'key_pair_name': 'stuuudy',
-    # tag
-    'tags': [{"Key": "ecs", "Value": "longvideoapi.prod"}]
-}
 
-# 机器配置_hangzhou_i
-instance_config_i = {
-    # 使用的镜像信息
-    'image_id': 'm-bp12bkuvg20k6ueqmb4v',
-    # 设置实例规格
-    'instance_type': 'ecs.c6.2xlarge',
-    # 选择的交换机
-    'vswitch_id': 'vsw-bp17c002ovyomzwnhhdhj',
-    # 当前VPC类型的安全组
-    'security_group_id': 'sg-bp1irhrkr4vfj272hk4y',
-    # 硬盘的大小,单位:G
-    'disk_size': '200',
-    # 服务器命名
-    'instance_name': 'ESS-vlogapi-[1,2]',
-    # 服务器所在区域
-    'zone_id': 'cn-hangzhou-i',
-    # 磁盘类型:云盘
-    'disk_category': 'cloud_efficiency',
-    # 密钥
-    'key_pair_name': 'stuuudy',
-    # tag
-    'tags': [{"Key": "ecs", "Value": "longvideoapi.prod"}]
-}
-
-# 机器配置_hangzhou_g
-instance_config_g = {
-    # 使用的镜像信息
-    'image_id': 'm-bp12bkuvg20k6ueqmb4v',
-    # 设置实例规格
-    'instance_type': 'ecs.c6.2xlarge',
-    # 选择的交换机
-    'vswitch_id': 'vsw-bp10m69sb9ydfa64jdrn3',
-    # 当前VPC类型的安全组
-    'security_group_id': 'sg-bp1irhrkr4vfj272hk4y',
-    # 硬盘的大小,单位:G
-    'disk_size': '200',
-    # 服务器命名
-    'instance_name': 'ESS-vlogapi-[1,2]',
-    # 服务器所在区域
-    'zone_id': 'cn-hangzhou-g',
-    # 磁盘类型:云盘
-    'disk_category': 'cloud_efficiency',
-    # 密钥
-    'key_pair_name': 'stuuudy',
-    # tag
-    'tags': [{"Key": "ecs", "Value": "longvideoapi.prod"}]
-}
 
 # 机器配置_hangzhou_j
 instance_config_j = {
     # 使用的镜像信息
-    'image_id': 'm-bp12bkuvg20k6ueqmb4v',
+    'image_id': 'm-bp1e7t7odil9c8kqsm10',
     # 设置实例规格
-    'instance_type': 'ecs.c6.2xlarge',
+    'instance_type': 'ecs.c6.xlarge',
     # 选择的交换机
     'vswitch_id': 'vsw-bp1ssuwxyrt0p17ceeir0',
     # 当前VPC类型的安全组
@@ -122,65 +67,29 @@ instance_config_j = {
     # 硬盘的大小,单位:G
     'disk_size': '200',
     # 服务器命名
-    'instance_name': 'ESS-vlogapi-[1,2]',
+    'instance_name': 'ESS-gateway-[1,2]',
     # 服务器所在区域
     'zone_id': 'cn-hangzhou-j',
     # 磁盘类型:云盘
-    'disk_category': 'cloud_efficiency',
-    # 密钥
-    'key_pair_name': 'stuuudy',
-    # tag
-    'tags': [{"Key": "ecs", "Value": "longvideoapi.prod"}]
-}
-
-# 机器配置_hangzhou_k
-instance_config_k = {
-    # 使用的镜像信息
-    'image_id': 'm-bp12bkuvg20k6ueqmb4v',
-    # 设置实例规格
-    'instance_type': 'ecs.c6.2xlarge',
-    # 选择的交换机
-    'vswitch_id': 'vsw-bp14e4xu6uzte9nyn6nvr',
-    # 当前VPC类型的安全组
-    'security_group_id': 'sg-bp1irhrkr4vfj272hk4y',
-    # 硬盘的大小,单位:G
-    'disk_size': '200',
-    # 服务器命名
-    'instance_name': 'ESS-longvideoapi-alb-[1,2]',
-    # 服务器所在区域
-    'zone_id': 'cn-hangzhou-k',
-    # 磁盘类型:云盘
-    'disk_category': 'cloud_efficiency',
+    'disk_category': 'cloud_essd',
     # 密钥
     'key_pair_name': 'stuuudy',
     # tag
-    'tags': [{"Key": "ecs", "Value": "longvideoapi.prod"}]
+    'tags': [{"Key": "ecs", "Value": "gateway.prod"}]
 }
 
-# 机器配置_hangzhou_k alb
-instance_config_k_alb = {
-    # 使用的镜像信息
-    'image_id': 'm-bp12bkuvg20k6ueqmb4v',
-    # 设置实例规格
-    'instance_type': 'ecs.c6.2xlarge',
-    # 选择的交换机
-    'vswitch_id': 'vsw-bp14e4xu6uzte9nyn6nvr',
-    # 当前VPC类型的安全组
-    'security_group_id': 'sg-bp1irhrkr4vfj272hk4y',
-    # 硬盘的大小,单位:G
-    'disk_size': '200',
-    # 服务器命名
-    'instance_name': 'ESS-vlogapi-alb-[1,2]',
-    # 服务器所在区域
-    'zone_id': 'cn-hangzhou-k',
-    # 磁盘类型:云盘
-    'disk_category': 'cloud_efficiency',
-    # 密钥
-    'key_pair_name': 'stuuudy',
-    # tag
-    'tags': [{"Key": "ecs", "Value": "longvideoapi.prod"}]
-}
 
 
 
+# 服务启动脚本
+start_sh_dir = os.path.dirname(os.path.realpath(__file__))
+start_sh_filename = 'gateway_start.sh'
+with open(file=os.path.join(start_sh_dir, start_sh_filename), mode='r', encoding='utf-8') as rf:
+    file_content = rf.read()
+    logging.info(f"start sh file content: {file_content}")
+start_sh = {
+    'target_dir': '/home/gateway_server_sh',
+    'name': start_sh_filename,
+    'content': file_content,
+}
 

+ 10 - 0
gateway/gateway_start.sh

@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# 登录
+docker login --username=stuuudys --password='Qingqu@2019' registry-vpc.cn-hangzhou.aliyuncs.com
+# 拉取镜像
+docker pull registry-vpc.cn-hangzhou.aliyuncs.com/stuuudy/piaoquan-gateway:$1
+# 删除未运行的容器
+docker rm -f piaoquan-gateway
+# 运行 最新的镜像
+docker run -d --cap-add=SYS_PTRACE --net=host --name=piaoquan-gateway -v /datalog/:/datalog/ registry-vpc.cn-hangzhou.aliyuncs.com/stuuudy/piaoquan-gateway:$1

+ 1 - 0
requirements.txt

@@ -23,6 +23,7 @@ certifi==2024.8.30
 cffi==1.17.1
 charset-normalizer==3.4.0
 cryptography==44.0.0
+docker==7.1.0
 frozenlist==1.5.0
 idna==3.10
 jmespath==0.10.0