|
@@ -13,53 +13,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
|
|
|
health_instances = []
|
|
health_instances = []
|
|
ess_instances = []
|
|
ess_instances = []
|
|
-
|
|
|
|
-# slb_id = 'lb-bp1werfophtsjzfr76njm'
|
|
|
|
-# # 修改负载均衡权限
|
|
|
|
-# slb_client_params = {
|
|
|
|
-# 'access_key_id': 'LTAIuPbTPL3LDDKN',
|
|
|
|
-# 'access_key_secret': 'ORcNedKwWuwVtcq4IRFtUDZgS0b1le',
|
|
|
|
-# 'region_id': 'cn-hangzhou'
|
|
|
|
-# }
|
|
|
|
-# # 购买机器权限
|
|
|
|
-# create_client_params = {
|
|
|
|
-# 'access_key_id': 'LTAI4GBWbFvvXoXsSVBe1o9f',
|
|
|
|
-# 'access_key_secret': 'kRAikWitb4kDxaAyBqNrmLmllMEDO3',
|
|
|
|
-# 'region_id': 'cn-hangzhou'
|
|
|
|
-# }
|
|
|
|
-#
|
|
|
|
-# # 机器配置
|
|
|
|
-# instance_config = {
|
|
|
|
-# # 使用的镜像信息
|
|
|
|
-# 'image_id': 'm-bp1e5jx8eqhq22l91xw7',
|
|
|
|
-# # 设置实例规格
|
|
|
|
-# 'instance_type': 'ecs.ic5.xlarge',
|
|
|
|
-# # 选择的交换机
|
|
|
|
-# 'vswitch_id': 'vsw-bp19lpjwtc6j0p0m9mdc2',
|
|
|
|
-# # 当前VPC类型的安全组
|
|
|
|
-# 'security_group_id': 'sg-bp1irhrkr4vfj272hk4y',
|
|
|
|
-# # 硬盘的大小,单位:G
|
|
|
|
-# 'disk_size': '200',
|
|
|
|
-# # 服务器命名
|
|
|
|
-# 'instance_name': 'ESS-rov-server-[1,2]',
|
|
|
|
-# # 服务器所在区域
|
|
|
|
-# 'zone_id': 'cn-hangzhou-h',
|
|
|
|
-# # 磁盘类型:云盘
|
|
|
|
-# 'disk_category': 'cloud_efficiency',
|
|
|
|
-# # 密钥
|
|
|
|
-# 'key_pair_name': 'stuuudy'
|
|
|
|
-# }
|
|
|
|
-#
|
|
|
|
-# # 服务启动脚本
|
|
|
|
-# start_sh_dir = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
-# start_sh_filename = 'rov_server_start.sh'
|
|
|
|
-# with open(file=os.path.join(start_sh_dir, start_sh_filename), mode='r', encoding='utf-8') as rf:
|
|
|
|
-# file_content = rf.read()
|
|
|
|
-# start_sh = {
|
|
|
|
-# 'target_dir': '/home/piaoquan_server_sh',
|
|
|
|
-# 'name': start_sh_filename,
|
|
|
|
-# 'content': file_content,
|
|
|
|
-# }
|
|
|
|
|
|
+remove_container_instances = []
|
|
|
|
|
|
|
|
|
|
def server_health_check(client, instance_id):
|
|
def server_health_check(client, instance_id):
|
|
@@ -140,13 +94,14 @@ async def run_server(create_client, slb_client, instance_ids, max_workers):
|
|
sys.exit()
|
|
sys.exit()
|
|
|
|
|
|
|
|
|
|
-async def ess_instance(create_client, slb_client, ess_count, max_workers):
|
|
|
|
|
|
+async def ess_instance(create_client, slb_client, ess_count, max_workers, version):
|
|
"""
|
|
"""
|
|
扩容机器并运行新服务
|
|
扩容机器并运行新服务
|
|
:param create_client: 购买机器客户端连接
|
|
:param create_client: 购买机器客户端连接
|
|
:param slb_client: 修改负载均衡权限
|
|
:param slb_client: 修改负载均衡权限
|
|
:param ess_count: 扩容数量
|
|
:param ess_count: 扩容数量
|
|
:param max_workers: 线程数
|
|
:param max_workers: 线程数
|
|
|
|
+ :param version: 版本标记
|
|
:return:
|
|
:return:
|
|
"""
|
|
"""
|
|
# 1. 购买机器并启动
|
|
# 1. 购买机器并启动
|
|
@@ -162,7 +117,7 @@ async def ess_instance(create_client, slb_client, ess_count, max_workers):
|
|
logging.info(f"send start shell file finished, instances: {ess_instance_ids}")
|
|
logging.info(f"send start shell file finished, instances: {ess_instance_ids}")
|
|
# 3. 启动服务
|
|
# 3. 启动服务
|
|
server_start_sh = os.path.join(rov_server_config.start_sh['target_dir'], rov_server_config.start_sh['name'])
|
|
server_start_sh = os.path.join(rov_server_config.start_sh['target_dir'], rov_server_config.start_sh['name'])
|
|
- server_start_commend = f"sh {server_start_sh}"
|
|
|
|
|
|
+ server_start_commend = f"sh {server_start_sh} {version}"
|
|
utils.run_command(client=create_client, instance_ids=ess_instance_ids, command=server_start_commend)
|
|
utils.run_command(client=create_client, instance_ids=ess_instance_ids, command=server_start_commend)
|
|
# 4. 异步探活
|
|
# 4. 异步探活
|
|
global health_instances
|
|
global health_instances
|
|
@@ -179,13 +134,15 @@ async def ess_instance(create_client, slb_client, ess_count, max_workers):
|
|
if len(health_instances) == len(ess_instance_ids):
|
|
if len(health_instances) == len(ess_instance_ids):
|
|
# 所有机器探活成功
|
|
# 所有机器探活成功
|
|
time.sleep(60)
|
|
time.sleep(60)
|
|
|
|
+ utils.add_backend_servers(client=slb_client, slb_id=rov_server_config.slb_id, instances=health_instances)
|
|
|
|
+ health_instance_ids = [instance_id for instance_id, _ in health_instances]
|
|
add_weight_list = [(10, 30), (20, 20), (40, 10), (60, 10), (80, 10), (100, 10)]
|
|
add_weight_list = [(10, 30), (20, 20), (40, 10), (60, 10), (80, 10), (100, 10)]
|
|
- # set_instance_weight_process(client=slb_client, instance_id_list=ess_instance_ids, weight_list=add_weight_list)
|
|
|
|
|
|
+ # set_instance_weight_process(client=slb_client, instance_id_list=health_instance_ids, weight_list=add_weight_list)
|
|
global ess_instances
|
|
global ess_instances
|
|
- ess_instances.extend(ess_instance_ids)
|
|
|
|
|
|
+ ess_instances.extend(health_instance_ids)
|
|
logging.info(f"ess count: {ess_count}, "
|
|
logging.info(f"ess count: {ess_count}, "
|
|
f"create count: {len(ess_instance_ids)}, "
|
|
f"create count: {len(ess_instance_ids)}, "
|
|
- f"finished count: {len(health_instances)}")
|
|
|
|
|
|
+ f"finished count: {len(health_instance_ids)}")
|
|
else:
|
|
else:
|
|
logging.info(f"ess count: {ess_count}, "
|
|
logging.info(f"ess count: {ess_count}, "
|
|
f"create count: {len(ess_instance_ids)}, "
|
|
f"create count: {len(ess_instance_ids)}, "
|
|
@@ -207,44 +164,55 @@ def remove_container_image(client, instance_id, container_name):
|
|
# 移除旧的容器
|
|
# 移除旧的容器
|
|
container_remove_retry = 3
|
|
container_remove_retry = 3
|
|
i = 0
|
|
i = 0
|
|
- while i < container_remove_retry:
|
|
|
|
|
|
+ while True:
|
|
|
|
+ if i >= container_remove_retry:
|
|
|
|
+ logging.error(f"容器不存在或者无法删除当前容器, instance = {instance_id}/{ip_address}")
|
|
|
|
+ sys.exit()
|
|
try:
|
|
try:
|
|
container_id = client.containers.get(container_name)
|
|
container_id = client.containers.get(container_name)
|
|
container_id.remove(force=True)
|
|
container_id.remove(force=True)
|
|
break
|
|
break
|
|
except Exception as e:
|
|
except Exception as e:
|
|
i += 1
|
|
i += 1
|
|
- print("容器不存在或者无法删除当前容器")
|
|
|
|
|
|
+
|
|
# 删除旧镜像
|
|
# 删除旧镜像
|
|
images_remove_retry = 3
|
|
images_remove_retry = 3
|
|
j = 0
|
|
j = 0
|
|
- while j < images_remove_retry:
|
|
|
|
|
|
+ while True:
|
|
|
|
+ if j >= images_remove_retry:
|
|
|
|
+ logging.error(f"镜像不存在,无法获取到镜像ID, instance = {instance_id}/{ip_address}")
|
|
|
|
+ sys.exit()
|
|
try:
|
|
try:
|
|
images = client.images.list()
|
|
images = client.images.list()
|
|
for image in images:
|
|
for image in images:
|
|
client.images.remove(force=True, image=image.tags[0])
|
|
client.images.remove(force=True, image=image.tags[0])
|
|
time.sleep(2)
|
|
time.sleep(2)
|
|
|
|
+ global remove_container_instances
|
|
|
|
+ remove_container_instances.append(instance_id)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
i += 1
|
|
i += 1
|
|
- print("镜像不存在,无法获取到镜像ID")
|
|
|
|
|
|
|
|
|
|
|
|
-async def update_instance(create_client, slb_client, instance_ids, max_workers):
|
|
|
|
|
|
+async def update_instance(create_client, slb_client, instance_ids, max_workers, version):
|
|
"""
|
|
"""
|
|
线上机器更新
|
|
线上机器更新
|
|
:param create_client:
|
|
:param create_client:
|
|
:param slb_client: slb客户端连接
|
|
:param slb_client: slb客户端连接
|
|
:param instance_ids: instanceId type-list
|
|
:param instance_ids: instanceId type-list
|
|
:param max_workers:
|
|
:param max_workers:
|
|
|
|
+ :param version: 版本标记
|
|
:return:
|
|
:return:
|
|
"""
|
|
"""
|
|
media_index = len(instance_ids)//2
|
|
media_index = len(instance_ids)//2
|
|
instance_ids_group = [instance_ids[:media_index], instance_ids[media_index:]]
|
|
instance_ids_group = [instance_ids[:media_index], instance_ids[media_index:]]
|
|
|
|
+ update_finished_count = 0
|
|
for instance_id_list in instance_ids_group:
|
|
for instance_id_list in instance_ids_group:
|
|
# 1. 摘流量
|
|
# 1. 摘流量
|
|
set_instance_weight_process(client=slb_client, instance_id_list=instance_id_list, weight_list=[(0, 60)])
|
|
set_instance_weight_process(client=slb_client, instance_id_list=instance_id_list, weight_list=[(0, 60)])
|
|
logging.info(f"set weight with 0 finished, instances: {instance_id_list}")
|
|
logging.info(f"set weight with 0 finished, instances: {instance_id_list}")
|
|
# 2. 异步移除旧容器并删除旧镜像
|
|
# 2. 异步移除旧容器并删除旧镜像
|
|
|
|
+ global remove_container_instances
|
|
|
|
+ remove_container_instances = []
|
|
container_name = 'rov-server'
|
|
container_name = 'rov-server'
|
|
loop = asyncio.get_running_loop()
|
|
loop = asyncio.get_running_loop()
|
|
executor = ThreadPoolExecutor(max_workers=max_workers)
|
|
executor = ThreadPoolExecutor(max_workers=max_workers)
|
|
@@ -253,13 +221,18 @@ async def update_instance(create_client, slb_client, instance_ids, max_workers):
|
|
[(slb_client, instance_id, container_name) for instance_id in instance_id_list]
|
|
[(slb_client, instance_id, container_name) for instance_id in instance_id_list]
|
|
]
|
|
]
|
|
await asyncio.wait(tasks)
|
|
await asyncio.wait(tasks)
|
|
- logging.info(f"remove container & images finished, instances: {instance_id_list}")
|
|
|
|
|
|
+ logging.info(f"remove container & images finished, instances: {remove_container_instances},"
|
|
|
|
+ f" count: {len(remove_container_instances)}")
|
|
|
|
+ if len(remove_container_instances) < len(instance_id_list):
|
|
|
|
+ logging.error(f"remove container image failed| "
|
|
|
|
+ f"request count: {len(instance_id_list)}, removed count: {len(remove_container_instances)}")
|
|
|
|
+ sys.exit()
|
|
# 3. 发送启动脚本到机器上
|
|
# 3. 发送启动脚本到机器上
|
|
utils.send_file_to_ecs(client=create_client, instance_id_list=instance_id_list, **rov_server_config.start_sh)
|
|
utils.send_file_to_ecs(client=create_client, instance_id_list=instance_id_list, **rov_server_config.start_sh)
|
|
- logging.info(f"send start shell file finished, instances: {instance_id_list}")
|
|
|
|
|
|
+ logging.info(f"send start shell file finished, instances: {instance_id_list}, count: {len(instance_id_list)}")
|
|
# 4. 启动服务
|
|
# 4. 启动服务
|
|
server_start_sh = os.path.join(rov_server_config.start_sh['target_dir'], rov_server_config.start_sh['name'])
|
|
server_start_sh = os.path.join(rov_server_config.start_sh['target_dir'], rov_server_config.start_sh['name'])
|
|
- server_start_commend = f"sh {server_start_sh}"
|
|
|
|
|
|
+ server_start_commend = f"sh {server_start_sh} {version}"
|
|
utils.run_command(client=create_client, instance_ids=instance_id_list, command=server_start_commend)
|
|
utils.run_command(client=create_client, instance_ids=instance_id_list, command=server_start_commend)
|
|
# 5. 异步探活
|
|
# 5. 异步探活
|
|
global health_instances
|
|
global health_instances
|
|
@@ -271,17 +244,21 @@ async def update_instance(create_client, slb_client, instance_ids, max_workers):
|
|
[(slb_client, instance_id) for instance_id in instance_id_list]
|
|
[(slb_client, instance_id) for instance_id in instance_id_list]
|
|
]
|
|
]
|
|
await asyncio.wait(tasks)
|
|
await asyncio.wait(tasks)
|
|
- logging.info(f"health instances count: {len(health_instances)}, {health_instances}")
|
|
|
|
|
|
+ logging.info(f"health instances: {health_instances}, count: {len(health_instances)}")
|
|
# 6. 挂载流量
|
|
# 6. 挂载流量
|
|
if len(health_instances) == len(instance_id_list):
|
|
if len(health_instances) == len(instance_id_list):
|
|
# 所有机器探活成功
|
|
# 所有机器探活成功
|
|
time.sleep(60)
|
|
time.sleep(60)
|
|
|
|
+ utils.add_backend_servers(client=slb_client, slb_id=rov_server_config.slb_id, instances=health_instances)
|
|
|
|
+ health_instance_ids = [instance_id for instance_id, _ in health_instances]
|
|
add_weight_list = [(10, 30), (20, 20), (40, 10), (60, 10), (80, 10), (100, 10)]
|
|
add_weight_list = [(10, 30), (20, 20), (40, 10), (60, 10), (80, 10), (100, 10)]
|
|
- set_instance_weight_process(client=slb_client, instance_id_list=instance_id_list,
|
|
|
|
|
|
+ set_instance_weight_process(client=slb_client, instance_id_list=health_instance_ids,
|
|
weight_list=add_weight_list)
|
|
weight_list=add_weight_list)
|
|
- logging.info(f"finished count: {len(health_instances)}")
|
|
|
|
|
|
+ logging.info(f"finished instances: {health_instances}, count: {len(health_instances)}")
|
|
|
|
+ update_finished_count += len(health_instances)
|
|
|
|
+ logging.info(f"update finished: {update_finished_count}/{len(instance_ids)}")
|
|
else:
|
|
else:
|
|
- logging.info(f"health count: {len(health_instances)}")
|
|
|
|
|
|
+ logging.info(f"health instances: {health_instances}, count: {len(health_instances)}")
|
|
sys.exit()
|
|
sys.exit()
|
|
|
|
|
|
|
|
|
|
@@ -327,37 +304,42 @@ def remove_instances(create_client, slb_client, instance_ids):
|
|
|
|
|
|
|
|
|
|
def main():
    """Entry point of the release script: scale out instances running a new version.

    The version tag is read from ``sys.argv[1]`` and forwarded to
    ``ess_instance``, which appends it to the server start command.

    Steps performed in this revision:
      1. List the instances currently attached to the SLB.
      2. Launch ``online_instance_count // 2`` additional instances and start
         the new service on them.
    Steps 3 (update the existing instances) and 4 (stop and release the
    scaled-out instances) are intentionally left disabled below.

    Exits with status 1 when the version argument is missing or when any step
    raises, so that a calling shell / CI job can detect the failure.
    """
    # Validate the CLI argument up front: otherwise a missing argument would
    # only surface as an opaque "list index out of range" log line.
    if len(sys.argv) < 2:
        logging.error("missing required argument: version")
        sys.exit(1)
    version = sys.argv[1]

    try:
        slb_params = rov_server_config.slb_client_params
        slb_client = utils.connect_client(access_key_id=slb_params['access_key_id'],
                                          access_key_secret=slb_params['access_key_secret'],
                                          region_id=slb_params['region_id'])
        create_params = rov_server_config.create_client_params
        create_client = utils.connect_client(access_key_id=create_params['access_key_id'],
                                             access_key_secret=create_params['access_key_secret'],
                                             region_id=create_params['region_id'])

        # 1. Fetch every instance currently behind the SLB.
        online_instance_ids = utils.get_instance_ids(client=slb_client, slb_id=rov_server_config.slb_id)
        online_instance_count = len(online_instance_ids)
        logging.info(f"online instance count: {online_instance_count}.")
        logging.info(f"online instance ids: {online_instance_ids}")

        # 2. Scale out and start the new service.
        #    Scale-out size: number of online instances // 2.
        logging.info("ess instances start ...")
        ess_instance_count = online_instance_count // 2
        logging.info(f"ess instance count: {ess_instance_count}")
        asyncio.run(ess_instance(create_client=create_client, slb_client=slb_client,
                                 ess_count=ess_instance_count, max_workers=2, version=version))
        logging.info("ess instances end!")

        # NOTE: steps 3 and 4 are disabled in this revision; kept for reference.
        # # 3. Update the instances that were already online.
        # logging.info(f"update online instances start ...")
        # asyncio.run(update_instance(create_client=create_client, slb_client=slb_client,
        #                             instance_ids=online_instance_ids, max_workers=2, version=version))
        # logging.info(f"update online instances end!")
        #
        # # 4. Stop and release the scaled-out instances.
        # logging.info(f"stop & release instances start ...")
        # remove_instances(create_client=create_client, slb_client=slb_client, instance_ids=ess_instances)
        # logging.info(f"stop & release instances end!")
    except Exception:
        # Top-level boundary: record the full traceback, then exit non-zero.
        # A bare sys.exit() would report status 0 (success) to the caller.
        logging.exception("release failed")
        sys.exit(1)
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|