# alb_gateway_restart.py (4.7 KB)

  1. import logging
  2. import sys
  3. import time
  4. import docker
  5. import gateway_config
  6. import alb_utils
  7. import requests
# Configure the root logger at import time: INFO level, with each record
# showing timestamp, source file name and line number, level and message.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%a, %d %b %Y %H:%M:%S')
# Module-level list of (instance_id, ip_address) tuples; server_health_check
# appends an entry each time an instance answers its health check with HTTP 200.
health_instances = []
  12. def server_health_check(ecs_client, instance_id):
  13. """
  14. 服务健康检查
  15. :param ecs_client: 客户端连接
  16. :param instance_id: instanceId
  17. :return:
  18. """
  19. global health_instances
  20. ip_address = alb_utils.get_ip_address(ecs_client=ecs_client, instance_id=instance_id)
  21. while True:
  22. health_check_url = f"http://{ip_address}:9000/healthcheck"
  23. try:
  24. http_code = requests.get(health_check_url).status_code
  25. except:
  26. logging.info(f"images is downloading ip:{ip_address}")
  27. http_code = 0
  28. if http_code == 200:
  29. health_instances.append((instance_id, ip_address))
  30. logging.info(f"health check success, instance: {instance_id}/{ip_address}")
  31. break
  32. else:
  33. time.sleep(10)
  34. def server_restart(alb_client, ecs_client, instance_id, image_name, port):
  35. try:
  36. logging.info(f"Restarting instance: {instance_id}")
  37. # 获取ip
  38. ip_address = alb_utils.get_ip_address(ecs_client=ecs_client, instance_id=instance_id)
  39. logging.info(f"IP address: {instance_id}/{ip_address}")
  40. # 摘流量
  41. alb_utils.update_server_group_servers_attribute(alb_client=alb_client,
  42. server_group_id_list=gateway_config.server_group_id_list,
  43. instance_id_list=[instance_id],
  44. weight_list=[(0, 20)],
  45. port=port)
  46. logging.info(f"Set weight to 0 for instance: {instance_id}")
  47. # 连接 Docker 客户端并重启容器
  48. docker_client = docker.DockerClient(base_url=f'tcp://{ip_address}:2375', timeout=60)
  49. container = docker_client.containers.get(image_name)
  50. container.restart()
  51. logging.info("Docker restart finished.")
  52. # 探活
  53. server_health_check(ecs_client, instance_id)
  54. time.sleep(20)
  55. logging.info("Health check finished.")
  56. # 设置权重
  57. add_weight_list = [(10, 5), (20, 5), (40, 5), (60, 5), (80, 5), (100, 5)]
  58. alb_utils.update_server_group_servers_attribute(alb_client=alb_client,
  59. server_group_id_list=gateway_config.server_group_id_list,
  60. instance_id_list=[instance_id],
  61. weight_list=add_weight_list,
  62. port=port)
  63. logging.info(f"Server restart finished, instance: {instance_id}/{ip_address}")
  64. except Exception as e:
  65. logging.error(f"Server restart failed, instance: {instance_id}")
  66. logging.error(e)
  67. def main():
  68. try:
  69. ecs_client = alb_utils.connect_client(access_key_id=gateway_config.ecs_client_params['access_key_id'],
  70. access_key_secret=gateway_config.ecs_client_params['access_key_secret'],
  71. region_id=gateway_config.ecs_client_params['region_id'])
  72. alb_client = alb_utils.connect_alb_client(
  73. access_key_id=gateway_config.alb_client_params['access_key_id'],
  74. access_key_secret=gateway_config.alb_client_params['access_key_secret'],
  75. endpoint=gateway_config.alb_client_params['endpoint']
  76. )
  77. # 获取 ALB 下所有机器
  78. online_instance_ids = alb_utils.list_server_group_servers(alb_client=alb_client,
  79. server_group_id=gateway_config.server_group_id_list[0])
  80. online_instance_count = len(online_instance_ids)
  81. logging.info(f"Online instance count: {online_instance_count}.")
  82. logging.info(f"Online instance ids: {online_instance_ids}")
  83. # 逐台重启
  84. image_name = 'piaoquan-gateway'
  85. for i, instance_id in enumerate(online_instance_ids):
  86. server_restart(alb_client=alb_client, ecs_client=ecs_client, instance_id=instance_id, image_name=image_name, port=gateway_config.port)
  87. logging.info(f"Restart progress: {i + 1}/{online_instance_count}")
  88. logging.info("All servers restarted successfully!")
  89. except Exception as e:
  90. logging.error("An error occurred during the main execution.")
  91. logging.error(e)
  92. if __name__ == '__main__':
  93. main()