alb_utils.py 24 KB


  1. import logging
  2. import json
  3. import sys
  4. import time
  5. from asyncio import wait_for
  6. import requests
  7. import asyncio
  8. import time
  9. from alibabacloud_tea_util.client import Client as UtilClient
  10. from aliyunsdkcore.client import AcsClient
  11. from aliyunsdkecs.request.v20140526.RunInstancesRequest import RunInstancesRequest
  12. from aliyunsdkecs.request.v20140526.DescribeInstancesRequest import DescribeInstancesRequest
  13. from aliyunsdkecs.request.v20140526.DescribeNetworkInterfacesRequest import DescribeNetworkInterfacesRequest
  14. from aliyunsdkecs.request.v20140526.RunCommandRequest import RunCommandRequest
  15. from aliyunsdkecs.request.v20140526.SendFileRequest import SendFileRequest
  16. from aliyunsdkecs.request.v20140526.StopInstancesRequest import StopInstancesRequest
  17. from aliyunsdkecs.request.v20140526.DeleteInstancesRequest import DeleteInstancesRequest
  18. from aliyunsdkecs.request.v20140526.DescribeInstanceStatusRequest import DescribeInstanceStatusRequest
  19. from aliyunsdkecs.request.v20140526.ModifySecurityGroupRuleRequest import ModifySecurityGroupRuleRequest
  20. from alibabacloud_alb20200616.client import Client as Alb20200616Client
  21. from alibabacloud_tea_openapi import models as open_api_models
  22. from alibabacloud_alb20200616 import models as alb_models
  23. from alibabacloud_alb20200616 import models as alb_20200616_models
  24. from alibabacloud_tea_util import models as util_models
  25. from urllib3 import request
  # Configure the root logger at import time: INFO level, each record prefixed
  # with a timestamp, the source file name / line number and the level name.
  logging.basicConfig(
      level=logging.INFO,
      datefmt='%a, %d %b %Y %H:%M:%S',
      format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
  )
  def send_msg_to_feishu(webhook, key_word, msg_text, timeout=10):
      """
      Send a plain-text message to a Feishu webhook.

      The posted text is "<key_word>: <msg_text>". The response body is logged
      at INFO level; nothing is returned.

      :param webhook: webhook URL the message is POSTed to, type-string
      :param key_word: prefix placed in front of the message text, type-string
      :param msg_text: message body, type-string
      :param timeout: seconds to wait for the webhook before giving up
                      (default 10); passed straight to ``requests``
      :return: None
      """
      headers = {'Content-Type': 'application/json'}
      payload_message = {
          "msg_type": "text",
          "content": {
              "text": '{}: {}'.format(key_word, msg_text)
          }
      }
      # requests has no default timeout: without one, an unresponsive webhook
      # would block the calling script indefinitely.
      response = requests.request(
          'POST',
          url=webhook,
          headers=headers,
          data=json.dumps(payload_message),
          timeout=timeout,
      )
      logging.info(response.text)
  def connect_client(access_key_id, access_key_secret, region_id):
      """
      Create an AcsClient for the given account and region.

      :param access_key_id: access key id, type-string
      :param access_key_secret: access key secret, type-string
      :param region_id: region id passed to AcsClient
      :return: the AcsClient instance
      :raises SystemExit: with status 1 if constructing the client raises
      """
      try:
          return AcsClient(ak=access_key_id, secret=access_key_secret, region_id=region_id)
      except Exception as e:
          # Log the failure and stop. A bare sys.exit() would report status 0
          # (success) to the shell, hiding the failure from whatever runs this.
          logging.error(e)
          sys.exit(1)
  def connect_alb_client(access_key_id, access_key_secret, endpoint):
      """
      Create an ALB client (Alb20200616Client).

      :param access_key_id: access key id, type-string
      :param access_key_secret: access key secret, type-string
      :param endpoint: endpoint value placed into the client Config
      :return: alb_client
      """
      return Alb20200616Client(
          open_api_models.Config(
              access_key_id=access_key_id,
              access_key_secret=access_key_secret,
              endpoint=endpoint,
          )
      )
  def build_create_instances_request(image_id, vswitch_id, security_group_id, zone_id, instance_type, instance_name,
                                     disk_size, disk_category, key_pair_name, tags):
      """
      Build a RunInstancesRequest describing the servers to purchase.

      :param image_id: image to use, type-string
      :param vswitch_id: vSwitch to attach to, type-string
      :param security_group_id: security group (VPC type), type-string
      :param zone_id: zone the server is placed in, type-string
      :param instance_type: instance specification, type-string
      :param instance_name: instance name, type-string
      :param disk_size: system disk size in GB, type-string
      :param disk_category: system disk category, type-string
      :param key_pair_name: key pair name, type-string
      :param tags: tags, type-list, eg: [{"Key": "ecs", "Value": "rov-server.prod"}, ...]
      :return: the populated RunInstancesRequest
      """
      run_request = RunInstancesRequest()
      # (setter suffix, value) pairs, applied in the same order as before.
      field_values = (
          ("ImageId", image_id),
          ("VSwitchId", vswitch_id),
          ("SecurityGroupId", security_group_id),
          ("ZoneId", zone_id),
          ("InstanceType", instance_type),
          ("InstanceName", instance_name),
          ("SystemDiskSize", disk_size),
          ("SystemDiskCategory", disk_category),
          ("KeyPairName", key_pair_name),
          ("Tags", tags),
      )
      for field_name, field_value in field_values:
          getattr(run_request, "set_" + field_name)(field_value)
      return run_request
  def send_req(client, request):
      """
      Send an API request and return the decoded JSON response.

      Unlike send_request, this function does not catch anything: exceptions
      raised by the client or by JSON decoding propagate to the caller.

      :param client: client connection exposing do_action_with_exception
      :param request: request object; its accept format is set to 'json'
      :return: response decoded with json.loads
      """
      request.set_accept_format('json')
      raw_response = client.do_action_with_exception(request)
      response = json.loads(raw_response)
      # Use the module's logging setup instead of stray debug prints.
      logging.info(response)
      return response
  def check_instance_running(ecs_client, instance_ids):
      """
      Query instance details and report which instances are Running.

      :param ecs_client: client connection
      :param instance_ids: instance id list, type-list (sent JSON-encoded)
      :return: (running_count, running_instances) - the number of instances
               whose Status is "Running" and the list of their InstanceIds
      :raises SystemExit: with status 1 if the response carries a 'Code' or
                          any exception is raised while querying/parsing
      """
      try:
          request = DescribeInstancesRequest()
          request.set_InstanceIds(json.dumps(instance_ids))
          request.set_PageSize(100)
          response = send_request(ecs_client=ecs_client, request=request)
          if response.get('Code') is not None:
              # The API reported an error: log it and stop with a failure status
              # (SystemExit is not an Exception, so it is not caught below).
              logging.error(response)
              sys.exit(1)
          instances_list = response.get('Instances').get('Instance')
          running_instances = [
              instance_detail.get('InstanceId')
              for instance_detail in instances_list
              if instance_detail.get('Status') == "Running"
          ]
          return len(running_instances), running_instances
      except Exception as e:
          # Unexpected failure: log it and stop with a non-zero exit status.
          logging.error(e)
          sys.exit(1)
  def get_ip_address(ecs_client, instance_id):
      """
      Look up an instance's IP address.

      Returns the PrivateIpAddress of the first network interface listed in
      the DescribeNetworkInterfaces response.

      :param ecs_client: client connection
      :param instance_id: instance id, type-string
      :return: ip_address, type-string
      """
      query = DescribeNetworkInterfacesRequest()
      query.set_accept_format('json')
      query.set_InstanceId(instance_id)
      result = send_request(ecs_client=ecs_client, request=query)
      interfaces = result['NetworkInterfaceSets']['NetworkInterfaceSet']
      first_interface = interfaces[0]
      return first_interface['PrivateIpAddress']
  def create_multiple_instances(amount, ecs_client,
                                image_id, vswitch_id, security_group_id, zone_id, instance_type, instance_name,
                                disk_size, disk_category, key_pair_name, tags):
      """
      Create ECS instances in batches of at most 50 and wait until they run.

      For each batch a RunInstances request is sent, then the batch is polled
      every 20 seconds via check_instance_running until every instance id the
      API returned is Running. The poll has no upper time limit.

      :param amount: number of instances to create, type-int
      :param ecs_client: client connection used to purchase the machines
      :param image_id: image to use, type-string
      :param vswitch_id: vSwitch to attach to, type-string
      :param security_group_id: security group (VPC type), type-string
      :param zone_id: zone the server is placed in, type-string
      :param instance_type: instance specification, type-string
      :param instance_name: instance name, type-string
      :param disk_size: disk size in GB, type-string
      :param disk_category: disk category, type-string
      :param key_pair_name: key pair name, type-string
      :param tags: tags, type-list, eg: [{"Key": "ecs", "Value": "rov-server.prod"}, ...]
      :return: list of all created instance ids
      :raises SystemExit: with status 1 if a create response carries a 'Code'
      """
      logging.info(f"create instances start, request amount: {amount}.")
      instance_ids = []
      remain = amount
      while remain > 0:
          # Request at most 50 instances per API call.
          sub_amount = min(remain, 50)
          remain -= sub_amount
          request = build_create_instances_request(
              image_id=image_id, vswitch_id=vswitch_id, security_group_id=security_group_id, zone_id=zone_id,
              instance_type=instance_type, instance_name=instance_name, disk_size=disk_size,
              disk_category=disk_category, key_pair_name=key_pair_name, tags=tags
          )
          request.set_Amount(sub_amount)
          # Send the request: purchase and start the machines.
          response = send_request(ecs_client=ecs_client, request=request)
          if response.get('Code') is not None:
              # Failure: log the response and stop with a non-zero exit status.
              logging.error(response)
              sys.exit(1)
          sub_instance_ids = response.get('InstanceIdSets').get('InstanceIdSet')
          logging.info(f"success amount: {len(sub_instance_ids)}, instance ids: {sub_instance_ids}.")
          # Wait for the instances that were actually created. Comparing against
          # the requested amount could never terminate if fewer ids came back.
          expected_running = len(sub_instance_ids)
          running_amount = 0
          while running_amount < expected_running:
              time.sleep(20)
              running_amount, running_instances = check_instance_running(ecs_client=ecs_client,
                                                                         instance_ids=sub_instance_ids)
              logging.info(f"running amount: {running_amount}, running instances: {running_instances}.")
          instance_ids.extend(sub_instance_ids)
      return instance_ids
  def release_instances(ecs_client, instance_ids, force=False):
      """
      Release (delete) instances in batches of at most 100 ids.

      Sleeps 3 seconds after every batch.

      :param ecs_client: client connection
      :param instance_ids: instance ids, type-list
      :param force: True to force the release, False for a normal release, type-bool
      :return: None
      :raises SystemExit: with status 1 if a response carries a 'Code'
      """
      for start in range(0, len(instance_ids), 100):
          instance_id_sub_array = instance_ids[start:start + 100]
          request = DeleteInstancesRequest()
          request.set_InstanceIds(instance_id_sub_array)
          request.set_Force(force)
          response = send_request(ecs_client=ecs_client, request=request)
          if response.get('Code') is not None:
              # Include the response so the failure can be diagnosed, and exit
              # with a non-zero status (a bare sys.exit() reports success).
              logging.error(f"Release instances fail!!! response: {response}")
              sys.exit(1)
          logging.info(f"Release instances finished, count = {len(instance_id_sub_array)} instances: {instance_id_sub_array}")
          time.sleep(3)
  def get_instances_status(ecs_client, instance_ids):
      """
      Wait until every given instance reports status 'Stopped'.

      Instances are checked in batches of at most 50 ids. Each batch is polled
      every 5 seconds until all of its instances are 'Stopped' (there is no
      upper time limit), then the function sleeps 3 seconds and moves on.

      :param ecs_client: client connection
      :param instance_ids: instance ids, type-list
      :return: list of instance ids reported as 'Stopped'
      :raises SystemExit: with status 1 if a response carries a 'Code'
      """
      stopped_instances = []
      for start in range(0, len(instance_ids), 50):
          instance_id_sub_array = instance_ids[start:start + 50]
          while True:
              request = DescribeInstanceStatusRequest()
              request.set_InstanceIds(instance_id_sub_array)
              request.set_PageSize(50)
              response = send_request(ecs_client=ecs_client, request=request)
              if response.get('Code') is not None:
                  # API error: log it and stop with a non-zero exit status.
                  logging.error(response)
                  sys.exit(1)
              instances_list = response.get('InstanceStatuses').get('InstanceStatus')
              stopped_instances_sub = [
                  instance.get('InstanceId')
                  for instance in instances_list
                  if instance.get('Status') == 'Stopped'
              ]
              if len(stopped_instances_sub) == len(instance_id_sub_array):
                  logging.info(f"Instances stopped status set success, count:{len(stopped_instances_sub)} instances: {stopped_instances_sub}")
                  stopped_instances.extend(stopped_instances_sub)
                  break
              # Not all stopped yet: report progress and poll again.
              logging.info(f"Stopped instances count = {len(stopped_instances_sub)}, instances: {stopped_instances_sub}")
              time.sleep(5)
          time.sleep(3)
      return stopped_instances
  def stop_instances(ecs_client, instance_ids, force_stop=False):
      """
      Stop instances in batches of at most 100 ids.

      Sleeps 3 seconds after every batch.

      :param ecs_client: client connection
      :param instance_ids: instance ids, type-list
      :param force_stop: True to force the shutdown, False for a normal shutdown, type-bool
      :return: None
      :raises SystemExit: with status 1 if a response carries a 'Code'
      """
      for start in range(0, len(instance_ids), 100):
          instance_id_sub_array = instance_ids[start:start + 100]
          request = StopInstancesRequest()
          request.set_InstanceIds(instance_id_sub_array)
          request.set_ForceStop(force_stop)
          response = send_request(ecs_client=ecs_client, request=request)
          if response.get('Code') is not None:
              # Exit with a non-zero status; a bare sys.exit() reports success.
              logging.error(f"Failed to stop instances: {response}")
              sys.exit(1)
          logging.info(f"Instances stop finished, count:{len(instance_id_sub_array)} instances: {instance_id_sub_array}")
          time.sleep(3)
  def send_request(ecs_client, request):
      """
      Send an API request and return the decoded JSON response.

      :param ecs_client: client connection exposing do_action_with_exception
      :param request: request object; its accept format is set to 'json'
      :return: response decoded with json.loads
      :raises SystemExit: with status 1 if sending or decoding raises
      """
      request.set_accept_format('json')
      try:
          raw_response = ecs_client.do_action_with_exception(request)
          return json.loads(raw_response)
      except Exception:
          # Log with traceback and stop. The exit status must be non-zero:
          # a bare sys.exit() would make the failed run look successful.
          logging.exception("API request failed")
          sys.exit(1)
  def run_command(ecs_client, instance_ids, command):
      """
      Run a shell command on many instances, 50 instance ids per request.

      Each request uses type "RunShellScript" and a Timeout of 180; the
      response of every batch is logged at INFO level.

      :param ecs_client: client connection
      :param instance_ids: instance id list, type-list (split into batches of at most 50)
      :param command: command content, type-string
      :return: None
      """
      batch_size = 50
      for offset in range(0, len(instance_ids), batch_size):
          instance_id_list = instance_ids[offset:offset + batch_size]
          cmd_request = RunCommandRequest()
          cmd_request.set_accept_format('json')
          cmd_request.set_Type("RunShellScript")
          cmd_request.set_CommandContent(command)
          cmd_request.set_InstanceIds(instance_id_list)
          cmd_request.set_Timeout(180)
          response = send_request(ecs_client=ecs_client, request=cmd_request)
          logging.info(f"run_command count:{len(instance_id_list)} instance_id_list:{instance_id_list} response:{response}")
  def send_file_to_ecs(ecs_client, instance_id_list, target_dir, name, content):
      """
      Send a file to ECS instances, 50 instance ids per request.

      An empty id list only logs a warning. Existing files are overwritten
      (Overwrite is set to True on every request).

      :param ecs_client: client connection
      :param instance_id_list: instance ids, type-list (split into batches of at most 50)
      :param target_dir: directory the file is stored in, type-string
      :param name: file name, type-string
      :param content: file content, type-string
      :return: None
      """
      if not instance_id_list:
          logging.warning("实例ID列表为空,无法发送文件。")
          return
      # Step through the list in strides of 50 so no empty trailing batch is
      # produced when the count is an exact multiple of 50.
      for start in range(0, len(instance_id_list), 50):
          instance_ids = instance_id_list[start:start + 50]
          request = SendFileRequest()
          request.set_Content(content)
          request.set_TargetDir(target_dir)
          request.set_Name(name)
          request.set_Overwrite(True)
          request.set_InstanceIds(instance_ids)
          try:
              logging.info(f"正在向实例 {instance_ids} 发送文件 '{name}' 到目录 '{target_dir}'")
              response = send_request(ecs_client=ecs_client, request=request)
              logging.info(f"成功发送文件到实例 {instance_ids},响应: {response}")
          except Exception as e:
              # NOTE(review): send_request catches exceptions from the API call
              # itself and terminates the process via sys.exit, so this handler
              # is not reached for API failures - confirm whether continuing
              # with the next batch is actually the intended behavior.
              logging.error(f"发送文件到实例 {instance_ids} 失败,错误: {str(e)}")
  def add_servers_to_server_group(alb_client, server_group_ids, instance_ids, weight, port):
      """
      Add ECS instances to ALB server groups.

      Instances are added in batches of at most 200; every batch is added to
      every server group, with a 5 second pause after each API call.

      :param alb_client: ALB client connection
      :param server_group_ids: server group ids, type-list
      :param instance_ids: instance ids, type-list
      :param weight: weight assigned to every added server
      :param port: port used by the backend servers
      :return: None
      :raises SystemExit: with status 1 if an add call raises
      """
      # The runtime options are identical for every call, so build them once.
      runtime = util_models.RuntimeOptions(
          connect_timeout=5000,
          read_timeout=60000
      )
      for start in range(0, len(instance_ids), 200):
          instance_ids_sub_array = instance_ids[start:start + 200]
          # NOTE(review): server_type is 'ecs' here but 'Ecs' in
          # update_server_group_server_weight - confirm the expected casing.
          servers = [
              alb_models.AddServersToServerGroupRequestServers(
                  server_id=instance_id,
                  server_type='ecs',
                  weight=weight,
                  port=port
              )
              for instance_id in instance_ids_sub_array
          ]
          for server_group_id in server_group_ids:
              request = alb_models.AddServersToServerGroupRequest(
                  server_group_id=server_group_id,
                  servers=servers
              )
              try:
                  alb_client.add_servers_to_server_group_with_options(request, runtime)
                  logging.info(
                      f"Successfully added count:{len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} to server group {server_group_id} with weight {weight}.")
              except Exception as e:
                  logging.error(f"Failed to add count:{len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} to server group {server_group_id}: {str(e)}")
                  # Non-zero status so the failure is visible to the caller's shell.
                  sys.exit(1)
              time.sleep(5)
  def remove_servers_from_server_group(alb_client, server_group_id_list, instance_ids, port):
      """
      Remove ECS instances from ALB server groups.

      Instances are removed in batches of at most 200; every batch is removed
      from every server group, with a 5 second pause after each API call.

      :param alb_client: ALB client connection
      :param server_group_id_list: server group ids, type-list
      :param instance_ids: instance ids, type-list
      :param port: port used by the backend servers
      :return: None
      :raises SystemExit: with status 1 if a remove call raises
      """
      # The runtime options are identical for every call, so build them once.
      runtime = util_models.RuntimeOptions(
          connect_timeout=5000,
          read_timeout=60000
      )
      for start in range(0, len(instance_ids), 200):
          instance_ids_sub_array = instance_ids[start:start + 200]
          servers = [
              alb_models.RemoveServersFromServerGroupRequestServers(
                  port=port,
                  server_id=instance_id,
                  server_type='ecs'
              )
              for instance_id in instance_ids_sub_array
          ]
          for server_group_id in server_group_id_list:
              request = alb_models.RemoveServersFromServerGroupRequest(
                  server_group_id=server_group_id,
                  servers=servers
              )
              try:
                  alb_client.remove_servers_from_server_group_with_options(request, runtime)
                  logging.info(f"Successfully removed count: {len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} from server group {server_group_id}.")
              except Exception as e:
                  logging.error(f"Failed to remove count: {len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} from server group {server_group_id}: {str(e)}")
                  # Non-zero status so the failure is visible to the caller's shell.
                  sys.exit(1)
              time.sleep(5)
  def list_server_group_servers(alb_client, server_group_id, max_retries=5):
      """
      List the servers of a server group and return their server ids.

      Pages through the results 100 entries at a time using the NextToken
      value of each response. A failed page request is retried after 3
      seconds; the pagination token only advances once a page has been
      consumed, so a retry never skips a page.

      :param alb_client: ALB client
      :param server_group_id: server group id
      :param max_retries: how many consecutive failed attempts are retried
                          before giving up (default 5)
      :return: list of server ids (instance ids)
      :raises SystemExit: with status 1 when a page keeps failing after
                          max_retries retries
      """
      instance_ids = []
      next_token = None
      consecutive_failures = 0
      runtime = util_models.RuntimeOptions(
          connect_timeout=5000,
          read_timeout=60000
      )
      while True:
          try:
              list_server_group_servers_request = alb_20200616_models.ListServerGroupServersRequest(
                  server_group_id=server_group_id,
                  max_results=100,
                  next_token=next_token
              )
              response = alb_client.list_server_group_servers_with_options(list_server_group_servers_request, runtime)
              page_token = UtilClient.to_map(response.body).get('NextToken')
              # Treat a missing server list as an empty page instead of raising.
              page_servers = response.body.servers or []
              sub_instance_ids = [server.server_id for server in page_servers]
          except Exception as error:
              logging.error(error)
              consecutive_failures += 1
              if consecutive_failures > max_retries:
                  # Retrying forever would hang the caller; returning a partial
                  # list could be mistaken for the full membership, so stop.
                  logging.error(
                      f"Listing servers of server group {server_group_id} failed after {max_retries} retries.")
                  sys.exit(1)
              time.sleep(3)
              continue
          consecutive_failures = 0
          instance_ids.extend(sub_instance_ids)
          next_token = page_token
          if not next_token:
              break
          # Brief pause before requesting the next page.
          time.sleep(3)
      return instance_ids
  def update_server_group_server_weight(alb_client, server_group_id_list, instance_id_list, weight, port):
      """
      Set the weight of the given servers in the given server groups.

      Instances are updated in batches of at most 40; every batch is applied
      to every server group, with a 5 second pause after each API call.

      :param alb_client: ALB client
      :param server_group_id_list: server group ids, type-list
      :param instance_id_list: instance ids, type-list
      :param weight: weight value to set
      :param port: port used by the backend servers
      :return: None
      :raises SystemExit: with status 1 if an update call raises
      """
      # The runtime options are identical for every call, so build them once.
      runtime = util_models.RuntimeOptions(
          connect_timeout=5000,
          read_timeout=60000
      )
      for start in range(0, len(instance_id_list), 40):
          instance_ids_sub_array = instance_id_list[start:start + 40]
          servers = [
              alb_20200616_models.UpdateServerGroupServersAttributeRequestServers(
                  server_type='Ecs',
                  server_id=instance_id,
                  weight=weight,
                  port=port
              )
              for instance_id in instance_ids_sub_array
          ]
          for server_group_id in server_group_id_list:
              request = alb_20200616_models.UpdateServerGroupServersAttributeRequest(
                  servers=servers,
                  server_group_id=server_group_id
              )
              try:
                  alb_client.update_server_group_servers_attribute_with_options(request, runtime)
                  logging.info(
                      f"Successfully updated count = {len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} in group {server_group_id} to weight {weight}.")
              except Exception as e:
                  # Say which batch/group failed, then exit with a failure status.
                  logging.error(
                      f"Failed to update count = {len(instance_ids_sub_array)} instance_ids: {instance_ids_sub_array} in group {server_group_id} to weight {weight}: {str(e)}")
                  sys.exit(1)
              time.sleep(5)
  def update_server_group_servers_attribute(alb_client, server_group_id_list, instance_id_list, weight_list, port):
      """
      Apply a sequence of weight changes to servers in server groups.

      Each (weight, sleep_time) step is applied through
      update_server_group_server_weight, followed by a pause of sleep_time
      seconds before the next step.

      :param alb_client: ALB client
      :param server_group_id_list: server group id list
      :param instance_id_list: instance id list
      :param weight_list: weight steps, type-list [(weight, sleep_time), ...]
      :param port: port used by the backend servers
      :return: None
      """
      for step in weight_list:
          target_weight, pause_seconds = step
          update_server_group_server_weight(
              alb_client, server_group_id_list, instance_id_list, target_weight, port
          )
          time.sleep(pause_seconds)
  def check_server_group_status(alb_client, server_group_id_list):
      """
      Wait for the given server groups to report status "Available".

      Lists the server groups up to 10 times, 2 seconds apart, and returns as
      soon as the number of groups reporting "Available" equals the number of
      ids requested.

      :param alb_client: ALB client
      :param server_group_id_list: server group ids, type-list
      :return: None
      :raises SystemExit: with status 1 if the groups are not all "Available"
                          after 10 attempts
      """
      list_server_groups_request = alb_20200616_models.ListServerGroupsRequest(
          server_group_ids=server_group_id_list,
          max_results=100
      )
      runtime = util_models.RuntimeOptions(
          connect_timeout=5000,
          read_timeout=60000
      )
      max_attempts = 10
      for _ in range(max_attempts):
          try:
              response = alb_client.list_server_groups_with_options(list_server_groups_request, runtime)
              server_groups = []
              if response.body:
                  server_groups = UtilClient.to_map(response.body).get("ServerGroups") or []
              count = 0
              for server_group in server_groups:
                  if server_group.get("ServerGroupStatus") == "Available":
                      logging.info(f"Server group {server_group} is available.")
                      count += 1
              if count == len(server_group_id_list):
                  return
          except Exception as e:
              logging.error(e)
          # Pause before the next attempt, including after a failed call, so
          # errors do not burn through all attempts back-to-back.
          time.sleep(2)
      # Never became available: say so and exit with a failure status
      # (a bare sys.exit() would report success).
      logging.error(f"Server groups {server_group_id_list} are not all Available after {max_attempts} checks.")
      sys.exit(1)
  548. logging.error(e)
  549. if not flag:
  550. sys.exit()