utils.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570
  1. import logging
  2. import json
  3. import sys
  4. import time
  5. from asyncio import wait_for
  6. import requests
  7. import asyncio
  8. import time
  9. from alibabacloud_tea_util.client import Client as UtilClient
  10. from aliyunsdkcore.client import AcsClient
  11. from aliyunsdkecs.request.v20140526.RunInstancesRequest import RunInstancesRequest
  12. from aliyunsdkecs.request.v20140526.DescribeInstancesRequest import DescribeInstancesRequest
  13. from aliyunsdkecs.request.v20140526.DescribeNetworkInterfacesRequest import DescribeNetworkInterfacesRequest
  14. from aliyunsdkecs.request.v20140526.RunCommandRequest import RunCommandRequest
  15. from aliyunsdkecs.request.v20140526.SendFileRequest import SendFileRequest
  16. from aliyunsdkecs.request.v20140526.StopInstancesRequest import StopInstancesRequest
  17. from aliyunsdkecs.request.v20140526.DeleteInstancesRequest import DeleteInstancesRequest
  18. from aliyunsdkecs.request.v20140526.DescribeInstanceStatusRequest import DescribeInstanceStatusRequest
  19. from aliyunsdkecs.request.v20140526.ModifySecurityGroupRuleRequest import ModifySecurityGroupRuleRequest
  20. from alibabacloud_alb20200616.client import Client as Alb20200616Client
  21. from alibabacloud_tea_openapi import models as open_api_models
  22. from alibabacloud_alb20200616 import models as alb_models
  23. from alibabacloud_alb20200616 import models as alb_20200616_models
  24. from alibabacloud_tea_util import models as util_models
  25. from urllib3 import request
  26. logging.basicConfig(level=logging.INFO,
  27. format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
  28. datefmt='%a, %d %b %Y %H:%M:%S')
  29. def send_msg_to_feishu(webhook, key_word, msg_text):
  30. """发送消息到飞书"""
  31. headers = {'Content-Type': 'application/json'}
  32. payload_message = {
  33. "msg_type": "text",
  34. "content": {
  35. "text": '{}: {}'.format(key_word, msg_text)
  36. }
  37. }
  38. response = requests.request('POST', url=webhook, headers=headers, data=json.dumps(payload_message))
  39. logging.info(response.text)
  40. def connect_client(access_key_id, access_key_secret, region_id):
  41. """
  42. 初始化账号,连接客户端
  43. :param access_key_id: access key Id, type-string
  44. :param access_key_secret: access key secret, type-string
  45. :param region_id: region_id
  46. :return: clt
  47. """
  48. try:
  49. clt = AcsClient(ak=access_key_id, secret=access_key_secret, region_id=region_id)
  50. return clt
  51. except Exception as e:
  52. # 失败,记录报错信息,发送通知,停止并退出
  53. logging.error(e)
  54. sys.exit()
  55. def connect_alb_client(access_key_id, access_key_secret, endpoint):
  56. """
  57. 初始化ALB客户端
  58. :param access_key_id: access key Id, type-string
  59. :param access_key_secret: access key secret, type-string
  60. :return: alb_client
  61. """
  62. config = open_api_models.Config(
  63. access_key_id=access_key_id,
  64. access_key_secret=access_key_secret,
  65. endpoint=endpoint
  66. )
  67. alb_client = Alb20200616Client(config)
  68. return alb_client
  69. def build_create_instances_request(image_id, vswitch_id, security_group_id, zone_id, instance_type, instance_name,
  70. disk_size, disk_category, key_pair_name, tags):
  71. """
  72. 购买服务器参数配置
  73. :param image_id: 使用的镜像信息 type-string
  74. :param vswitch_id: 选择的交换机 type-string
  75. :param security_group_id: 当前vpc类型的安全组 type-string
  76. :param zone_id: 服务器所在区域 type-string
  77. :param instance_type: 实例规格 type-string
  78. :param instance_name: 实例命名 type-string
  79. :param disk_size: 磁盘大小,单位:G,type-string
  80. :param disk_category: 磁盘类型 type-string
  81. :param key_pair_name: 密钥对名称 type-string
  82. :param tags: 标签 type-list, eg: [{"Key": "ecs", "Value": "rov-server.prod"}, ...]
  83. :return: request
  84. """
  85. request = RunInstancesRequest()
  86. request.set_ImageId(image_id)
  87. request.set_VSwitchId(vswitch_id)
  88. request.set_SecurityGroupId(security_group_id)
  89. request.set_ZoneId(zone_id)
  90. request.set_InstanceType(instance_type)
  91. request.set_InstanceName(instance_name)
  92. request.set_SystemDiskSize(disk_size)
  93. request.set_SystemDiskCategory(disk_category)
  94. request.set_KeyPairName(key_pair_name)
  95. request.set_Tags(tags)
  96. return request
  97. def send_req(client, request):
  98. """
  99. 发送API请求
  100. :param client: 客户端连接
  101. :param request: 请求配置
  102. :return: response
  103. """
  104. request.set_accept_format('json')
  105. response = client.do_action_with_exception(request)
  106. # print(response)
  107. response = json.loads(response)
  108. print(response)
  109. # logging.info(response)
  110. print(response.get('Code'))
  111. return response
  112. # except Exception as e:
  113. # 失败,记录报错信息,发送通知,停止并退出
  114. # logging.error(e)
  115. # sys.exit()
  116. def check_instance_running(ecs_client, instance_ids):
  117. """
  118. 检查服务器运行状态
  119. :param ecs_client: 客户端连接
  120. :param instance_ids: 实例id列表, type-list
  121. :return: running_count,Status为Running的实例数
  122. """
  123. try:
  124. request = DescribeInstancesRequest()
  125. request.set_InstanceIds(json.dumps(instance_ids))
  126. request.set_PageSize(100)
  127. response = send_request(ecs_client=ecs_client, request=request)
  128. if response.get('Code') is None:
  129. instances_list = response.get('Instances').get('Instance')
  130. running_count = 0
  131. running_instances = []
  132. for instance_detail in instances_list:
  133. if instance_detail.get('Status') == "Running":
  134. running_count += 1
  135. running_instances.append(instance_detail.get('InstanceId'))
  136. return running_count, running_instances
  137. else:
  138. # 失败,记录报错信息,发送通知,停止并退出
  139. logging.error(response)
  140. sys.exit()
  141. except Exception as e:
  142. # 失败,记录报错信息,发送通知,停止并退出
  143. logging.error(e)
  144. sys.exit()
  145. def get_ip_address(ecs_client, instance_id):
  146. """
  147. 获取实例IP地址
  148. :param ecs_client: 客户端连接
  149. :param instance_id: 实例id, type-string
  150. :return: ip_address, type-string
  151. """
  152. request = DescribeNetworkInterfacesRequest()
  153. request.set_accept_format('json')
  154. request.set_InstanceId(instance_id)
  155. response = send_request(ecs_client=ecs_client, request=request)
  156. ip_address = response['NetworkInterfaceSets']['NetworkInterfaceSet'][0]['PrivateIpAddress']
  157. return ip_address
  158. def create_multiple_instances(amount, ecs_client,
  159. image_id, vswitch_id, security_group_id, zone_id, instance_type, instance_name,
  160. disk_size, disk_category, key_pair_name, tags):
  161. """
  162. 创建多个ECS实例
  163. :param amount: 创建实例数 type-int 取值范围:[1, 100]
  164. :param ecs_client: 购买机器客户端连接
  165. :param image_id: 使用的镜像信息 type-string
  166. :param vswitch_id: 选择的交换机 type-string
  167. :param security_group_id: 当前vpc类型的安全组 type-string
  168. :param zone_id: 服务器所在区域 type-string
  169. :param instance_type: 实例规格 type-string
  170. :param instance_name: 实例命名 type-string
  171. :param disk_size: 磁盘大小,单位:G,type-string
  172. :param disk_category: 磁盘类型 type-string
  173. :param key_pair_name: 密钥对名称 type-string
  174. :param tags: 标签 type-list, eg: [{"Key": "ecs", "Value": "rov-server.prod"}, ...]
  175. :return:
  176. """
  177. logging.info(f"create instances start, request amount: {amount}.")
  178. # 1. 连接客户端
  179. # create_instances_clt = connect_client(
  180. # access_key_id=access_key_id, access_key_secret=access_key_secret, region_id=region_id
  181. # )
  182. # 2. 请求参数配置
  183. instance_ids = []
  184. remain = amount
  185. while True:
  186. if remain <= 0:
  187. break
  188. if remain > 50:
  189. sub_amount = 50
  190. remain = remain - sub_amount
  191. else:
  192. sub_amount = remain
  193. remain = 0
  194. request = build_create_instances_request(
  195. image_id=image_id, vswitch_id=vswitch_id, security_group_id=security_group_id, zone_id=zone_id,
  196. instance_type=instance_type, instance_name=instance_name, disk_size=disk_size, disk_category=disk_category,
  197. key_pair_name=key_pair_name, tags=tags
  198. )
  199. request.set_Amount(sub_amount)
  200. # 3. 发送API请求,购买机器并启动
  201. response = send_request(ecs_client=ecs_client, request=request)
  202. if response.get('Code') is None:
  203. sub_instance_ids = response.get('InstanceIdSets').get('InstanceIdSet')
  204. logging.info(f"success amount: {len(sub_instance_ids)}, instance ids: {sub_instance_ids}.")
  205. # 获取机器运行状态
  206. running_amount = 0
  207. while running_amount < sub_amount:
  208. time.sleep(20)
  209. running_amount, running_instances = check_instance_running(ecs_client=ecs_client,
  210. instance_ids=sub_instance_ids)
  211. logging.info(f"running amount: {running_amount}, running instances: {running_instances}.")
  212. # return instance_ids
  213. instance_ids.extend(sub_instance_ids)
  214. else:
  215. # 失败,记录报错信息,发送通知,停止并退出
  216. logging.error(response)
  217. sys.exit()
  218. return instance_ids
  219. def release_instances(ecs_client, instance_ids, force=False):
  220. """
  221. 释放实例
  222. :param ecs_client:
  223. :param instance_ids: instance_id, type-list
  224. :param force: 是否强制释放, True-强制释放, False-正常释放, type-bool
  225. :return:
  226. """
  227. instance_id_list_array = [instance_ids[i:i + 100] for i in range(0, len(instance_ids), 100)]
  228. for instance_id_sub_array in instance_id_list_array:
  229. request = DeleteInstancesRequest()
  230. request.set_InstanceIds(instance_id_sub_array)
  231. request.set_Force(force)
  232. response = send_request(ecs_client=ecs_client, request=request)
  233. # return response
  234. if response.get('Code') is None:
  235. logging.info(f"Release instances finished, instances: {instance_id_sub_array}")
  236. else:
  237. logging.error(f"Release instances fail!!!")
  238. sys.exit()
  239. def get_instances_status(ecs_client, instance_ids):
  240. """
  241. 获取实例运行状态
  242. :param ecs_client:
  243. :param instance_ids: instance_id, type-list
  244. :return:
  245. """
  246. instance_id_list_array = [instance_ids[i:i + 100] for i in range(0, len(instance_ids), 100)]
  247. for instance_id_sub_array in instance_id_list_array:
  248. request = DescribeInstanceStatusRequest()
  249. request.set_InstanceIds(instance_id_sub_array)
  250. request.set_PageSize(50)
  251. response = send_request(ecs_client=ecs_client, request=request)
  252. # return response
  253. if response.get('Code') is None:
  254. instances_list = response.get('InstanceStatuses').get('InstanceStatus')
  255. stopped_instances = [instance.get('InstanceId') for instance in instances_list if
  256. instance.get('Status') == 'Stopped']
  257. if len(stopped_instances) == len(instance_id_sub_array):
  258. logging.info(f"Instances stopped status set success, instances: {stopped_instances}")
  259. break
  260. else:
  261. logging.info(f"Stopped instances count = {len(stopped_instances)}, instances: {stopped_instances}")
  262. time.sleep(5)
  263. else:
  264. logging.error(response)
  265. sys.exit()
  266. def stop_instances(ecs_client, instance_ids, force_stop=False):
  267. """
  268. 停止实例
  269. :param ecs_client:
  270. :param instance_ids: 实例ID, type-list
  271. :param force_stop: 是否强制关机, True-强制关机, False-正常关机, type-bool
  272. :return:
  273. """
  274. instance_id_list_array = [instance_ids[i:i + 100] for i in range(0, len(instance_ids), 100)]
  275. for instance_id_sub_array in instance_id_list_array:
  276. request = StopInstancesRequest()
  277. request.set_InstanceIds(instance_id_sub_array)
  278. request.set_ForceStop(force_stop)
  279. response = send_request(ecs_client=ecs_client, request=request)
  280. # return response
  281. if response.get('Code') is None:
  282. logging.info(f"Instances stop finished, instances: {instance_id_sub_array}")
  283. else:
  284. logging.error(f"Failed to stop instances: {response}")
  285. sys.exit()
  286. def send_request(ecs_client, request):
  287. """
  288. 发送API请求
  289. :param ecs_client: 客户端连接
  290. :param request: 请求配置
  291. :return: response
  292. """
  293. request.set_accept_format('json')
  294. try:
  295. response = ecs_client.do_action_with_exception(request)
  296. response = json.loads(response)
  297. # logging.info(response)
  298. return response
  299. except Exception as e:
  300. # 失败,记录报错信息,发送通知,停止并退出
  301. logging.error(e)
  302. sys.exit()
  303. def run_command(ecs_client, instance_ids, command):
  304. """
  305. 批量执行命令
  306. :param ecs_client: 客户端连接
  307. :param instance_ids: 实例id列表, type-list, 最多能指定50台ECS实例ID
  308. :param command: 命令 type-string
  309. :return:
  310. """
  311. for i in range(len(instance_ids) // 50 + 1):
  312. instance_id_list = instance_ids[i * 50:(i + 1) * 50]
  313. if len(instance_id_list) == 0:
  314. return
  315. request = RunCommandRequest()
  316. request.set_accept_format('json')
  317. request.set_Type("RunShellScript")
  318. request.set_CommandContent(command)
  319. request.set_InstanceIds(instance_id_list)
  320. response = send_request(ecs_client=ecs_client, request=request)
  321. logging.info(response)
  322. def send_file_to_ecs(ecs_client, instance_id_list, target_dir, name, content):
  323. """
  324. 发送文件到ecs;alb应用,区分上方clb
  325. :param ecs_client:
  326. :param instance_id_list: 最多能指定50台ECS实例ID
  327. :param target_dir: 文件存放目录 type-string
  328. :param name: 文件名 type-string
  329. :param content: 文件内容 type-string
  330. :return:
  331. """
  332. if not instance_id_list:
  333. logging.warning("实例ID列表为空,无法发送文件。")
  334. return
  335. for i in range(len(instance_id_list) // 50 + 1):
  336. instance_ids = instance_id_list[i * 50:(i + 1) * 50]
  337. if len(instance_ids) == 0:
  338. logging.info("没有更多的实例ID需要发送文件,退出。")
  339. return
  340. request = SendFileRequest()
  341. request.set_Content(content)
  342. request.set_TargetDir(target_dir)
  343. request.set_Name(name)
  344. request.set_Overwrite(True)
  345. request.set_InstanceIds(instance_ids)
  346. try:
  347. logging.info(f"正在向实例 {instance_ids} 发送文件 '{name}' 到目录 '{target_dir}'")
  348. response = send_request(ecs_client=ecs_client, request=request)
  349. logging.info(f"成功发送文件到实例 {instance_ids},响应: {response}")
  350. except Exception as e:
  351. logging.error(f"发送文件到实例 {instance_ids} 失败,错误: {str(e)}")
  352. def add_servers_to_server_group(alb_client, server_group_ids, instance_ids, weight, port):
  353. """
  354. 添加服务器到ALB服务器组
  355. :param alb_client: ALB客户端连接
  356. :param server_group_ids: 服务器组ID
  357. :param instance_ids: 实例ID
  358. :param weight: 权重
  359. :param port: 后端服务器使用的端口
  360. """
  361. instance_ids_array = [instance_ids[i:i + 100] for i in range(0, len(instance_ids), 100)]
  362. for instance_ids_sub_array in instance_ids_array:
  363. servers = []
  364. for i in range(len(instance_ids_sub_array)):
  365. server = alb_models.AddServersToServerGroupRequestServers(
  366. server_id=instance_ids_sub_array[i],
  367. server_type='ecs',
  368. weight=weight,
  369. port=port
  370. )
  371. servers.append(server)
  372. # server = alb_models.AddServersToServerGroupRequestServers(
  373. # server_id=instance_id,
  374. # server_type='ecs',
  375. # weight=weight,
  376. # port=port
  377. # )
  378. for server_group_id in server_group_ids:
  379. request = alb_models.AddServersToServerGroupRequest(
  380. server_group_id=server_group_id,
  381. servers=servers
  382. )
  383. runtime = util_models.RuntimeOptions()
  384. try:
  385. alb_client.add_servers_to_server_group_with_options(request, runtime)
  386. logging.info(
  387. f"Successfully added instance_ids_sub_array {instance_ids_sub_array} to server group {server_group_id} with weight {weight}.")
  388. except Exception as e:
  389. logging.error(f"Failed to add instance_ids_sub_array {instance_ids_sub_array} to server group {server_group_id}: {str(e)}")
  390. sys.exit()
  391. time.sleep(10)
  392. def remove_servers_from_server_group(alb_client, server_group_id_list, instance_ids, port):
  393. """
  394. 从ALB服务器组中移除服务器
  395. :param alb_client: ALB客户端连接
  396. :param server_group_id_list: 服务器组ID list
  397. :param instance_ids: 实例ID list
  398. :param port: 后端服务器使用的端口
  399. """
  400. instance_ids_array = [instance_ids[i:i + 100] for i in range(0, len(instance_ids), 100)]
  401. for instance_ids_sub_array in instance_ids_array:
  402. servers = []
  403. for instance_id in instance_ids_sub_array:
  404. server = alb_models.RemoveServersFromServerGroupRequestServers(
  405. port=port,
  406. server_id=instance_id,
  407. server_type='ecs'
  408. )
  409. servers.append(server)
  410. for server_group_id in server_group_id_list:
  411. request = alb_models.RemoveServersFromServerGroupRequest(
  412. server_group_id=server_group_id,
  413. servers=servers
  414. )
  415. runtime = util_models.RuntimeOptions()
  416. try:
  417. alb_client.remove_servers_from_server_group_with_options(request, runtime)
  418. logging.info(f"Successfully removed instance_ids_sub_array {instance_ids_sub_array} from server group {server_group_id}.")
  419. except Exception as e:
  420. logging.error(f"Failed to remove instance_ids_sub_array {instance_ids_sub_array} from server group {server_group_id}: {str(e)}")
  421. sys.exit()
  422. time.sleep(10)
  423. def list_server_group_servers(alb_client, server_group_id):
  424. """
  425. 列出服务器组中的服务器并返回实例ID列表
  426. @param alb_client: ALB客户端
  427. @param server_group_id: 服务器组ID
  428. @return: 实例ID列表
  429. """
  430. instance_ids = []
  431. next_token = None
  432. while True:
  433. try:
  434. list_server_group_servers_request = alb_20200616_models.ListServerGroupServersRequest(
  435. server_group_id=server_group_id,
  436. max_results=50,
  437. next_token=next_token
  438. )
  439. runtime = util_models.RuntimeOptions()
  440. response = alb_client.list_server_group_servers_with_options(list_server_group_servers_request, runtime)
  441. next_token = UtilClient.to_map(response.body).get('NextToken')
  442. sub_instance_ids = [server.server_id for server in response.body.servers]
  443. if len(sub_instance_ids) > 0:
  444. instance_ids.extend(sub_instance_ids)
  445. else:
  446. break
  447. except Exception as error:
  448. logging.error(error)
  449. return instance_ids
  450. def update_server_group_server_weight(alb_client, server_group_id_list, instance_id_list, weight, port):
  451. """
  452. 更指定服务器在服务器组中的权重
  453. :param alb_client: ALB客户端
  454. :param server_group_id_list: 服务器组ID list
  455. :param instance_id_list: 实例ID list
  456. :param weight: 权重值
  457. :param port: 后端服务器使用的端口
  458. """
  459. instance_id_list_array = [instance_id_list[i:i + 40] for i in range(0, len(instance_id_list), 40)]
  460. for instance_ids_sub_array in instance_id_list_array:
  461. servers = []
  462. for i in range(len(instance_ids_sub_array)):
  463. server = alb_20200616_models.UpdateServerGroupServersAttributeRequestServers(
  464. server_type='Ecs',
  465. server_id=instance_ids_sub_array[i],
  466. weight=weight,
  467. port=port
  468. )
  469. servers.append(server)
  470. for server_group_id in server_group_id_list:
  471. request = alb_20200616_models.UpdateServerGroupServersAttributeRequest(
  472. servers=servers,
  473. server_group_id=server_group_id
  474. )
  475. # logging.info(f"servers = {servers}")
  476. runtime = util_models.RuntimeOptions()
  477. try:
  478. # logging.info(f"instance_id_list = {instance_id_list} request = {request}")
  479. alb_client.update_server_group_servers_attribute_with_options(request, runtime)
  480. logging.info(
  481. f"Successfully updated instance_ids_sub_array {instance_ids_sub_array} in group {server_group_id} to weight {weight}.")
  482. except Exception as e:
  483. logging.error(e)
  484. sys.exit()
  485. time.sleep(10)
  486. def update_server_group_servers_attribute(alb_client, server_group_id_list, instance_id_list, weight_list, port):
  487. """
  488. 更新服务器组中的服务器权重
  489. :param alb_client: ALB客户端
  490. :param server_group_id_list: 服务器组ID列表
  491. :param instance_id_list: 实例ID列表
  492. :param weight_list: 权重修改列表 type-list [(weight, sleep_time), ...]
  493. :param port: 后端服务器使用的端口
  494. """
  495. # for server_group_id in server_group_id_list:
  496. # for instance_id in instance_id_list:
  497. for weight, sleep_time in weight_list:
  498. update_server_group_server_weight(alb_client, server_group_id_list, instance_id_list, weight, port)
  499. time.sleep(sleep_time)
  500. # check_server_group_status(alb_client, server_group_id_list)
  501. def check_server_group_status(alb_client, server_group_id_list):
  502. list_server_groups_request = alb_20200616_models.ListServerGroupsRequest(
  503. server_group_ids=server_group_id_list,
  504. max_results=100
  505. )
  506. flag = False
  507. runtime = util_models.RuntimeOptions()
  508. for i in range(10):
  509. try:
  510. response = alb_client.list_server_groups_with_options(list_server_groups_request, runtime)
  511. count = 0
  512. if response.body:
  513. server_groups = UtilClient.to_map(response.body).get("ServerGroups")
  514. if server_groups:
  515. for server_group in server_groups:
  516. if server_group.get("ServerGroupStatus") == "Available":
  517. logging.info(f"Server group {server_group} is available.")
  518. count += 1
  519. if count == len(server_group_id_list):
  520. flag = True
  521. break
  522. time.sleep(2)
  523. except Exception as e:
  524. logging.error(e)
  525. if not flag:
  526. sys.exit()