long_tts_client.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import base64
  4. import hashlib
  5. import hmac
  6. import json
  7. import time
  8. import uuid
  9. from datetime import datetime
  10. from urllib import parse
  11. import requests
  12. from loguru import logger
  13. from utils.config import OssConfig
  14. class AccessToken:
  15. @staticmethod
  16. def _encode_text(text):
  17. encoded_text = parse.quote_plus(text)
  18. return encoded_text.replace('+', '%20').replace('*', '%2A').replace('%7E', '~')
  19. @staticmethod
  20. def _encode_dict(dic):
  21. keys = dic.keys()
  22. dic_sorted = [(key, dic[key]) for key in sorted(keys)]
  23. encoded_text = parse.urlencode(dic_sorted)
  24. return encoded_text.replace('+', '%20').replace('*', '%2A').replace('%7E', '~')
  25. @staticmethod
  26. def create_token(access_key_id, access_key_secret):
  27. """生成访问令牌"""
  28. parameters = {
  29. 'AccessKeyId': access_key_id,
  30. 'Action': 'CreateToken',
  31. 'Format': 'JSON',
  32. 'RegionId': 'cn-shanghai',
  33. 'SignatureMethod': 'HMAC-SHA1',
  34. 'SignatureNonce': str(uuid.uuid1()),
  35. 'SignatureVersion': '1.0',
  36. 'Timestamp': time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
  37. 'Version': '2019-02-28'
  38. }
  39. query_string = AccessToken._encode_dict(parameters)
  40. string_to_sign = 'GET' + '&' + AccessToken._encode_text('/') + '&' + AccessToken._encode_text(query_string)
  41. secreted_string = hmac.new(
  42. bytes(access_key_secret + '&', encoding='utf-8'),
  43. bytes(string_to_sign, encoding='utf-8'),
  44. hashlib.sha1
  45. ).digest()
  46. signature = base64.b64encode(secreted_string).decode('utf-8')
  47. signature = AccessToken._encode_text(signature)
  48. full_url = 'http://nls-meta.cn-shanghai.aliyuncs.com/?Signature=%s&%s' % (signature, query_string)
  49. try:
  50. response = requests.get(full_url)
  51. response.raise_for_status()
  52. root_obj = response.json()
  53. if 'Token' in root_obj:
  54. token = root_obj['Token']['Id']
  55. expire_time = root_obj['Token']['ExpireTime']
  56. return token, expire_time
  57. except requests.exceptions.RequestException as e:
  58. logger.error(f"获取Token失败: {e}")
  59. logger.error(f"获取Token失败: {response.text}")
  60. return None, None
  61. class TtsHeader:
  62. def __init__(self, appkey, token):
  63. self.appkey = appkey
  64. self.token = token
  65. def to_dict(self):
  66. return {'appkey': self.appkey, 'token': self.token}
  67. class TtsContext:
  68. def __init__(self, device_id):
  69. self.device_id = device_id
  70. def to_dict(self):
  71. return {'device_id': self.device_id}
  72. class TtsRequest:
  73. def __init__(self, voice, sample_rate, format, enable_subtitle, text):
  74. self.voice = voice
  75. self.sample_rate = sample_rate
  76. self.format = format
  77. self.enable_subtitle = enable_subtitle
  78. self.text = text
  79. def to_dict(self):
  80. return {
  81. 'voice': self.voice,
  82. 'sample_rate': self.sample_rate,
  83. 'format': self.format,
  84. 'enable_subtitle': self.enable_subtitle,
  85. 'text': self.text
  86. }
  87. class TtsPayload:
  88. def __init__(self, enable_notify, notify_url, tts_request):
  89. self.enable_notify = enable_notify
  90. self.notify_url = notify_url
  91. self.tts_request = tts_request
  92. def to_dict(self):
  93. return {
  94. 'enable_notify': self.enable_notify,
  95. 'notify_url': self.notify_url,
  96. 'tts_request': self.tts_request.to_dict()
  97. }
  98. class TtsBody:
  99. def __init__(self, tts_header, tts_context, tts_payload):
  100. self.tts_header = tts_header
  101. self.tts_context = tts_context
  102. self.tts_payload = tts_payload
  103. def to_dict(self):
  104. return {
  105. 'header': self.tts_header.to_dict(),
  106. 'context': self.tts_context.to_dict(),
  107. 'payload': self.tts_payload.to_dict()
  108. }
  109. class AliyunTTS:
  110. def __init__(self):
  111. self.access_key_id = OssConfig["OSS_ACCESS_KEY_ID"]
  112. self.access_key_secret = OssConfig["OSS_ACCESS_KEY_SECRET"]
  113. self.app_key = OssConfig["APP_KEY"]
  114. self.token = None
  115. self.expire_time = None
  116. def get_token(self):
  117. """获取并缓存访问令牌"""
  118. if not self.token or time.time() + 60 > self.expire_time:
  119. self.token, self.expire_time = AccessToken.create_token(
  120. self.access_key_id, self.access_key_secret
  121. )
  122. if self.token:
  123. logger.info(f"获取Token成功,有效期至: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.expire_time))}")
  124. else:
  125. logger.error("获取Token失败")
  126. return self.token
  127. def synthesize(self, text, voice="xiaoyun", format="mp3", sample_rate=16000, use_polling=True):
  128. """
  129. 当 use_polling=False 时,阿里云语音合成会采用 回调模式 而非轮询模式。此时,需要提供一个 公网可访问的回调 URL,
  130. 阿里云会在语音合成完成后主动发送请求到该 URL,通知合成结果。
  131. """
  132. """长文本语音合成"""
  133. token = self.get_token()
  134. if not token:
  135. return None
  136. th = TtsHeader(self.app_key, token)
  137. tc = TtsContext("mydevice")
  138. tr = TtsRequest(voice, sample_rate, format, False, text)
  139. notify_url = "" if use_polling else "http://your-public-server.com/tts-callback"
  140. tp = TtsPayload(use_polling, notify_url, tr)
  141. tb = TtsBody(th, tc, tp)
  142. body = json.dumps(tb.to_dict())
  143. polling_url = "https://nls-gateway.cn-shanghai.aliyuncs.com/rest/v1/tts/async"
  144. return request_long_tts(body, self.app_key, token, use_polling, polling_url)
  145. def request_long_tts(tts_body, appkey, token, use_polling=True, polling_url=None):
  146. """发送长文本语音合成请求"""
  147. url = 'https://nls-gateway.cn-shanghai.aliyuncs.com/rest/v1/tts/async'
  148. headers = {'Content-Type': 'application/json'}
  149. try:
  150. response = requests.post(url, data=tts_body, headers=headers)
  151. response.raise_for_status()
  152. json_data = response.json()
  153. if "error_code" in json_data and json_data["error_code"] == 20000000:
  154. task_id = json_data['data']['task_id']
  155. request_id = json_data['request_id']
  156. logger.info(f"语音合成任务已提交,task_id: {task_id}")
  157. if use_polling and polling_url:
  158. return wait_loop_for_complete(polling_url, appkey, token, task_id, request_id)
  159. return task_id, request_id
  160. else:
  161. logger.error(f"请求失败: {json_data}")
  162. return None, None
  163. except requests.exceptions.RequestException as e:
  164. logger.error(f'请求异常: {e}')
  165. return None, None
  166. def wait_loop_for_complete(url, appkey, token, task_id, request_id, max_retries=30):
  167. """轮询等待合成完成"""
  168. full_url = f"{url}?appkey={appkey}&task_id={task_id}&token={token}&request_id={request_id}"
  169. logger.info(f"开始轮询任务状态: {task_id}")
  170. for retries in range(max_retries):
  171. try:
  172. response = requests.get(full_url)
  173. response.raise_for_status()
  174. json_data = response.json()
  175. if "data" in json_data and "audio_address" in json_data["data"]:
  176. audio_address = json_data["data"]["audio_address"]
  177. if audio_address:
  178. logger.info(f"合成完成! audio_address = {audio_address}")
  179. return audio_address
  180. else:
  181. logger.info(f"第 {retries + 1}/{max_retries} 次轮询: 合成中...")
  182. elif "error_code" in json_data and json_data["error_code"] != 20000000:
  183. logger.error(f"合成失败: {json_data.get('error_message', '未知错误')}")
  184. return None
  185. except requests.exceptions.RequestException as e:
  186. logger.warning(f"轮询请求异常: {e}")
  187. time.sleep(10)
  188. logger.warning(f"已达到最大轮询次数({max_retries}),任务可能仍在处理中")
  189. return None
  190. if __name__ == "__main__":
  191. # 准备请求文本
  192. tts_text = """生活中总有一些故事能让我们感受到温暖和智慧,赵元任的传奇经历就是这样一个值得分享的好故事..."""
  193. tts_client = AliyunTTS()
  194. # 语音合成
  195. logger.info("开始语音合成...")
  196. mp3_url = tts_client.synthesize(tts_text)
  197. logger.info(f"合成的url: {mp3_url}")
  198. if not mp3_url:
  199. logger.error("语音合成失败,程序退出")