xunfei_asr.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. import ast
  2. import base64
  3. import hashlib
  4. import hmac
  5. import json
  6. import time
  7. import requests
  8. import urllib
  9. import os
  10. from audio_process import get_audio_duration
  11. from config import set_config
  12. from log import Log
# Project configuration object; the code below reads the ASR host, the
# endpoint paths and the API credentials from it.
config_ = set_config()
# Project logger; used below to report uploads rejected before any request is sent.
log_ = Log()
  15. class RequestApi(object):
  16. def __init__(self, appid, secret_key, upload_file_path):
  17. self.appid = appid
  18. self.secret_key = secret_key
  19. self.upload_file_path = upload_file_path
  20. self.ts = str(int(time.time()))
  21. self.signa = self.get_signa()
  22. def get_signa(self):
  23. """
  24. signa生成
  25. :return: signa
  26. """
  27. # signa的生成公式:HmacSHA1(MD5(appid + ts),secretkey)
  28. m2 = hashlib.md5()
  29. m2.update((self.appid + self.ts).encode('utf-8'))
  30. md5 = m2.hexdigest()
  31. md5 = bytes(md5, encoding='utf-8')
  32. # 以secret_key为key, 上面的md5为msg, 使用hashlib.sha1加密结果为signa
  33. signa = hmac.new(self.secret_key.encode('utf-8'), md5, hashlib.sha1).digest()
  34. signa = base64.b64encode(signa)
  35. signa = str(signa, 'utf-8')
  36. return signa
  37. def upload(self):
  38. """
  39. 上传
  40. :return: orderId
  41. """
  42. video_id = self.upload_file_path.split('/')[-1].replace('.wav', '')
  43. # 获取音频文件大小,不超过500M
  44. file_len = os.path.getsize(self.upload_file_path)
  45. file_size = file_len / 1024 / 1024
  46. if file_size > 500:
  47. log_.error({'videoId': video_id, 'errorType': 'audioSizeError',
  48. 'errorMsg': f'audioSize: {file_size}M, required <= 500M'})
  49. return None
  50. file_name = os.path.basename(self.upload_file_path)
  51. # 获取音频时长,不超过5h
  52. duration = get_audio_duration(self.upload_file_path)
  53. audio_duration = duration / 1000 / 60 / 60
  54. if audio_duration > 5:
  55. log_.error({'videoId': video_id, 'errorType': 'audioDurationError',
  56. 'errorMsg': f'audioSize: {audio_duration}h, required <= 5h'})
  57. return None
  58. # 请求参数拼接
  59. param_dict = {
  60. 'appId': self.appid,
  61. 'signa': self.signa,
  62. 'ts': self.ts,
  63. 'fileSize': file_len,
  64. 'fileName': file_name,
  65. 'duration': str(duration),
  66. 'roleType': 1
  67. }
  68. # print("upload参数:", param_dict)
  69. # 以二进制方式读取音频文件内容
  70. data = open(self.upload_file_path, 'rb').read(file_len)
  71. # 请求upload api
  72. response = requests.post(
  73. url=config_.XFASR_HOST + config_.XF_API['upload'] + "?" + urllib.parse.urlencode(param_dict),
  74. headers={"Content-type": "application/json"},
  75. data=data
  76. )
  77. # print(response.text)
  78. # print("upload_url:", response.request.url)
  79. result = json.loads(response.text)
  80. # print("upload resp:", result)
  81. return result['content']['orderId']
  82. def get_result(self, order_id):
  83. """
  84. 查询结果
  85. :param order_id:
  86. :return: result
  87. """
  88. param_dict = {
  89. 'appId': self.appid,
  90. 'signa': self.signa,
  91. 'ts': self.ts,
  92. 'orderId': order_id,
  93. 'resultType': 'transfer'
  94. }
  95. status = 3
  96. # 建议使用回调的方式查询结果,查询接口有请求频率限制
  97. while status == 3:
  98. response = requests.post(
  99. url=config_.XFASR_HOST + config_.XF_API['get_result'] + "?" + urllib.parse.urlencode(param_dict),
  100. headers={"Content-type": "application/json"}
  101. )
  102. # print("get_result_url:",response.request.url)
  103. result = json.loads(response.text)
  104. status = result['content']['orderInfo']['status']
  105. if status == 4:
  106. return result
  107. time.sleep(5)
  108. def parse_lattice(self, result):
  109. content = result['content']['orderResult']
  110. content = ast.literal_eval(content)
  111. contents = content['lattice']
  112. asr_ret = ''
  113. for js in contents:
  114. json_1best = js['json_1best']
  115. json_1best = ast.literal_eval(json_1best)
  116. # print(json_1best)
  117. json_1best_contents = json_1best['st']['rt']
  118. l = []
  119. for cw in json_1best_contents:
  120. cws = cw['ws']
  121. for cw in cws:
  122. l.append(cw['cw'][0]['w'])
  123. asr_ret += ''.join(l)+'\n'
  124. return asr_ret
  125. def call_asr(audio_path):
  126. """ASR"""
  127. dialogue_path = audio_path.replace('.wav', '.txt')
  128. # 视频已识别,则不重复调用,直接读取文件中的内容
  129. if os.path.exists(dialogue_path):
  130. with open(dialogue_path, 'r') as rf:
  131. asr_res = ''.join(rf.readlines())
  132. else:
  133. api = RequestApi(appid=config_.XFASR_CONFIG['appid'],
  134. secret_key=config_.XFASR_CONFIG['secret_key'],
  135. upload_file_path=audio_path)
  136. order_id = api.upload()
  137. result = api.get_result(order_id)
  138. asr_res = api.parse_lattice(result)
  139. with open(dialogue_path, 'w') as f:
  140. f.write(asr_res)
  141. return dialogue_path, asr_res
  142. if __name__ == '__main__':
  143. audio_path = 'videos/1275943.wav'
  144. call_asr(audio_path=audio_path)