# xunfei_asr.py
import ast
import base64
import hashlib
import hmac
import json
import os
import time
import urllib
import urllib.parse

import requests

from audio_process import get_audio_duration
from config import set_config
  12. config_ = set_config()
  13. class RequestApi(object):
  14. def __init__(self, appid, secret_key, upload_file_path):
  15. self.appid = appid
  16. self.secret_key = secret_key
  17. self.upload_file_path = upload_file_path
  18. self.ts = str(int(time.time()))
  19. self.signa = self.get_signa()
  20. def get_signa(self):
  21. """
  22. signa生成
  23. :return: signa
  24. """
  25. # signa的生成公式:HmacSHA1(MD5(appid + ts),secretkey)
  26. m2 = hashlib.md5()
  27. m2.update((self.appid + self.ts).encode('utf-8'))
  28. md5 = m2.hexdigest()
  29. md5 = bytes(md5, encoding='utf-8')
  30. # 以secret_key为key, 上面的md5为msg, 使用hashlib.sha1加密结果为signa
  31. signa = hmac.new(self.secret_key.encode('utf-8'), md5, hashlib.sha1).digest()
  32. signa = base64.b64encode(signa)
  33. signa = str(signa, 'utf-8')
  34. return signa
  35. def upload(self):
  36. """
  37. 上传
  38. :return: orderId
  39. """
  40. # 获取音频文件大小
  41. file_len = os.path.getsize(self.upload_file_path)
  42. file_name = os.path.basename(self.upload_file_path)
  43. # 获取音频时长
  44. duration = get_audio_duration(self.upload_file_path)
  45. # 请求参数拼接
  46. param_dict = {
  47. 'appId': self.appid,
  48. 'signa': self.signa,
  49. 'ts': self.ts,
  50. 'fileSize': file_len,
  51. 'fileName': file_name,
  52. 'duration': str(duration),
  53. 'roleType': 1
  54. }
  55. # print("upload参数:", param_dict)
  56. # 以二进制方式读取音频文件内容
  57. data = open(self.upload_file_path, 'rb').read(file_len)
  58. # 请求upload api
  59. response = requests.post(
  60. url=config_.XFASR_HOST + config_.XF_API['upload'] + "?" + urllib.parse.urlencode(param_dict),
  61. headers={"Content-type": "application/json"},
  62. data=data
  63. )
  64. # print("upload_url:", response.request.url)
  65. result = json.loads(response.text)
  66. # print("upload resp:", result)
  67. return result['content']['orderId']
  68. def get_result(self, order_id):
  69. """
  70. 查询结果
  71. :param order_id:
  72. :return: result
  73. """
  74. param_dict = {
  75. 'appId': self.appid,
  76. 'signa': self.signa,
  77. 'ts': self.ts,
  78. 'orderId': order_id,
  79. 'resultType': 'transfer'
  80. }
  81. status = 3
  82. # 建议使用回调的方式查询结果,查询接口有请求频率限制
  83. while status == 3:
  84. response = requests.post(
  85. url=config_.XFASR_HOST + config_.XF_API['get_result'] + "?" + urllib.parse.urlencode(param_dict),
  86. headers={"Content-type": "application/json"}
  87. )
  88. # print("get_result_url:",response.request.url)
  89. result = json.loads(response.text)
  90. status = result['content']['orderInfo']['status']
  91. if status == 4:
  92. return result
  93. time.sleep(5)
  94. def parse_lattice(self, result):
  95. content = result['content']['orderResult']
  96. content = ast.literal_eval(content)
  97. contents = content['lattice']
  98. asr_ret = ''
  99. for js in contents:
  100. json_1best = js['json_1best']
  101. json_1best = ast.literal_eval(json_1best)
  102. # print(json_1best)
  103. json_1best_contents = json_1best['st']['rt']
  104. l = []
  105. for cw in json_1best_contents:
  106. cws = cw['ws']
  107. for cw in cws:
  108. l.append(cw['cw'][0]['w'])
  109. asr_ret += ''.join(l)+'\n'
  110. return asr_ret
  111. def call_asr(audio_path):
  112. """ASR"""
  113. dialogue_path = audio_path.replace('.wav', '.txt')
  114. # 视频已识别,则不重复调用,直接读取文件中的内容
  115. if os.path.exists(dialogue_path):
  116. with open(dialogue_path, 'r') as rf:
  117. asr_res = ''.join(rf.readlines())
  118. else:
  119. api = RequestApi(appid=config_.XFASR_CONFIG['appid'],
  120. secret_key=config_.XFASR_CONFIG['secret_key'],
  121. upload_file_path=audio_path)
  122. order_id = api.upload()
  123. result = api.get_result(order_id)
  124. asr_res = api.parse_lattice(result)
  125. with open(dialogue_path, 'w') as f:
  126. f.write(asr_res)
  127. return dialogue_path, asr_res
  128. if __name__ == '__main__':
  129. audio_path = 'videos/001.wav'
  130. call_asr(audio_path=audio_path)