post_api.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. import argparse
  2. import base64
  3. import json
  4. import wave
  5. from pathlib import Path
  6. import pyaudio
  7. import requests
  def wav_to_base64(file_path):
      """Return the bytes of ``file_path`` as a Base64-encoded string.

      Args:
          file_path: Path to the file to encode. May be ``None`` or empty.

      Returns:
          The Base64 text (decoded as UTF-8) of the file's raw bytes, or
          ``None`` when ``file_path`` is falsy or does not name an existing
          regular file.
      """
      if not file_path:
          return None

      path = Path(file_path)
      # is_file() (rather than exists()) also rejects directories, which would
      # otherwise make the read below raise instead of returning None.
      if not path.is_file():
          return None

      return base64.b64encode(path.read_bytes()).decode("utf-8")
  def read_ref_text(ref_text):
      """Resolve the reference text, loading it from disk when it names a file.

      Args:
          ref_text: Either the literal reference text, or the path of a UTF-8
              text file that contains it.

      Returns:
          The file's contents when ``ref_text`` is the path of an existing
          regular file; otherwise ``ref_text`` itself, unchanged.
      """
      try:
          names_a_file = Path(ref_text).is_file()
      except (OSError, ValueError):
          # Literal text is probed as if it were a path; long sentences can
          # exceed the OS file-name limit (or contain NUL bytes), which makes
          # the probe raise instead of returning False. Such a value cannot be
          # a file, so treat it as literal text.
          names_a_file = False

      if not names_a_file:
          return ref_text

      return Path(ref_text).read_text(encoding="utf-8")
  def play_audio(audio_content, format, channels, rate):
      """Write ``audio_content`` to a PyAudio output stream.

      Args:
          audio_content: Audio bytes passed unchanged to the stream's
              ``write`` method.
          format: Sample format forwarded to ``PyAudio.open`` (the script in
              this file passes ``pyaudio.paInt16``).
          channels: Channel count forwarded to ``PyAudio.open``.
          rate: Sample rate forwarded to ``PyAudio.open``.
      """
      p = pyaudio.PyAudio()
      try:
          stream = p.open(format=format, channels=channels, rate=rate, output=True)
          try:
              stream.write(audio_content)
              stream.stop_stream()
          finally:
              # Release the stream even if writing or stopping fails.
              stream.close()
      finally:
          # Always release the PyAudio instance, including when open() fails.
          p.terminate()
  def str_to_bool(value):
      """Convert a command-line token into a real boolean.

      ``argparse`` with ``type=bool`` treats every non-empty string (including
      ``"False"``) as ``True``; this parser interprets the text instead.

      Args:
          value: A string such as ``"true"``, ``"False"``, ``"1"`` or ``"no"``
              (case-insensitive), or an existing ``bool``.

      Returns:
          The corresponding boolean. An empty string maps to ``False``.

      Raises:
          argparse.ArgumentTypeError: If the token is not a recognised boolean.
      """
      if isinstance(value, bool):
          return value
      token = value.strip().lower()
      if token in ("true", "t", "yes", "y", "1", "on"):
          return True
      if token in ("false", "f", "no", "n", "0", "off", ""):
          return False
      raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


  def build_parser():
      """Build the argument parser for the synthesis client.

      Returns:
          An ``argparse.ArgumentParser`` exposing the request options
          (``--url``, ``--text``, reference audio/text, sampling parameters)
          and the local playback options (``--channels``, ``--rate``).
      """
      parser = argparse.ArgumentParser(
          description="Send a WAV file and text to a server and receive synthesized audio."
      )
      parser.add_argument(
          "--url",
          "-u",
          type=str,
          default="http://127.0.0.1:8080/v1/invoke",
          help="URL of the server",
      )
      parser.add_argument(
          "--text", "-t", type=str, required=True, help="Text to be synthesized"
      )
      parser.add_argument(
          "--reference_audio",
          "-ra",
          type=str,
          default=None,
          help="Path to the WAV file",
      )
      parser.add_argument(
          "--reference_text",
          "-rt",
          type=str,
          default=None,
          help="Reference text for voice synthesis",
      )
      parser.add_argument(
          "--max_new_tokens",
          type=int,
          default=1024,
          help="Maximum new tokens to generate",
      )
      parser.add_argument(
          "--chunk_length", type=int, default=100, help="Chunk length for synthesis"
      )
      parser.add_argument(
          "--top_p", type=float, default=0.7, help="Top-p sampling for synthesis"
      )
      parser.add_argument(
          "--repetition_penalty",
          type=float,
          default=1.2,
          help="Repetition penalty for synthesis",
      )
      parser.add_argument(
          "--temperature", type=float, default=0.7, help="Temperature for sampling"
      )
      parser.add_argument(
          "--speaker", type=str, default=None, help="Speaker ID for voice synthesis"
      )
      parser.add_argument("--emotion", type=str, default=None, help="Speaker's Emotion")
      parser.add_argument("--format", type=str, default="wav", help="Audio format")
      # nargs="?" + const=True keeps "--streaming True" working, makes
      # "--streaming False" actually disable streaming, and allows a bare
      # "--streaming" flag.
      parser.add_argument(
          "--streaming",
          type=str_to_bool,
          nargs="?",
          const=True,
          default=False,
          help="Enable streaming response",
      )
      parser.add_argument(
          "--channels", type=int, default=1, help="Number of audio channels"
      )
      parser.add_argument("--rate", type=int, default=44100, help="Sample rate for audio")
      return parser


  if __name__ == "__main__":
      args = build_parser().parse_args()

      base64_audio = wav_to_base64(args.reference_audio)

      # The reference text may be given literally or as a path to a text file.
      ref_text = args.reference_text
      if ref_text:
          ref_text = read_ref_text(ref_text)

      data = {
          "text": args.text,
          "reference_text": ref_text,
          "reference_audio": base64_audio,
          "max_new_tokens": args.max_new_tokens,
          "chunk_length": args.chunk_length,
          "top_p": args.top_p,
          "repetition_penalty": args.repetition_penalty,
          "temperature": args.temperature,
          "speaker": args.speaker,
          "emotion": args.emotion,
          "format": args.format,
          "streaming": args.streaming,
      }

      response = requests.post(args.url, json=data, stream=args.streaming)

      audio_format = pyaudio.paInt16  # Assuming 16-bit PCM format

      if response.status_code == 200:
          if args.streaming:
              # Play chunks as they arrive while also saving them to disk.
              p = pyaudio.PyAudio()
              try:
                  stream = p.open(
                      format=audio_format,
                      channels=args.channels,
                      rate=args.rate,
                      output=True,
                  )
                  try:
                      wf = wave.open("generated_audio.wav", "wb")
                      try:
                          wf.setnchannels(args.channels)
                          wf.setsampwidth(p.get_sample_size(audio_format))
                          wf.setframerate(args.rate)

                          stream_stopped_flag = False
                          for chunk in response.iter_content(chunk_size=1024):
                              if chunk:
                                  stream.write(chunk)
                                  wf.writeframesraw(chunk)
                              else:
                                  if not stream_stopped_flag:
                                      stream.stop_stream()
                                      stream_stopped_flag = True
                      finally:
                          # Closing the wave file is what finalizes it on disk;
                          # do it even if playback fails midway.
                          wf.close()
                  finally:
                      stream.close()
              finally:
                  p.terminate()
          else:
              audio_content = response.content

              with open("generated_audio.wav", "wb") as audio_file:
                  audio_file.write(audio_content)

              # NOTE(review): the whole response body is handed to the output
              # stream as-is; confirm the server's non-streaming payload is
              # playable as raw samples at --channels/--rate.
              play_audio(audio_content, audio_format, args.channels, args.rate)
              print("Audio has been saved to 'generated_audio.wav'.")
      else:
          print(f"Request failed with status code {response.status_code}")
          try:
              print(response.json())
          except ValueError:
              # Error bodies are not guaranteed to be JSON (e.g. an HTML error
              # page from a proxy); show the raw text instead of crashing.
              print(response.text)