# post_api.py
# Send a WAV file and text to a server and receive synthesized audio.

import argparse
import base64
import json

import pyaudio
import requests
  6. def wav_to_base64(file_path):
  7. with open(file_path, "rb") as wav_file:
  8. wav_content = wav_file.read()
  9. base64_encoded = base64.b64encode(wav_content)
  10. return base64_encoded.decode("utf-8")
  11. def play_audio(audio_content, format, channels, rate):
  12. p = pyaudio.PyAudio()
  13. stream = p.open(format=format, channels=channels, rate=rate, output=True)
  14. stream.write(audio_content)
  15. stream.stop_stream()
  16. stream.close()
  17. p.terminate()
  18. if __name__ == "__main__":
  19. parser = argparse.ArgumentParser(
  20. description="Send a WAV file and text to a server and receive synthesized audio."
  21. )
  22. parser.add_argument(
  23. "--url", "-u", type=str, required=True, help="URL of the server"
  24. )
  25. parser.add_argument(
  26. "--text", "-t", type=str, required=True, help="Text to be synthesized"
  27. )
  28. parser.add_argument(
  29. "--reference_audio", "-ra", type=str, required=True, help="Path to the WAV file"
  30. )
  31. parser.add_argument(
  32. "--reference_text",
  33. "-rt",
  34. type=str,
  35. required=True,
  36. help="Reference text for voice synthesis",
  37. )
  38. parser.add_argument(
  39. "--max_new_tokens", type=int, default=0, help="Maximum new tokens to generate"
  40. )
  41. parser.add_argument(
  42. "--chunk_length", type=int, default=30, help="Chunk length for synthesis"
  43. )
  44. parser.add_argument(
  45. "--top_p", type=float, default=0.7, help="Top-p sampling for synthesis"
  46. )
  47. parser.add_argument(
  48. "--repetition_penalty",
  49. type=float,
  50. default=1.5,
  51. help="Repetition penalty for synthesis",
  52. )
  53. parser.add_argument(
  54. "--temperature", type=float, default=0.7, help="Temperature for sampling"
  55. )
  56. parser.add_argument(
  57. "--speaker", type=str, default=None, help="Speaker ID for voice synthesis"
  58. )
  59. parser.add_argument("--format", type=str, default="wav", help="Audio format")
  60. parser.add_argument(
  61. "--streaming", type=bool, default=False, help="Enable streaming response"
  62. )
  63. parser.add_argument(
  64. "--channels", type=int, default=1, help="Number of audio channels"
  65. )
  66. parser.add_argument("--rate", type=int, default=44100, help="Sample rate for audio")
  67. args = parser.parse_args()
  68. base64_audio = wav_to_base64(args.reference_audio)
  69. data = {
  70. "text": args.text,
  71. "reference_text": args.reference_text,
  72. "reference_audio": base64_audio,
  73. "max_new_tokens": args.max_new_tokens,
  74. "chunk_length": args.chunk_length,
  75. "top_p": args.top_p,
  76. "repetition_penalty": args.repetition_penalty,
  77. "temperature": args.temperature,
  78. "speaker": args.speaker,
  79. "format": args.format,
  80. "streaming": args.streaming,
  81. }
  82. response = requests.post(args.url, json=data, stream=args.streaming)
  83. audio_format = pyaudio.paInt16 # Assuming 16-bit PCM format
  84. if response.status_code == 200:
  85. if args.streaming:
  86. p = pyaudio.PyAudio()
  87. stream = p.open(
  88. format=audio_format, channels=args.channels, rate=args.rate, output=True
  89. )
  90. for chunk in response.iter_content(chunk_size=1024):
  91. if chunk:
  92. stream.write(chunk)
  93. stream.stop_stream()
  94. stream.close()
  95. p.terminate()
  96. else:
  97. audio_content = response.content
  98. with open("generated_audio.wav", "wb") as audio_file:
  99. audio_file.write(audio_content)
  100. play_audio(audio_content, audio_format, args.channels, args.rate)
  101. print("Audio has been saved to 'generated_audio.wav'.")
  102. else:
  103. print(f"Request failed with status code {response.status_code}")
  104. print(response.json())