# msgpack_api.py
#
# Example client calls for the TTS (/v1/tts) and ASR (/v1/asr) HTTP endpoints
# using msgpack-encoded request bodies.
  1. import os
  2. from argparse import ArgumentParser
  3. from pathlib import Path
  4. import httpx
  5. import ormsgpack
  6. from tools.commons import ServeReferenceAudio, ServeTTSRequest
  7. api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")
  8. def audio_request():
  9. # priority: ref_id > references
  10. request = ServeTTSRequest(
  11. text="你说的对, 但是原神是一款由米哈游自主研发的开放世界手游.",
  12. # reference_id="114514",
  13. references=[
  14. ServeReferenceAudio(
  15. audio=open("lengyue.wav", "rb").read(),
  16. text=open("lengyue.lab", "r", encoding="utf-8").read(),
  17. )
  18. ],
  19. streaming=True,
  20. )
  21. api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")
  22. with (
  23. httpx.Client() as client,
  24. open("hello.wav", "wb") as f,
  25. ):
  26. with client.stream(
  27. "POST",
  28. "http://127.0.0.1:8080/v1/tts",
  29. content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
  30. headers={
  31. "authorization": f"Bearer {api_key}",
  32. "content-type": "application/msgpack",
  33. },
  34. timeout=None,
  35. ) as response:
  36. for chunk in response.iter_bytes():
  37. f.write(chunk)
  38. def asr_request(audio_path: Path):
  39. # Read the audio file
  40. with open(
  41. str(audio_path),
  42. "rb",
  43. ) as audio_file:
  44. audio_data = audio_file.read()
  45. # Prepare the request data
  46. request_data = {
  47. "audio": audio_data,
  48. "language": "en", # Optional: specify the language
  49. "ignore_timestamps": False, # Optional: set to True to ignore precise timestamps
  50. }
  51. # Send the request
  52. with httpx.Client() as client:
  53. response = client.post(
  54. "https://api.fish.audio/v1/asr",
  55. headers={
  56. "Authorization": f"Bearer {api_key}",
  57. "Content-Type": "application/msgpack",
  58. },
  59. content=ormsgpack.packb(request_data),
  60. )
  61. # Parse the response
  62. result = response.json()
  63. print(f"Transcribed text: {result['text']}")
  64. print(f"Audio duration: {result['duration']} seconds")
  65. for segment in result["segments"]:
  66. print(f"Segment: {segment['text']}")
  67. print(f"Start time: {segment['start']}, End time: {segment['end']}")
  68. def parse_args():
  69. parser = ArgumentParser()
  70. parser.add_argument("--audio_path", type=Path, default="audio/ref/trump.mp3")
  71. return parser.parse_args()
  72. if __name__ == "__main__":
  73. args = parse_args()
  74. asr_request(args.audio_path)