Просмотр исходного кода

Fix dockerfile for `pyaudio` (#623)

* Readmes, deps, api workers

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix speed loss after compiling

* revert log

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add dockerfile dep: gcc

* Move READMES in subfolder

* Fix dockerfile

* Fix dockerfile

* restore docker setup

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Leng Yue <lengyue@lengyue.me>
spicysama 1 год назад
Родитель
Commit
23fa4d7e38
4 изменённых файла: 31 добавлено и 6 удалено
  1. 1 1
      .github/workflows/build-docker-image.yml
  2. 4 0
      dockerfile
  3. 4 0
      dockerfile.dev
  4. 22 5
      tools/msgpack_api.py

+ 1 - 1
.github/workflows/build-docker-image.yml

@@ -5,7 +5,7 @@ on:
     branches:
       - main
     tags:
-      - 'v*'
+      - "v*"
 
 jobs:
   build:

+ 4 - 0
dockerfile

@@ -18,6 +18,10 @@ ARG DEPENDENCIES="  \
     libsox-dev \
     build-essential \
     cmake \
+    libasound-dev \
+    portaudio19-dev \
+    libportaudio2 \
+    libportaudiocpp0 \
     ffmpeg"
 
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \

+ 4 - 0
dockerfile.dev

@@ -17,6 +17,10 @@ ARG TOOLS="               \
         openssh-server    \
         sudo              \
         protobuf-compiler \
+        libasound-dev     \
+        portaudio19-dev   \
+        libportaudio2     \
+        libportaudiocpp0  \
         cmake"
 
 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \

+ 22 - 5
tools/msgpack_api.py

@@ -1,8 +1,14 @@
+import os
+from argparse import ArgumentParser
+from pathlib import Path
+
 import httpx
 import ormsgpack
 
 from tools.commons import ServeReferenceAudio, ServeTTSRequest
 
+api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")
+
 
 def audio_request():
     # priority: ref_id > references
@@ -18,6 +24,8 @@ def audio_request():
         streaming=True,
     )
 
+    api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")
+
     with (
         httpx.Client() as client,
         open("hello.wav", "wb") as f,
@@ -27,7 +35,7 @@ def audio_request():
             "http://127.0.0.1:8080/v1/tts",
             content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
             headers={
-                "authorization": "Bearer YOUR_API_KEY",
+                "authorization": f"Bearer {api_key}",
                 "content-type": "application/msgpack",
             },
             timeout=None,
@@ -36,11 +44,11 @@ def audio_request():
                 f.write(chunk)
 
 
-def asr_request():
+def asr_request(audio_path: Path):
 
     # Read the audio file
     with open(
-        r"D:\PythonProject\fish-speech\.cache\test_audios\prompts\2648200402409733590.wav",
+        str(audio_path),
         "rb",
     ) as audio_file:
         audio_data = audio_file.read()
@@ -57,7 +65,7 @@ def asr_request():
         response = client.post(
             "https://api.fish.audio/v1/asr",
             headers={
-                "Authorization": "Bearer 8eda4aeed2bc4aec9489b3efad003799",
+                "Authorization": f"Bearer {api_key}",
                 "Content-Type": "application/msgpack",
             },
             content=ormsgpack.packb(request_data),
@@ -74,5 +82,14 @@ def asr_request():
         print(f"Start time: {segment['start']}, End time: {segment['end']}")
 
 
+def parse_args():
+    parser = ArgumentParser()
+    parser.add_argument("--audio_path", type=Path, default="audio/ref/trump.mp3")
+
+    return parser.parse_args()
+
+
 if __name__ == "__main__":
-    asr_request()
+    args = parse_args()
+
+    asr_request(args.audio_path)