3 tháng trước · 30c99f4081
--- a/docs/en/server.md
+++ b/docs/en/server.md
@@ -40,6 +40,27 @@ Expected response:
 
															 - `POST /v1/vqgan/encode` for VQ encode
														
 
															 - `POST /v1/vqgan/decode` for VQ decode
														
 
															+### Python client example
														
 
															+
														
 
															+The base TTS model is selected when the server starts, not per request. In the example above, the server is started with the `checkpoints/s2-pro` weights, so every request sent to `http://127.0.0.1:8080/v1/tts` uses **S2-Pro** automatically. `tools/api_client.py` has no per-request `model` field for local server calls; to use a different base model, start the server with a different checkpoint.
														
 
															+
														
 
															+```bash
														
 
															+python tools/api_client.py \
														
 
															+  --url http://127.0.0.1:8080/v1/tts \
														
 
															+  --text "Hello from Fish Speech" \
														
 
															+  --output s2-pro-demo
														
 
															+```
														
 
															+
														
 
															+To select a saved reference voice, pass `--reference_id`. This chooses the **reference voice**, not the base TTS model:
														
 
															+
														
 
															+```bash
														
 
															+python tools/api_client.py \
														
 
															+  --url http://127.0.0.1:8080/v1/tts \
														
 
															+  --text "Hello from Fish Speech" \
														
 
															+  --reference_id my-speaker \
														
 
															+  --output s2-pro-demo
														
 
															+```
														
 
															+
														
 
															 ## WebUI Inference
														
 
															 For WebUI usage, see:
														
--- a/tools/api_client.py
+++ b/tools/api_client.py
@@ -15,7 +15,16 @@ from fish_speech.utils.schema import ServeReferenceAudio, ServeTTSRequest
 
															 def parse_args():
														
 
															     parser = argparse.ArgumentParser(
														
 
															-        description="Send a WAV file and text to a server and receive synthesized audio.",
														
 
															+        description="Send text to a Fish Speech TTS server and receive synthesized audio.",
														
 
															+        epilog=(
														
 
															+            "Model selection note:\n"
														
 
															+            "  The base TTS model is selected by the server you call. For example, if the\n"
														
 
															+            "  server was started with checkpoints/s2-pro, this client will use S2-Pro\n"
														
 
															+            "  automatically. There is no separate per-request --model flag.\n\n"
														
 
															+            "Examples:\n"
														
 
															+            '  python tools/api_client.py -u http://127.0.0.1:8080/v1/tts -t "Hello from Fish Speech"\n'
														
 
															+            '  python tools/api_client.py -u http://127.0.0.1:8080/v1/tts -t "Hello" --reference_id my-speaker'
														
 
															+        ),
														
 
															         formatter_class=argparse.RawTextHelpFormatter,
														
 
															     )
														
@@ -24,7 +33,7 @@ def parse_args():
 
															         "-u",
														
 
															         type=str,
														
 
															         default="http://127.0.0.1:8080/v1/tts",
														
 
															-        help="URL of the server",
														
 
															+        help="URL of the TTS server. The server decides which base model is loaded.",
														
 
															     )
														
 
															     parser.add_argument(
														
 
															         "--text", "-t", type=str, required=True, help="Text to be synthesized"
														
@@ -34,7 +43,7 @@ def parse_args():
 
															         "-id",
														
 
															         type=str,
														
 
															         default=None,
														
 
															-        help="ID of the reference model to be used for the speech\n(Local: name of folder containing audios and files)",
														
 
															+        help="ID of the reference voice to use for synthesis\n(Local: name of folder containing audios and files)",
														
 
															     )
														
 
															     parser.add_argument(
														
 
															         "--reference_audio",