преди 2 месеца · 30c99f4081
--- a/docs/en/server.md
+++ b/docs/en/server.md
@@ -40,6 +40,27 @@ Expected response:
 
				 - `POST /v1/vqgan/encode` for VQ encode
			
 
				 - `POST /v1/vqgan/decode` for VQ decode
			
 
				 
			
 
				+### Python client example
			
 
				+
			
 
				+The base TTS model is selected when the server starts. In the example above, the server is started with the `checkpoints/s2-pro` weights, so every request sent to `http://127.0.0.1:8080/v1/tts` will use **S2-Pro** automatically. `tools/api_client.py` has no separate per-request `--model` flag for local server calls.
			
 
				+
			
 
				+```bash
			
 
				+python tools/api_client.py \
			
 
				+  --url http://127.0.0.1:8080/v1/tts \
			
 
				+  --text "Hello from Fish Speech" \
			
 
				+  --output s2-pro-demo
			
 
				+```
			
 
				+
			
 
				+If you want to select a saved reference voice, use `--reference_id`. This chooses the **voice reference**, not the base TTS model:
			
 
				+
			
 
				+```bash
			
 
				+python tools/api_client.py \
			
 
				+  --url http://127.0.0.1:8080/v1/tts \
			
 
				+  --text "Hello from Fish Speech" \
			
 
				+  --reference_id my-speaker \
			
 
				+  --output s2-pro-demo
			
 
				+```
			
 
				+
			
 
				 ## WebUI Inference
			
 
				 
			
 
				 For WebUI usage, see:
			
--- a/tools/api_client.py
+++ b/tools/api_client.py
@@ -15,7 +15,16 @@ from fish_speech.utils.schema import ServeReferenceAudio, ServeTTSRequest
 
				 
			
 
				 def parse_args():
			
 
				     parser = argparse.ArgumentParser(
			
 
				-        description="Send a WAV file and text to a server and receive synthesized audio.",
			
 
				+        description="Send text to a Fish Speech TTS server and receive synthesized audio.",
			
 
				+        epilog=(
			
 
				+            "Model selection note:\n"
			
 
				+            "  The base TTS model is selected by the server you call. For example, if the\n"
			
 
				+            "  server was started with checkpoints/s2-pro, this client will use S2-Pro\n"
			
 
				+            "  automatically. There is no separate per-request --model flag.\n\n"
			
 
				+            "Examples:\n"
			
 
				+            '  python tools/api_client.py -u http://127.0.0.1:8080/v1/tts -t "Hello from Fish Speech"\n'
			
 
				+            '  python tools/api_client.py -u http://127.0.0.1:8080/v1/tts -t "Hello" --reference_id my-speaker'
			
 
				+        ),
			
 
				         formatter_class=argparse.RawTextHelpFormatter,
			
 
				     )
			
 
				 
			
@@ -24,7 +33,7 @@ def parse_args():
 
				         "-u",
			
 
				         type=str,
			
 
				         default="http://127.0.0.1:8080/v1/tts",
			
 
				-        help="URL of the server",
			
 
				+        help="URL of the TTS server. The server decides which base model is loaded.",
			
 
				     )
			
 
				     parser.add_argument(
			
 
				         "--text", "-t", type=str, required=True, help="Text to be synthesized"
			
@@ -34,7 +43,7 @@ def parse_args():
 
				         "-id",
			
 
				         type=str,
			
 
				         default=None,
			
 
				-        help="ID of the reference model to be used for the speech\n(Local: name of folder containing audios and files)",
			
 
				+        help="ID of the reference voice to use for synthesis\n(Local: name of folder containing audios and files)",
			
 
				     )
			
 
				     parser.add_argument(
			
 
				         "--reference_audio",