فهرست منبع

fix: add reference ID validation to prevent path traversal (#1207)

* fix: add reference ID validation to prevent path traversal

The delete_reference and load_by_id methods in ReferenceLoader, and the
delete_reference and update_reference endpoints in views.py, accepted
arbitrary strings as reference IDs without format validation. This allowed
path traversal via IDs containing ".." to read, create, or delete
directories outside the references folder.

Applied the same alphanumeric + hyphen/underscore/space pattern already
used by add_reference to all reference ID entry points.

Made-with: Cursor

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
huang yutong 2 هفته پیش
والد
کامیت
0def6f38e3
2 فایلهای تغییر یافته به همراه 24 افزوده شده و 13 حذف شده
  1. 16 13
      fish_speech/inference_engine/reference_loader.py
  2. 8 0
      tools/server/views.py

+ 16 - 13
fish_speech/inference_engine/reference_loader.py

@@ -1,4 +1,5 @@
 import io
+import re
 from hashlib import sha256
 from pathlib import Path
 from typing import Callable, Literal, Tuple
@@ -16,6 +17,8 @@ from fish_speech.utils.file import (
 )
 from fish_speech.utils.schema import ServeReferenceAudio
 
+_ID_PATTERN = re.compile(r"^[a-zA-Z0-9\-_ ]+$")
+
 
 class ReferenceLoader:
     def __init__(self) -> None:
@@ -48,11 +51,21 @@ class ReferenceLoader:
             except (ImportError, ModuleNotFoundError):
                 self.backend = "soundfile"
 
+    @staticmethod
+    def _validate_id(id: str) -> None:
+        if not _ID_PATTERN.match(id) or len(id) > 255:
+            raise ValueError(
+                "Reference ID contains invalid characters or is too long. "
+                "Only alphanumeric, hyphens, underscores, and spaces are allowed."
+            )
+
     def load_by_id(
         self,
         id: str,
         use_cache: Literal["on", "off"],
     ) -> Tuple:
+        self._validate_id(id)
+
         # Load the references audio and text by id
         ref_folder = Path("references") / id
         ref_folder.mkdir(parents=True, exist_ok=True)
@@ -189,18 +202,7 @@ class ReferenceLoader:
             FileNotFoundError: If the audio file doesn't exist
             OSError: If file operations fail
         """
-        # Validate ID format
-        import re
-
-        if not re.match(r"^[a-zA-Z0-9\-_ ]+$", id):
-            raise ValueError(
-                "Reference ID contains invalid characters. Only alphanumeric, hyphens, underscores, and spaces are allowed."
-            )
-
-        if len(id) > 255:
-            raise ValueError(
-                "Reference ID is too long. Maximum length is 255 characters."
-            )
+        self._validate_id(id)
 
         # Check if reference already exists
         ref_dir = Path("references") / id
@@ -260,7 +262,8 @@ class ReferenceLoader:
             FileNotFoundError: If the reference ID doesn't exist
             OSError: If file operations fail
         """
-        # Check if reference exists
+        self._validate_id(id)
+
         ref_dir = Path("references") / id
         if not ref_dir.exists():
             raise FileNotFoundError(f"Reference ID '{id}' does not exist")

+ 8 - 0
tools/server/views.py

@@ -325,6 +325,10 @@ async def delete_reference(reference_id: str = Body(...)):
         if not reference_id or not reference_id.strip():
             raise ValueError("Reference ID cannot be empty")
 
+        id_pattern = r"^[a-zA-Z0-9\-_ ]+$"
+        if not re.match(id_pattern, reference_id) or len(reference_id) > 255:
+            raise ValueError("Reference ID contains invalid characters or is too long")
+
         # Get the model manager to access the reference loader
         app_state = request.app.state
         model_manager: ModelManager = app_state.model_manager
@@ -395,6 +399,10 @@ async def update_reference(
 
         # Validate ID format per ReferenceLoader rules
         id_pattern = r"^[a-zA-Z0-9\-_ ]+$"
+        if not re.match(id_pattern, old_reference_id) or len(old_reference_id) > 255:
+            raise ValueError(
+                "Old reference ID contains invalid characters or is too long"
+            )
         if not re.match(id_pattern, new_reference_id) or len(new_reference_id) > 255:
             raise ValueError(
                 "New reference ID contains invalid characters or is too long"