소스 검색

feat:文件系统改为oss存储

tanjingyu 2 달 전
부모
커밋
20d7890847
6개의 변경된 파일99개의 추가작업 그리고 33개의 파일을 삭제
  1. 7 2
      .env.example
  2. 8 0
      app/config.py
  3. 16 12
      app/main.py
  4. 58 0
      app/services/oss_client.py
  5. 8 18
      app/services/storage_service.py
  6. 2 1
      requirements.txt

+ 7 - 2
.env.example

@@ -10,5 +10,10 @@ GOGS_URL=https://git.yishihui.com
 GOGS_TOKEN=4ae18a8e348dd931e33bf6f752536e9a6fd4d9c3
 GOGS_TOKEN=4ae18a8e348dd931e33bf6f752536e9a6fd4d9c3
 GOGS_SECRET=
 GOGS_SECRET=
 
 
-# ========== 存储配置 ==========
-STORAGE_ROOT=C:\D_DEVICE\Projec\data_nexus\storage
+# ========== OSS 配置 ==========
+OSS_ACCESS_KEY_ID=your_access_key_id
+OSS_ACCESS_KEY_SECRET=your_access_key_secret
+OSS_ENDPOINT=oss-cn-hangzhou.aliyuncs.com
+OSS_BUCKET_NAME=your_bucket_name
+OSS_PREFIX=data_nexus
+OSS_CDN_URL=https://res-bj.cybertogether.net

+ 8 - 0
app/config.py

@@ -23,4 +23,12 @@ class Settings:
     # Storage
     # Storage
     STORAGE_ROOT: str = os.getenv("STORAGE_ROOT", "/data/storage")
     STORAGE_ROOT: str = os.getenv("STORAGE_ROOT", "/data/storage")
 
 
+    # OSS
+    OSS_ACCESS_KEY_ID: str = os.getenv("OSS_ACCESS_KEY_ID", "")
+    OSS_ACCESS_KEY_SECRET: str = os.getenv("OSS_ACCESS_KEY_SECRET", "")
+    OSS_ENDPOINT: str = os.getenv("OSS_ENDPOINT", "")  # e.g., oss-cn-hangzhou.aliyuncs.com
+    OSS_BUCKET_NAME: str = os.getenv("OSS_BUCKET_NAME", "")
+    OSS_PREFIX: str = os.getenv("OSS_PREFIX", "data_nexus")  # 存储前缀
+    OSS_CDN_URL: str = os.getenv("OSS_CDN_URL", "https://res-bj.cybertogether.net")  # CDN 域名
+
 settings = Settings()
 settings = Settings()

+ 16 - 12
app/main.py

@@ -194,7 +194,8 @@ def get_version_files(version_id: int, flat: bool = False, db: Session = Depends
 
 
 # ==================== File APIs ====================
 # ==================== File APIs ====================
 
 
-from fastapi.responses import FileResponse
+from fastapi.responses import RedirectResponse
+from app.services.oss_client import oss_client
 
 
 @app.get("/files/{file_id}", response_model=schemas.DataFileOut)
 @app.get("/files/{file_id}", response_model=schemas.DataFileOut)
 def get_file_info(file_id: int, db: Session = Depends(get_db)):
 def get_file_info(file_id: int, db: Session = Depends(get_db)):
@@ -205,23 +206,26 @@ def get_file_info(file_id: int, db: Session = Depends(get_db)):
     return file_record
     return file_record
 
 
 
 
+@app.get("/files/{file_id}/url")
+def get_file_url(file_id: int, db: Session = Depends(get_db)):
+    """Get file CDN URL."""
+    file_record = db.query(DataFile).filter(DataFile.id == file_id).first()
+    if not file_record:
+        raise HTTPException(status_code=404, detail="File not found")
+
+    cdn_url = oss_client.get_cdn_url(file_record.storage_path)
+    return {"url": cdn_url}
+
+
 @app.get("/files/{file_id}/content")
 @app.get("/files/{file_id}/content")
 def get_file_content(file_id: int, db: Session = Depends(get_db)):
 def get_file_content(file_id: int, db: Session = Depends(get_db)):
-    """Download file content."""
+    """Redirect to CDN URL for file download."""
     file_record = db.query(DataFile).filter(DataFile.id == file_id).first()
     file_record = db.query(DataFile).filter(DataFile.id == file_id).first()
     if not file_record:
     if not file_record:
         raise HTTPException(status_code=404, detail="File not found")
         raise HTTPException(status_code=404, detail="File not found")
 
 
-    if not file_record.storage_path or not os.path.exists(file_record.storage_path):
-        raise HTTPException(status_code=404, detail="Physical file not found")
-
-    import mimetypes
-    media_type, _ = mimetypes.guess_type(file_record.relative_path)
-    return FileResponse(
-        file_record.storage_path,
-        media_type=media_type,
-        filename=os.path.basename(file_record.relative_path)
-    )
+    cdn_url = oss_client.get_cdn_url(file_record.storage_path)
+    return RedirectResponse(url=cdn_url)
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
     import uvicorn
     import uvicorn

+ 58 - 0
app/services/oss_client.py

@@ -0,0 +1,58 @@
+import oss2
+from app.config import settings
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class OSSClient:
+    def __init__(self):
+        self.auth = oss2.Auth(
+            settings.OSS_ACCESS_KEY_ID,
+            settings.OSS_ACCESS_KEY_SECRET
+        )
+        self.bucket = oss2.Bucket(
+            self.auth,
+            settings.OSS_ENDPOINT,
+            settings.OSS_BUCKET_NAME
+        )
+        self.prefix = settings.OSS_PREFIX.strip("/")
+        self.cdn_url = settings.OSS_CDN_URL.rstrip("/")
+
+    def _build_key(self, project_name: str, stage: str, commit_id: str, relative_path: str) -> str:
+        """Build OSS object key."""
+        return f"{self.prefix}/{project_name}/{stage}/{commit_id}/{relative_path}"
+
+    def get_cdn_url(self, key: str) -> str:
+        """Get CDN URL for the object."""
+        return f"{self.cdn_url}/{key}"
+
+    def upload(self, key: str, content: bytes) -> bool:
+        """Upload content to OSS."""
+        try:
+            self.bucket.put_object(key, content)
+            logger.info(f"Uploaded to OSS: {key}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to upload to OSS: {e}")
+            raise
+
+    def download(self, key: str) -> bytes:
+        """Download content from OSS."""
+        try:
+            result = self.bucket.get_object(key)
+            return result.read()
+        except oss2.exceptions.NoSuchKey:
+            logger.error(f"OSS key not found: {key}")
+            return None
+        except Exception as e:
+            logger.error(f"Failed to download from OSS: {e}")
+            raise
+
+    def exists(self, key: str) -> bool:
+        """Check if object exists."""
+        return self.bucket.object_exists(key)
+
+
+# Singleton instance
+oss_client = OSSClient() if settings.OSS_ACCESS_KEY_ID else None

+ 8 - 18
app/services/storage_service.py

@@ -1,10 +1,9 @@
 import os
 import os
-import shutil
-import hashlib
 from sqlalchemy.orm import Session
 from sqlalchemy.orm import Session
 from app.models import Project, DataVersion, DataFile
 from app.models import Project, DataVersion, DataFile
 from app.config import settings
 from app.config import settings
 from app.services.gogs_client import GogsClient
 from app.services.gogs_client import GogsClient
+from app.services.oss_client import oss_client
 import logging
 import logging
 
 
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
@@ -13,7 +12,6 @@ class StorageService:
     def __init__(self, db: Session, gogs_client: GogsClient):
     def __init__(self, db: Session, gogs_client: GogsClient):
         self.db = db
         self.db = db
         self.gogs = gogs_client
         self.gogs = gogs_client
-        self.storage_root = settings.STORAGE_ROOT
 
 
     def get_or_create_project(self, project_name: str, description: str = None) -> Project:
     def get_or_create_project(self, project_name: str, description: str = None) -> Project:
         project = self.db.query(Project).filter(Project.project_name == project_name).first()
         project = self.db.query(Project).filter(Project.project_name == project_name).first()
@@ -40,7 +38,6 @@ class StorageService:
     async def process_file_with_sha(self, version: DataVersion, relative_path: str, file_sha: str, owner: str, repo: str):
     async def process_file_with_sha(self, version: DataVersion, relative_path: str, file_sha: str, owner: str, repo: str):
         """只处理变化的文件,未变化的文件不记录。"""
         """只处理变化的文件,未变化的文件不记录。"""
         # 查询同一项目 + 同一 stage + 同一文件路径的最新一条记录
         # 查询同一项目 + 同一 stage + 同一文件路径的最新一条记录
-        # 通过 version 关联查询
         last_file = (
         last_file = (
             self.db.query(DataFile)
             self.db.query(DataFile)
             .join(DataVersion)
             .join(DataVersion)
@@ -54,34 +51,27 @@ class StorageService:
         )
         )
 
 
         if last_file and last_file.file_sha == file_sha:
         if last_file and last_file.file_sha == file_sha:
-            # 文件未变化,跳过不记录
             logger.info(f"File {relative_path} (SHA: {file_sha}) unchanged. Skipping.")
             logger.info(f"File {relative_path} (SHA: {file_sha}) unchanged. Skipping.")
             return
             return
 
 
-        # 文件是新的或有变化,下载并记录
+        # 文件是新的或有变化,下载并上传到 OSS
         logger.info(f"File {relative_path} (SHA: {file_sha}) changed. Downloading.")
         logger.info(f"File {relative_path} (SHA: {file_sha}) changed. Downloading.")
         content = await self.gogs.get_file_content(owner, repo, version.commit_id, relative_path)
         content = await self.gogs.get_file_content(owner, repo, version.commit_id, relative_path)
         file_size = len(content)
         file_size = len(content)
 
 
         project_name = version.project.project_name
         project_name = version.project.project_name
-        safe_path = os.path.join(
-            self.storage_root,
-            project_name,
-            version.stage,
-            version.commit_id,
-            relative_path.replace("/", os.sep)
-        )
 
 
-        os.makedirs(os.path.dirname(safe_path), exist_ok=True)
+        # 构建 OSS key
+        oss_key = oss_client._build_key(project_name, version.stage, version.commit_id, relative_path)
 
 
-        with open(safe_path, "wb") as f:
-            f.write(content)
+        # 上传到 OSS
+        oss_client.upload(oss_key, content)
 
 
-        # 创建记录
+        # 创建记录(storage_path 存 OSS key)
         new_file = DataFile(
         new_file = DataFile(
             version_id=version.id,
             version_id=version.id,
             relative_path=relative_path,
             relative_path=relative_path,
-            storage_path=safe_path,
+            storage_path=oss_key,
             file_size=file_size,
             file_size=file_size,
             file_type=os.path.splitext(relative_path)[1],
             file_type=os.path.splitext(relative_path)[1],
             file_sha=file_sha
             file_sha=file_sha

+ 2 - 1
requirements.txt

@@ -4,4 +4,5 @@ httpx
 sqlalchemy
 sqlalchemy
 pymysql
 pymysql
 python-dotenv
 python-dotenv
-pyyaml
+pyyaml
+oss2