Просмотр исходного кода

feat:records.html页面、支持directory_depth、label的manifest.yaml

tanjingyu 1 неделю назад
Родитель
Коммит
6afa58b3e6
5 изменённых файлов: 55 добавлений и 14 удалений
  1. 2 0
      app/models.py
  2. 1 0
      app/schemas.py
  3. 9 1
      app/services/storage_service.py
  4. 42 13
      app/services/webhook_service.py
  5. 1 0
      app/static/records.html

+ 2 - 0
app/models.py

@@ -71,6 +71,8 @@ class DataRecord(Base):
     inputs = Column(JSON)
     outputs = Column(JSON)
     
+    content_hash = Column(String(64))
+    
     author = Column(String(50))
     created_at = Column(DateTime(timezone=True), server_default=func.now())
 

+ 1 - 0
app/schemas.py

@@ -76,6 +76,7 @@ class DataRecordBase(BaseModel):
     stage: str
     commit_id: str
     group_key: Optional[str] = None
+    content_hash: Optional[str] = None
     inputs: list = []
     outputs: list = []
     author: Optional[str] = None

+ 9 - 1
app/services/storage_service.py

@@ -6,6 +6,7 @@ from app.config import settings
 from app.services.gogs_client import GogsClient
 from app.services.oss_client import oss_client
 import logging
+import hashlib
 
 logger = logging.getLogger(__name__)
 
@@ -126,9 +127,15 @@ class StorageService:
             else:
                 # Treat 'output' or None as output by default for rendering purposes
                 groups[group_key]["outputs"].append(file_data)
-                
         # 3. Insert aggregated records
         for group_key, data in groups.items():
+            # Calculate a deterministic content_hash for this group of files
+            # Combine all SHA values, sort them to ensure same set of files results in same hash
+            all_shas = [f["file_sha"] for f in data["inputs"]] + [f["file_sha"] for f in data["outputs"]]
+            all_shas.sort()
+            combined_string = "|".join(all_shas)
+            content_hash = hashlib.sha256(combined_string.encode('utf-8')).hexdigest()
+
             record = DataRecord(
                 project_id=version.project_id,
                 version_id=version.id,
@@ -137,6 +144,7 @@ class StorageService:
                 group_key=group_key,
                 inputs=data["inputs"],
                 outputs=data["outputs"],
+                content_hash=content_hash,
                 author=version.author,
                 # letting server_default handle created_at
             )

+ 42 - 13
app/services/webhook_service.py

@@ -164,8 +164,13 @@ class WebhookService:
                 if is_dir:
                     # If it's a directory output, any change inside that directory counts
                     dir_path = path_pattern.rstrip("/")
-                    if f == dir_path or f.startswith(dir_path + "/"):
-                        return True
+                    if '*' in dir_path:
+                        import fnmatch
+                        if fnmatch.fnmatch(f, dir_path + "/*") or fnmatch.fnmatch(f, dir_path):
+                            return True
+                    else:
+                        if f == dir_path or f.startswith(dir_path + "/"):
+                            return True
                 else:
                     # Single file output: exact match
                     if f == path_pattern:
@@ -198,23 +203,47 @@ class WebhookService:
 
             path_pattern = normalize_path(raw_path_pattern)
             is_dir = is_directory_pattern(raw_path_pattern)
+            dir_path = path_pattern.rstrip("/")
 
             if is_dir:
-                # Directory pattern: fetch only this directory's files
-                dir_path = path_pattern.rstrip("/")
-                logger.info(f"Fetching directory: {dir_path} with patterns: {patterns}, excludes: {excludes}")
+                # Directory pattern: fetch files from the closest static parent directory
+                # For `data/*/test/`, that is `data/`
+                import re
+                
+                # Split by first wildcard chunk path
+                wildcard_idx = dir_path.find('*')
+                if wildcard_idx != -1:
+                    static_base = dir_path[:wildcard_idx]
+                    # Trim back to the nearest directory separator
+                    last_sep = static_base.rfind('/')
+                    if last_sep != -1:
+                        static_base = static_base[:last_sep]
+                    else:
+                        static_base = "" # ROOT
+                else:
+                    static_base = dir_path
+                    
+                static_base = static_base.strip('/')
+                
+                logger.info(f"Fetching directory: {static_base} (to match wildcard path: {dir_path}) with patterns: {patterns}, excludes: {excludes}")
 
-                files = await self.gogs.get_directory_tree(owner, repo_name, commit_id, dir_path)
+                files = await self.gogs.get_directory_tree(owner, repo_name, commit_id, static_base)
 
                 for file_info in files:
                     file_path = file_info.get("path")
-                    # Calculate name relative to the watched directory
-                    # e.g. dir_path="a", file_path="a/b.txt" -> rel_name="b.txt"
-                    rel_name = (
-                        file_path[len(dir_path) + 1 :]
-                        if file_path.startswith(dir_path + "/")
-                        else file_path
-                    )
+                    
+                    # 1. First verify if the full path matches the wildcard directory path provided
+                    if '*' in dir_path:
+                        # e.g dir_path: data/*/test/ -> match: data/*/test/*
+                        if not fnmatch.fnmatch(file_path, dir_path + "/*") and not fnmatch.fnmatch(file_path, dir_path):
+                            continue
+                    else:
+                        if not file_path.startswith(dir_path + "/"):
+                            continue
+                        
+                    # Calculate name relative to the matched base path segment for pattern matching
+                    import os
+                    rel_name = os.path.basename(file_path)
 
                     if self._match_patterns(rel_name, patterns, excludes):
                         try:

+ 1 - 0
app/static/records.html

@@ -775,6 +775,7 @@
                 <div class="meta-text">By: ${esc(r.author || 'unknown')}</div>
                 <div class="meta-text">Time: ${relTime(r.created_at)}</div>
                 ${r.group_key ? `<div class="meta-text" style="color:var(--orange)">Grp: ${esc(r.group_key)}</div>` : ''}
+                ${r.content_hash ? `<div class="meta-text" title="${esc(r.content_hash)}" style="font-family:monospace; cursor:help; color:var(--text-muted); border-top: 1px solid var(--border); margin-top:4px; padding-top:4px; font-size:10px;">Hash: ${esc(r.content_hash.substring(0, 12))}...</div>` : ''}
             </td>`;
 
                 sortedInLabels.forEach(lbl => {