Просмотр исходного кода

feat:records.html页面、支持directory_depth、label的manifest.yaml

tanjingyu 1 неделя назад
Родитель
Коммит
8809daff3c
8 измененных файлов с 1127 добавлено и 62 удалено
  1. 32 1
      app/main.py
  2. 24 1
      app/models.py
  3. 23 0
      app/schemas.py
  4. 125 1
      app/services/storage_service.py
  5. 13 2
      app/services/webhook_service.py
  6. 82 57
      app/static/console.html
  7. 807 0
      app/static/records.html
  8. 21 0
      manifest.yaml.example

+ 32 - 1
app/main.py

@@ -56,7 +56,11 @@ def build_file_tree(files: List[DataFile]) -> list:
                     "id": f.id,
                     "id": f.id,
                     "size": f.file_size,
                     "size": f.file_size,
                     "file_type": f.file_type,
                     "file_type": f.file_type,
-                    "sha": f.file_sha
+                    "sha": f.file_sha,
+                    "direction": f.direction,
+                    "label": f.label,
+                    "extracted_value": f.extracted_value,
+                    "group_key": f.group_key
                 })
                 })
             else:
             else:
                 # It's a folder
                 # It's a folder
@@ -93,6 +97,12 @@ def filesystem_page():
     return FileResponse(os.path.join(STATIC_DIR, "index.html"), media_type="text/html")
     return FileResponse(os.path.join(STATIC_DIR, "index.html"), media_type="text/html")
 
 
 
 
+@app.get("/records")
+def records_page():
+    """Serve the data records UI."""
+    return FileResponse(os.path.join(STATIC_DIR, "records.html"), media_type="text/html")
+
+
 @app.get("/api/health")
 @app.get("/api/health")
 def health_check():
 def health_check():
     """Health check endpoint."""
     """Health check endpoint."""
@@ -226,11 +236,32 @@ def get_stage_files(
                 "file_size": f.file_size,
                 "file_size": f.file_size,
                 "file_type": f.file_type,
                 "file_type": f.file_type,
                 "file_sha": f.file_sha,
                 "file_sha": f.file_sha,
+                "direction": f.direction,
+                "label": f.label,
+                "extracted_value": f.extracted_value,
+                "group_key": f.group_key,
             } for f in files]
             } for f in files]
         })
         })
     return result
     return result
 
 
 
 
+@app.get("/projects/{project_id}/records", response_model=List[schemas.DataRecordOut])
+def list_data_records(
+    project_id: str,
+    stage: Optional[str] = None,
+    skip: int = 0,
+    limit: int = 100,
+    db: Session = Depends(get_db)
+):
+    """List data records for a project, optionally filtered by stage."""
+    from app.models import DataRecord
+    query = db.query(DataRecord).filter(DataRecord.project_id == project_id)
+    if stage:
+        query = query.filter(DataRecord.stage == stage)
+    records = query.order_by(DataRecord.created_at.desc()).offset(skip).limit(limit).all()
+    return records
+
+
 # ==================== Version APIs ====================
 # ==================== Version APIs ====================
 
 
 @app.get("/projects/{project_id}/versions", response_model=List[schemas.DataVersionOut])
 @app.get("/projects/{project_id}/versions", response_model=List[schemas.DataVersionOut])

+ 24 - 1
app/models.py

@@ -1,4 +1,4 @@
-from sqlalchemy import Column, Integer, String, Text, ForeignKey, DateTime, BigInteger, UniqueConstraint
+from sqlalchemy import Column, Integer, String, Text, ForeignKey, DateTime, BigInteger, UniqueConstraint, JSON
 from sqlalchemy.orm import relationship
 from sqlalchemy.orm import relationship
 from sqlalchemy.sql import func
 from sqlalchemy.sql import func
 from ulid import ULID
 from ulid import ULID
@@ -50,6 +50,29 @@ class DataFile(Base):
     file_size = Column(BigInteger)
     file_size = Column(BigInteger)
     file_type = Column(String(20))
     file_type = Column(String(20))
     file_sha = Column(String(64), index=True)  # Git Blob SHA for deduplication
     file_sha = Column(String(64), index=True)  # Git Blob SHA for deduplication
+    direction = Column(String(20), nullable=True)  # e.g., 'input' or 'output'
+    label = Column(String(100), nullable=True)     # e.g., '帖子输入'
+    extracted_value = Column(Text, nullable=True)  # extracted JSON value
+    group_key = Column(String(255), nullable=True) # Used to group related inputs and outputs
     created_at = Column(DateTime(timezone=True), server_default=func.now())
     created_at = Column(DateTime(timezone=True), server_default=func.now())
 
 
     version = relationship("DataVersion", back_populates="files")
     version = relationship("DataVersion", back_populates="files")
+
+class DataRecord(Base):
+    __tablename__ = "data_records"
+
+    id = Column(String(26), primary_key=True, default=generate_ulid)
+    project_id = Column(String(26), ForeignKey("projects.id"))
+    version_id = Column(String(26), ForeignKey("data_versions.id"))
+    stage = Column(String(200), index=True)
+    commit_id = Column(String(64))
+    group_key = Column(String(255))
+    
+    inputs = Column(JSON)
+    outputs = Column(JSON)
+    
+    author = Column(String(50))
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+    version = relationship("DataVersion")
+    project = relationship("Project")

+ 23 - 0
app/schemas.py

@@ -29,6 +29,10 @@ class DataFileBase(BaseModel):
     file_size: int
     file_size: int
     file_type: str
     file_type: str
     file_sha: str
     file_sha: str
+    direction: Optional[str] = None
+    label: Optional[str] = None
+    extracted_value: Optional[str] = None
+    group_key: Optional[str] = None
 
 
 
 
 class DataFileOut(DataFileBase):
 class DataFileOut(DataFileBase):
@@ -66,3 +70,22 @@ class DataVersionWithFiles(DataVersionOut):
 
 
 # Keep old name for backward compatibility
 # Keep old name for backward compatibility
 DataVersion = DataVersionOut
 DataVersion = DataVersionOut
+
+
+class DataRecordBase(BaseModel):
+    stage: str
+    commit_id: str
+    group_key: Optional[str] = None
+    inputs: list = []
+    outputs: list = []
+    author: Optional[str] = None
+
+
+class DataRecordOut(DataRecordBase):
+    id: str
+    project_id: str
+    version_id: str
+    created_at: datetime
+
+    class Config:
+        from_attributes = True

+ 125 - 1
app/services/storage_service.py

@@ -1,7 +1,7 @@
 import os
 import os
 from sqlalchemy.orm import Session
 from sqlalchemy.orm import Session
 from sqlalchemy.exc import IntegrityError
 from sqlalchemy.exc import IntegrityError
-from app.models import Project, DataVersion, DataFile
+from app.models import Project, DataVersion, DataFile, DataRecord
 from app.config import settings
 from app.config import settings
 from app.services.gogs_client import GogsClient
 from app.services.gogs_client import GogsClient
 from app.services.oss_client import oss_client
 from app.services.oss_client import oss_client
@@ -94,6 +94,57 @@ class StorageService:
 
 
         return False
         return False
 
 
+    def aggregate_version_records(self, version: DataVersion):
+        """Aggregate files in a version into DataRecord groups based on parent directory."""
+        from collections import defaultdict
+        
+        # 1. Clean existing records for this version (idempotency)
+        self.db.query(DataRecord).filter(DataRecord.version_id == version.id).delete()
+        
+        files = self.db.query(DataFile).filter(DataFile.version_id == version.id).all()
+        
+        # 2. Group by dirname
+        groups = defaultdict(lambda: {"inputs": [], "outputs": []})
+        
+        for f in files:
+            # Group key falls back to immediate parent directory if not explicitly saved in f.group_key
+            group_key = f.group_key if f.group_key is not None else os.path.dirname(f.relative_path)
+            
+            file_data = {
+                "id": f.id,
+                "relative_path": f.relative_path,
+                "file_type": f.file_type,
+                "file_size": f.file_size,
+                "file_sha": f.file_sha,
+                "direction": f.direction,
+                "label": f.label,
+                "extracted_value": f.extracted_value,
+                "storage_path": f.storage_path
+            }
+            if f.direction == "input":
+                groups[group_key]["inputs"].append(file_data)
+            else:
+                # Treat 'output' or None as output by default for rendering purposes
+                groups[group_key]["outputs"].append(file_data)
+                
+        # 3. Insert aggregated records
+        for group_key, data in groups.items():
+            record = DataRecord(
+                project_id=version.project_id,
+                version_id=version.id,
+                stage=version.stage,
+                commit_id=version.commit_id,
+                group_key=group_key,
+                inputs=data["inputs"],
+                outputs=data["outputs"],
+                author=version.author,
+                # letting server_default handle created_at
+            )
+            self.db.add(record)
+            
+        self.db.commit()
+        logger.info(f"Aggregated version {version.id} into {len(groups)} DataRecord(s).")
+
     async def process_file_with_sha(
     async def process_file_with_sha(
         self,
         self,
         version: DataVersion,
         version: DataVersion,
@@ -101,9 +152,14 @@ class StorageService:
         file_sha: str,
         file_sha: str,
         owner: str,
         owner: str,
         repo: str,
         repo: str,
+        direction: str = None,
+        label: str = None,
+        extract_json_key: str = None,
+        directory_depth: int = None,
     ) -> bool:
     ) -> bool:
         """Process a file and create a snapshot record.
         """Process a file and create a snapshot record.
 
 
+
         **Snapshot semantics**: a record is ALWAYS created regardless of
         **Snapshot semantics**: a record is ALWAYS created regardless of
         whether the file changed.  This ensures every version is a
         whether the file changed.  This ensures every version is a
         self-contained snapshot of all declared output files.
         self-contained snapshot of all declared output files.
@@ -127,8 +183,49 @@ class StorageService:
             .first()
             .first()
         )
         )
 
 
+        should_extract = bool(extract_json_key and relative_path.lower().endswith(".json"))
+        extracted_val = None
+
+        # Calculate group_key based on directory_depth
+        calc_group_key = os.path.dirname(relative_path)  # Default fallback
+        if directory_depth is not None and directory_depth > 0:
+            parts = relative_path.split("/")
+            # Remove filename
+            if len(parts) > 1:
+                parts = parts[:-1]
+                # Combine up to directory_depth
+                calc_group_key = "/".join(parts[:directory_depth])
+            else:
+                calc_group_key = "" # File is in root directory
+
+        async def _extract_val() -> str | None:
+            try:
+                content_bytes = await self.gogs.get_file_content(owner, repo, version.commit_id, relative_path)
+                if not content_bytes:
+                    return None
+                import json
+                parsed = json.loads(content_bytes.decode('utf-8'))
+                val = parsed
+                for key_part in extract_json_key.split("."):
+                    if isinstance(val, dict):
+                        val = val.get(key_part)
+                    else:
+                        val = None
+                        break
+                if val is not None:
+                    if isinstance(val, (dict, list)):
+                        return json.dumps(val, ensure_ascii=False)
+                    return str(val)
+            except Exception as e:
+                logger.warning(f"Failed to extract json key {extract_json_key} from {relative_path}: {e}")
+            return None
+
         if last_file and last_file.file_sha == file_sha:
         if last_file and last_file.file_sha == file_sha:
             # ── Unchanged: reuse previous OSS key, still record a snapshot entry ──
             # ── Unchanged: reuse previous OSS key, still record a snapshot entry ──
+            # Blob is unchanged, but the value is still re-extracted from the repository; the previous snapshot's extracted_value is not reused
+            if should_extract:
+                extracted_val = await _extract_val()
+            
             new_file = DataFile(
             new_file = DataFile(
                 version_id=version.id,
                 version_id=version.id,
                 relative_path=relative_path,
                 relative_path=relative_path,
@@ -136,6 +233,10 @@ class StorageService:
                 file_size=last_file.file_size,
                 file_size=last_file.file_size,
                 file_type=last_file.file_type,
                 file_type=last_file.file_type,
                 file_sha=file_sha,
                 file_sha=file_sha,
+                direction=direction,
+                label=label,
+                extracted_value=extracted_val,
+                group_key=calc_group_key,
             )
             )
             self.db.add(new_file)
             self.db.add(new_file)
             self.db.commit()
             self.db.commit()
@@ -157,6 +258,25 @@ class StorageService:
 
 
         oss_client.upload(oss_key, content)
         oss_client.upload(oss_key, content)
 
 
+        if should_extract:
+            try:
+                import json
+                parsed = json.loads(content.decode('utf-8'))
+                val = parsed
+                for key_part in extract_json_key.split("."):
+                    if isinstance(val, dict):
+                        val = val.get(key_part)
+                    else:
+                        val = None
+                        break
+                if val is not None:
+                    if isinstance(val, (dict, list)):
+                        extracted_val = json.dumps(val, ensure_ascii=False)
+                    else:
+                        extracted_val = str(val)
+            except Exception as e:
+                logger.warning(f"Failed to extract json key {extract_json_key} from {relative_path}: {e}")
+
         new_file = DataFile(
         new_file = DataFile(
             version_id=version.id,
             version_id=version.id,
             relative_path=relative_path,
             relative_path=relative_path,
@@ -164,6 +284,10 @@ class StorageService:
             file_size=file_size,
             file_size=file_size,
             file_type=os.path.splitext(relative_path)[1],
             file_type=os.path.splitext(relative_path)[1],
             file_sha=file_sha,
             file_sha=file_sha,
+            direction=direction,
+            label=label,
+            extracted_value=extracted_val,
+            group_key=calc_group_key,
         )
         )
         self.db.add(new_file)
         self.db.add(new_file)
         self.db.commit()
         self.db.commit()

+ 13 - 2
app/services/webhook_service.py

@@ -138,6 +138,8 @@ class WebhookService:
                     f"Stage '{stage_name}': no data changes detected (content and file set same). "
                     f"Stage '{stage_name}': no data changes detected (content and file set same). "
                     f"Version discarded."
                     f"Version discarded."
                 )
                 )
+            else:
+                self.storage.aggregate_version_records(version)
 
 
     def _get_all_changed_files(self, payload: dict) -> set[str]:
     def _get_all_changed_files(self, payload: dict) -> set[str]:
         """Extract all added, modified, and removed files from all commits in payload."""
         """Extract all added, modified, and removed files from all commits in payload."""
@@ -189,6 +191,11 @@ class WebhookService:
             patterns = output.get("pattern", "*")
             patterns = output.get("pattern", "*")
             excludes = output.get("exclude")
             excludes = output.get("exclude")
 
 
+            direction = output.get("direction")
+            label = output.get("label")
+            extract_json_key = output.get("extract_json_key")
+            directory_depth = output.get("directory_depth")
+
             path_pattern = normalize_path(raw_path_pattern)
             path_pattern = normalize_path(raw_path_pattern)
             is_dir = is_directory_pattern(raw_path_pattern)
             is_dir = is_directory_pattern(raw_path_pattern)
 
 
@@ -212,7 +219,9 @@ class WebhookService:
                     if self._match_patterns(rel_name, patterns, excludes):
                     if self._match_patterns(rel_name, patterns, excludes):
                         try:
                         try:
                             changed = await self.storage.process_file_with_sha(
                             changed = await self.storage.process_file_with_sha(
-                                version, file_path, file_info.get("sha"), owner, repo_name
+                                version, file_path, file_info.get("sha"), owner, repo_name,
+                                direction=direction, label=label, extract_json_key=extract_json_key,
+                                directory_depth=directory_depth
                             )
                             )
                             if changed:
                             if changed:
                                 has_changes = True
                                 has_changes = True
@@ -230,7 +239,9 @@ class WebhookService:
                     if self._match_patterns(filename, patterns, excludes):
                     if self._match_patterns(filename, patterns, excludes):
                         try:
                         try:
                             changed = await self.storage.process_file_with_sha(
                             changed = await self.storage.process_file_with_sha(
-                                version, path_pattern, file_info.get("sha"), owner, repo_name
+                                version, path_pattern, file_info.get("sha"), owner, repo_name,
+                                direction=direction, label=label, extract_json_key=extract_json_key,
+                                directory_depth=directory_depth
                             )
                             )
                             if changed:
                             if changed:
                                 has_changes = True
                                 has_changes = True

+ 82 - 57
app/static/console.html

@@ -580,7 +580,6 @@
 
 
         .fg-children {
         .fg-children {
             display: none;
             display: none;
-            background: rgba(0, 0, 0, 0.1);
         }
         }
 
 
         .fg-children.open {
         .fg-children.open {
@@ -826,14 +825,14 @@
             let h = '';
             let h = '';
 
 
             S.versions.forEach((v, i) => {
             S.versions.forEach((v, i) => {
-                const groups = groupFiles(v.files);
+                const tree = buildFileTree(v.files);
                 h += `<div class="version-card" style="animation-delay:${Math.min(i, 10) * 0.05}s">
                 h += `<div class="version-card" style="animation-delay:${Math.min(i, 10) * 0.05}s">
             <div class="version-head">
             <div class="version-head">
                 <span class="commit-tag">${IC.commit} ${esc(v.commit_id.substring(0, 8))}</span>
                 <span class="commit-tag">${IC.commit} ${esc(v.commit_id.substring(0, 8))}</span>
                 <span class="v-author">${v.author ? esc(v.author) : ''}</span>
                 <span class="v-author">${v.author ? esc(v.author) : ''}</span>
                 <span class="v-time" title="${fmtTime(v.created_at)}">${relTime(v.created_at)}</span>
                 <span class="v-time" title="${fmtTime(v.created_at)}">${relTime(v.created_at)}</span>
             </div>
             </div>
-            <div class="version-files">${renderGroups(groups, v)}</div>
+            <div class="version-files">${renderTree(tree, v, 0)}</div>
         </div>`;
         </div>`;
             });
             });
             if (S.hasMore) {
             if (S.hasMore) {
@@ -849,82 +848,102 @@
         }
         }
 
 
         // ============ File Grouping ============
         // ============ File Grouping ============
-        function groupFiles(files) {
-            if (!files || !files.length) return [];
+        function countFiles(node) {
+            let cnt = node.files.length;
+            Object.values(node.dirs).forEach(d => { cnt += countFiles(d); });
+            return cnt;
+        }
 
 
-            const topLevelGroups = {};
-            const rootFiles = [];
+        function buildFileTree(files) {
+            const root = { dirs: {}, files: [], path: '' };
+            if (!files || !files.length) return root;
 
 
             files.forEach(f => {
             files.forEach(f => {
                 const parts = f.relative_path.split('/');
                 const parts = f.relative_path.split('/');
-                if (parts.length === 1) {
-                    rootFiles.push(f);
-                } else {
-                    const topDir = parts[0];
-                    if (!topLevelGroups[topDir]) topLevelGroups[topDir] = [];
-                    topLevelGroups[topDir].push(f);
-                }
-            });
-
-            const result = [];
-            Object.entries(topLevelGroups).forEach(([topDir, fls]) => {
-                let commonParts = fls[0].relative_path.split('/').slice(0, -1);
-                for (let i = 1; i < fls.length; i++) {
-                    const parts = fls[i].relative_path.split('/').slice(0, -1);
-                    let j = 0;
-                    while (j < commonParts.length && j < parts.length && commonParts[j] === parts[j]) {
-                        j++;
+                let cur = root;
+                for (let i = 0; i < parts.length - 1; i++) {
+                    const p = parts[i];
+                    if (!cur.dirs[p]) {
+                        const curPath = cur.path ? cur.path + '/' + p : p;
+                        cur.dirs[p] = { name: p, path: curPath, dirs: {}, files: [] };
                     }
                     }
-                    commonParts.length = j;
+                    cur = cur.dirs[p];
                 }
                 }
-                const groupName = commonParts.join('/');
-                result.push({ type: 'folder', name: groupName, path: groupName, files: fls });
+                cur.files.push(f);
             });
             });
 
 
-            rootFiles.forEach(f => {
-                result.push({ type: 'file', file: f });
-            });
+            function compact(node) {
+                const dirKeys = Object.keys(node.dirs);
+                dirKeys.forEach(k => {
+                    compact(node.dirs[k]);
+                });
+
+                Object.keys(node.dirs).forEach(k => {
+                    let child = node.dirs[k];
+                    if (!child) return;
+
+                    let changed = true;
+                    while (changed) {
+                        changed = false;
+
+                        if (Object.keys(child.dirs).length === 1 && child.files.length === 0) {
+                            const onlyChildKey = Object.keys(child.dirs)[0];
+                            const onlyChild = child.dirs[onlyChildKey];
+
+                            child.name = child.name + '/' + onlyChild.name;
+                            child.path = onlyChild.path;
+                            child.dirs = onlyChild.dirs;
+                            child.files = onlyChild.files;
+                            changed = true;
+                        }
+
+                        if (Object.keys(child.dirs).length === 0 && child.files.length === 1) {
+                            node.files.push(child.files[0]);
+                            delete node.dirs[k];
+                            // since child is deleted, break inner loops
+                            changed = false;
+                        }
+                    }
+                });
+            }
+            compact(root);
 
 
-            result.sort((a, b) => {
-                if (a.type !== b.type) return a.type === 'folder' ? -1 : 1;
-                return (a.name || a.file.name).localeCompare(b.name || b.file.name);
-            });
-            return result;
+            return root;
         }
         }
 
 
-        function renderGroups(groups, version) {
-            if (!groups.length) return '';
+        function renderTree(node, version, depth) {
             let h = '';
             let h = '';
-            groups.forEach(g => {
-                if (g.type === 'folder') {
-                    const gid = 'fg_' + Math.random().toString(36).substr(2, 6);
-                    h += `
-            <div class="fg-header" onclick="toggleFG('${gid}')">
+
+            const dirKeys = Object.keys(node.dirs).sort((a, b) => a.localeCompare(b));
+            dirKeys.forEach(k => {
+                const d = node.dirs[k];
+                const gid = 'fg_' + Math.random().toString(36).substr(2, 6);
+                const fileCount = countFiles(d);
+                const padding = `padding-left: ${20 + depth * 24}px;`;
+
+                h += `
+            <div class="fg-header" style="${padding}" onclick="toggleFG('${gid}')">
                 <div class="fg-name-wrap">
                 <div class="fg-name-wrap">
                     <span class="fg-arrow" id="fa_${gid}">${IC.chevron}</span>
                     <span class="fg-arrow" id="fa_${gid}">${IC.chevron}</span>
                     <span class="fg-icon">${IC.folder}</span>
                     <span class="fg-icon">${IC.folder}</span>
-                    <span class="fg-name">${esc(g.name)}/</span>
-                    <span class="fg-count">${g.files.length} 个文件</span>
+                    <span class="fg-name" title="${esc(d.path)}">${esc(d.name)}/</span>
+                    <span class="fg-count">${fileCount} 个文件</span>
                 </div>
                 </div>
                 <div></div>
                 <div></div>
             </div>
             </div>
             <div class="fg-children" id="${gid}">
             <div class="fg-children" id="${gid}">
-                ${g.files.map(f => fileRow(f, version, true, g.path)).join('')}
+                ${renderTree(d, version, depth + 1)}
             </div>`;
             </div>`;
-                } else {
-                    h += fileRow(g.file, version, false, null);
-                }
             });
             });
-            return h;
-        }
 
 
-        function fileRow(f, version, isChild, groupPath) {
-            const padding = isChild ? 'padding-left: 44px;' : '';
-            let displayName = f.name;
-            if (groupPath && f.relative_path.startsWith(groupPath + '/')) {
-                displayName = f.relative_path.substring(groupPath.length + 1);
-            }
-            return `
+            node.files.sort((a, b) => a.relative_path.localeCompare(b.relative_path)).forEach(f => {
+                let displayName = f.relative_path || f.name;
+                if (node.path && f.relative_path.startsWith(node.path + '/')) {
+                    displayName = f.relative_path.substring(node.path.length + 1);
+                }
+                const padding = `padding-left: ${depth === 0 ? 20 : 44 + (depth - 1) * 24}px;`;
+
+                h += `
     <div class="file-row" style="${padding}">
     <div class="file-row" style="${padding}">
         <div class="file-name-col" title="${esc(f.relative_path)}">
         <div class="file-name-col" title="${esc(f.relative_path)}">
             <span class="f-icon">${IC.file}</span>
             <span class="f-icon">${IC.file}</span>
@@ -935,6 +954,12 @@
             <a class="btn-dl" href="/files/${f.id}/content" download="${esc(f.name)}" onclick="event.stopPropagation();">${IC.download}</a>
             <a class="btn-dl" href="/files/${f.id}/content" download="${esc(f.name)}" onclick="event.stopPropagation();">${IC.download}</a>
         </div>
         </div>
     </div>`;
     </div>`;
+            });
+
+            if (depth === 0 && h === '') {
+                return '<div style="padding:14px 20px;font-size:13px;color:var(--text-muted)">暂无文件</div>';
+            }
+            return h;
         }
         }
 
 
         function toggleFG(id) {
         function toggleFG(id) {

+ 807 - 0
app/static/records.html

@@ -0,0 +1,807 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Data Nexus - 宽表数据视图</title>
+    <meta name="description" content="Data Nexus 宽表数据视图控制台">
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link
+        href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap"
+        rel="stylesheet">
+    <style>
+        *,
+        *::before,
+        *::after {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
+        :root {
+            --bg-base: #080c18;
+            --bg-sidebar: #0c1222;
+            --bg-surface: #111827;
+            --bg-card: #151f32;
+            --bg-card-head: rgba(0, 0, 0, 0.2);
+            --bg-hover: rgba(255, 255, 255, 0.04);
+            --bg-active: rgba(99, 179, 237, 0.08);
+            --border: rgba(255, 255, 255, 0.06);
+            --border-card: rgba(255, 255, 255, 0.08);
+            --border-active: rgba(99, 179, 237, 0.35);
+            --text-primary: #e2e8f0;
+            --text-secondary: #8b9ab5;
+            --text-muted: #556477;
+            --accent: #63b3ed;
+            --accent-light: #90cdf4;
+            --accent-dim: rgba(99, 179, 237, 0.12);
+            --green: #68d391;
+            --orange: #f6ad55;
+            --purple: #b794f4;
+            --radius: 8px;
+            --sidebar-w: 280px;
+        }
+
+        body {
+            font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'PingFang SC', sans-serif;
+            background: var(--bg-base);
+            color: var(--text-primary);
+            height: 100vh;
+            overflow: hidden;
+            line-height: 1.5;
+        }
+
+        ::-webkit-scrollbar {
+            width: 6px;
+            height: 6px;
+        }
+
+        ::-webkit-scrollbar-track {
+            background: transparent;
+        }
+
+        ::-webkit-scrollbar-thumb {
+            background: rgba(255, 255, 255, 0.1);
+            border-radius: 3px;
+        }
+
+        ::-webkit-scrollbar-thumb:hover {
+            background: rgba(255, 255, 255, 0.2);
+        }
+
+        .app {
+            display: grid;
+            grid-template-columns: var(--sidebar-w) 1fr;
+            height: 100vh;
+        }
+
+        .sidebar {
+            background: var(--bg-sidebar);
+            border-right: 1px solid var(--border);
+            display: flex;
+            flex-direction: column;
+            overflow: hidden;
+        }
+
+        .sidebar-header {
+            display: flex;
+            align-items: center;
+            gap: 10px;
+            padding: 20px 20px 16px;
+            flex-shrink: 0;
+        }
+
+        .sidebar-header svg {
+            width: 26px;
+            height: 26px;
+            color: var(--accent);
+            flex-shrink: 0;
+        }
+
+        .sidebar-header span {
+            font-size: 16px;
+            font-weight: 700;
+            background: linear-gradient(135deg, var(--accent-light), var(--purple));
+            -webkit-background-clip: text;
+            background-clip: text;
+            -webkit-text-fill-color: transparent;
+        }
+
+        .sidebar-divider {
+            height: 1px;
+            background: var(--border);
+            margin: 8px 16px;
+            flex-shrink: 0;
+        }
+
+        .stage-tree-wrap {
+            flex: 1;
+            overflow-y: auto;
+            padding: 0 8px 16px;
+        }
+
+        .tree-branch-header {
+            display: flex;
+            align-items: center;
+            gap: 4px;
+            padding: 7px 10px;
+            cursor: pointer;
+            border-radius: 6px;
+            transition: background 0.12s;
+            user-select: none;
+            font-size: 13px;
+            color: var(--text-secondary);
+        }
+
+        .tree-branch-header:hover {
+            background: var(--bg-hover);
+        }
+
+        .tree-arrow {
+            width: 16px;
+            height: 16px;
+            color: var(--text-muted);
+            transition: transform 0.2s;
+            flex-shrink: 0;
+        }
+
+        .tree-arrow.open {
+            transform: rotate(90deg);
+        }
+
+        .tree-children {
+            display: none;
+            padding-left: 8px;
+            margin-left: 12px;
+            border-left: 1px solid var(--border);
+        }
+
+        .tree-children.open {
+            display: block;
+        }
+
+        .tree-leaf {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            padding: 7px 10px 7px 12px;
+            cursor: pointer;
+            border-radius: 6px;
+            transition: all 0.12s;
+            font-size: 13px;
+            color: var(--text-secondary);
+        }
+
+        .tree-leaf:hover {
+            background: var(--bg-hover);
+            color: var(--text-primary);
+        }
+
+        .tree-leaf.active {
+            background: var(--bg-active);
+            color: var(--accent);
+            font-weight: 500;
+        }
+
+        .tree-dot {
+            width: 5px;
+            height: 5px;
+            border-radius: 50%;
+            background: var(--text-muted);
+            flex-shrink: 0;
+        }
+
+        .tree-leaf.active .tree-dot {
+            background: var(--accent);
+        }
+
+        .tree-count {
+            margin-left: auto;
+            font-size: 11px;
+            color: var(--text-muted);
+            background: rgba(255, 255, 255, 0.04);
+            padding: 1px 6px;
+            border-radius: 4px;
+        }
+
+        .content {
+            display: flex;
+            flex-direction: column;
+            height: 100vh;
+            overflow: hidden;
+        }
+
+        .content-header {
+            flex-shrink: 0;
+            padding: 18px 28px;
+            border-bottom: 1px solid var(--border);
+            background: rgba(12, 18, 34, 0.6);
+            backdrop-filter: blur(12px);
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            min-height: 60px;
+        }
+
+        .stage-path {
+            display: flex;
+            align-items: center;
+            gap: 6px;
+            font-size: 14px;
+        }
+
+        .stage-path .sep {
+            color: var(--text-muted);
+            font-size: 11px;
+        }
+
+        .stage-path .seg {
+            color: var(--text-secondary);
+        }
+
+        .stage-path .seg:last-child {
+            color: var(--text-primary);
+            font-weight: 600;
+        }
+
+        .header-info {
+            font-size: 12px;
+            color: var(--text-muted);
+        }
+
+        .content-body {
+            flex: 1;
+            overflow: auto;
+            padding: 24px;
+        }
+
+        /* Welcome & Loading state */
+        .state-box {
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            justify-content: center;
+            height: 100%;
+            text-align: center;
+            color: var(--text-muted);
+            padding: 40px;
+        }
+
+        .state-box svg {
+            width: 56px;
+            height: 56px;
+            margin-bottom: 20px;
+            opacity: 0.25;
+        }
+
+        .state-box h2 {
+            font-size: 17px;
+            font-weight: 600;
+            color: var(--text-secondary);
+            margin-bottom: 6px;
+        }
+
+        .state-box p {
+            font-size: 13px;
+        }
+
+        .spinner {
+            width: 28px;
+            height: 28px;
+            border: 3px solid var(--border);
+            border-top-color: var(--accent);
+            border-radius: 50%;
+            animation: spin 0.7s linear infinite;
+            margin-bottom: 16px;
+        }
+
+        @keyframes spin {
+            to {
+                transform: rotate(360deg);
+            }
+        }
+
+        /* Records Table */
+        .records-table {
+            width: 100%;
+            border-collapse: separate;
+            border-spacing: 0;
+            background: var(--bg-card);
+            border: 1px solid var(--border-card);
+            border-radius: var(--radius);
+        }
+
+        .records-table th {
+            background: var(--bg-card-head);
+            color: var(--text-secondary);
+            font-size: 13px;
+            font-weight: 600;
+            text-align: left;
+            padding: 14px 16px;
+            border-bottom: 1px solid var(--border);
+            white-space: nowrap;
+        }
+
+        .records-table td {
+            border-bottom: 1px solid var(--border);
+            padding: 16px;
+            vertical-align: top;
+        }
+
+        .records-table tr:last-child td {
+            border-bottom: none;
+        }
+
+        .records-table td.meta-cell {
+            white-space: nowrap;
+        }
+
+        /* Bubble Tree (File rendering) */
+        .bubble-tree {
+            background: rgba(0, 0, 0, 0.15);
+            border-radius: 6px;
+            border: 1px solid var(--border);
+            overflow: hidden;
+            font-size: 12px;
+            min-width: 200px;
+        }
+
+        .fg-header {
+            display: flex;
+            align-items: center;
+            padding: 8px 10px;
+            gap: 6px;
+            cursor: pointer;
+            border-bottom: 1px solid var(--border);
+            transition: background 0.1s;
+        }
+
+        .fg-header:hover {
+            background: var(--bg-hover);
+        }
+
+        .fg-arrow {
+            width: 12px;
+            height: 12px;
+            color: var(--text-muted);
+            transition: transform 0.2s;
+            flex-shrink: 0;
+        }
+
+        .fg-arrow.open {
+            transform: rotate(90deg);
+        }
+
+        .fg-icon {
+            width: 14px;
+            height: 14px;
+            color: var(--orange);
+            flex-shrink: 0;
+        }
+
+        .fg-name {
+            color: var(--text-primary);
+            font-weight: 500;
+        }
+
+        .file-row {
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            padding: 6px 10px;
+            border-bottom: 1px solid var(--border);
+            gap: 10px;
+            transition: background 0.1s;
+        }
+
+        .file-row:hover {
+            background: var(--bg-hover);
+        }
+
+        .file-row:last-child {
+            border-bottom: none;
+        }
+
+        .file-name-col {
+            display: flex;
+            align-items: center;
+            gap: 6px;
+            min-width: 0;
+        }
+
+        .f-icon {
+            width: 14px;
+            height: 14px;
+            color: var(--text-muted);
+            flex-shrink: 0;
+        }
+
+        .f-name {
+            color: var(--text-primary);
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+            max-width: 200px;
+        }
+
+        .btn-dl {
+            display: inline-flex;
+            align-items: center;
+            padding: 3px 6px;
+            border-radius: 4px;
+            background: var(--accent-dim);
+            color: var(--accent);
+            text-decoration: none;
+            transition: all 0.15s;
+        }
+
+        .btn-dl:hover {
+            background: rgba(99, 179, 237, 0.2);
+        }
+
+        .btn-dl svg {
+            width: 12px;
+            height: 12px;
+        }
+
+        .commit-badge {
+            display: inline-flex;
+            align-items: center;
+            gap: 4px;
+            font-family: 'JetBrains Mono', monospace;
+            background: var(--accent-dim);
+            color: var(--accent);
+            padding: 3px 8px;
+            border-radius: 4px;
+            font-size: 12px;
+            margin-bottom: 4px;
+        }
+
+        .meta-text {
+            font-size: 12px;
+            color: var(--text-muted);
+            margin-bottom: 2px;
+        }
+
+        .empty-cell {
+            color: var(--text-muted);
+            font-size: 13px;
+            font-style: italic;
+        }
+
+        .load-more {
+            display: flex;
+            justify-content: center;
+            padding: 20px 0;
+        }
+
+        .load-more-btn {
+            padding: 9px 28px;
+            border-radius: 6px;
+            border: 1px solid var(--border);
+            background: var(--bg-card);
+            color: var(--text-secondary);
+            cursor: pointer;
+        }
+
+        .load-more-btn:hover {
+            background: var(--bg-hover);
+            color: var(--text-primary);
+        }
+    </style>
+</head>
+
+<body>
+    <div class="app">
+        <aside class="sidebar">
+            <div class="sidebar-header">
+                <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+                    <rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
+                    <line x1="3" y1="9" x2="21" y2="9" />
+                    <line x1="9" y1="21" x2="9" y2="9" />
+                </svg>
+                <span>宽表数据视图</span>
+            </div>
+            <div class="sidebar-divider"></div>
+            <div class="stage-tree-wrap" id="stageTreeWrap"></div>
+        </aside>
+        <main class="content">
+            <div class="content-header">
+                <div class="stage-path" id="stagePath"><span class="seg" style="color:var(--text-muted)">选择左侧数据阶段</span>
+                </div>
+                <div class="header-info" id="headerInfo"></div>
+            </div>
+            <div class="content-body" id="contentBody">
+                <div class="state-box">
+                    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
+                        <rect x="3" y="3" width="18" height="18" rx="2" ry="2" />
+                        <line x1="3" y1="9" x2="21" y2="9" />
+                        <line x1="9" y1="21" x2="9" y2="9" />
+                    </svg>
+                    <h2>欢迎使用宽表视图控制台</h2>
+                    <p>从左侧选择阶段,即可查看以 group key 聚合排列的数据网格表</p>
+                </div>
+            </div>
+        </main>
+    </div>
+
+    <script>
+        // Inline SVG markup for the icons used by the renderers below, keyed by icon name.
+        const IC = {
+            commit: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><circle cx="12" cy="12" r="4"/><line x1="1.05" y1="12" x2="7" y2="12"/><line x1="17.01" y1="12" x2="22.96" y2="12"/></svg>',
+            file: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M14 2H6a2 2 0 00-2 2v16a2 2 0 002 2h12a2 2 0 002-2V8z"/><polyline points="14 2 14 8 20 8"/></svg>',
+            folder: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M22 19a2 2 0 01-2 2H4a2 2 0 01-2-2V5a2 2 0 012-2h5l2 3h9a2 2 0 012 2z"/></svg>',
+            download: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4"/><polyline points="7 10 12 15 17 10"/><line x1="12" y1="15" x2="12" y2="3"/></svg>',
+            chevron: '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><polyline points="9 18 15 12 9 6"/></svg>',
+        };
+
+        // Number of records requested per page (sent as the `limit` query parameter).
+        const PAGE_SIZE = 20;
+        // Page-wide mutable state:
+        //   stages          - stage list returned by /stages/all
+        //   stageProjectMap - stage name -> project_id lookup built from `stages`
+        //   stage           - name of the currently selected stage (null until one is picked)
+        //   records         - records loaded so far for the selected stage
+        //   skip            - offset sent with the next records request
+        //   hasMore         - whether the last page came back full (another page may exist)
+        //   loading         - true while a records request is in flight
+        let S = { stages: [], stageProjectMap: {}, stage: null, records: [], skip: 0, hasMore: true, loading: false };
+
+        // Shorthand for document.getElementById.
+        const $ = id => document.getElementById(id);
+        function esc(s) { if (!s) return ''; const d = document.createElement('div'); d.textContent = s; return d.innerHTML; }
+        function relTime(iso) {
+            if (!iso) return '';
+            const m = Math.floor((Date.now() - new Date(iso).getTime()) / 60000);
+            if (m < 1) return '刚刚'; if (m < 60) return m + ' 分钟前';
+            const h = Math.floor(m / 60); if (h < 24) return h + ' 小时前';
+            const d = Math.floor(h / 24); if (d < 30) return d + ' 天前';
+            const date = new Date(iso);
+            return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')} ${String(date.getHours()).padStart(2, '0')}:${String(date.getMinutes()).padStart(2, '0')}`;
+        }
+
+        async function api(url) { const r = await fetch(url); if (!r.ok) throw new Error(r.status); return r.json(); }
+
+        async function loadAllStages() {
+            $('stageTreeWrap').innerHTML = '<div style="padding:16px;text-align:center;"><div class="spinner" style="margin:0 auto 8px;"></div><span style="font-size:12px;color:var(--text-muted)">加载中...</span></div>';
+            try {
+                S.stages = await api('/stages/all');
+                S.stageProjectMap = {};
+                S.stages.forEach(st => { S.stageProjectMap[st.name] = st.project_id; });
+                renderStageTree();
+            } catch (e) { $('stageTreeWrap').innerHTML = '<div style="padding:16px;color:#fc8181;font-size:13px;">加载失败</div>'; }
+        }
+
+        function buildTree(stages) {
+            const root = [];
+            for (const st of stages) {
+                const parts = st.name.split('/');
+                let cur = root;
+                for (let i = 0; i < parts.length; i++) {
+                    let node = cur.find(n => n.label === parts[i]);
+                    if (!node) { node = { label: parts[i], children: [] }; cur.push(node); }
+                    if (i === parts.length - 1) { node.stage = st.name; node.count = st.version_count; }
+                    cur = node.children;
+                }
+            }
+            return root;
+        }
+
+        function renderStageTree() {
+            const tree = buildTree(S.stages);
+            $('stageTreeWrap').innerHTML = tree.length ? renderNodes(tree) : '<div style="padding:16px;font-size:13px;color:var(--text-muted)">暂无数据阶段</div>';
+        }
+
+        function renderNodes(nodes) {
+            let h = '';
+            for (const n of nodes) {
+                if (n.stage && n.children.length === 0) {
+                    h += `<div class="tree-leaf" data-stage="${esc(n.stage)}" onclick="selectStage(this, '${esc(n.stage)}')">
+                    <span class="tree-dot"></span><span>${esc(n.label)}</span>
+                    <span class="tree-count">${n.count || ''}</span>
+                </div>`;
+                } else {
+                    const id = 'tb_' + Math.random().toString(36).substr(2, 6);
+                    h += `<div>
+                    <div class="tree-branch-header" onclick="toggleBranch('${id}', this)">
+                        <span class="tree-arrow" id="a_${id}">${IC.chevron}</span><span>${esc(n.label)}</span>
+                    </div>
+                    <div class="tree-children" id="${id}">${renderNodes(n.children)}</div>
+                </div>`;
+                }
+            }
+            return h;
+        }
+
+        function toggleBranch(id) {
+            const ch = $(id), ar = $('a_' + id);
+            if (ch) ch.classList.toggle('open');
+            if (ar) ar.classList.toggle('open');
+        }
+
+        function selectStage(el, stageName) {
+            document.querySelectorAll('.tree-leaf.active').forEach(e => e.classList.remove('active'));
+            el.classList.add('active');
+            S.stage = stageName;
+            S.records = []; S.skip = 0; S.hasMore = true;
+            updateHeader();
+            loadRecords();
+        }
+
+        function updateHeader() {
+            if (!S.stage) {
+                $('stagePath').innerHTML = '<span class="seg" style="color:var(--text-muted)">选择左侧数据阶段</span>';
+                return;
+            }
+            const parts = S.stage.split('/');
+            $('stagePath').innerHTML = parts.map((p, i) => `${i > 0 ? '<span class="sep">/</span>' : ''}<span class="seg">${esc(p)}</span>`).join('');
+        }
+
+        async function loadRecords(append = false) {
+            if (S.loading) return;
+            S.loading = true;
+            if (!append) $('contentBody').innerHTML = '<div class="state-box"><div class="spinner"></div><p>加载中...</p></div>';
+
+            try {
+                const pid = S.stageProjectMap[S.stage];
+                const data = await api(`/projects/${pid}/records?stage=${encodeURIComponent(S.stage)}&skip=${S.skip}&limit=${PAGE_SIZE}`);
+
+                if (!append) S.records = [];
+                S.records.push(...data);
+                S.hasMore = data.length >= PAGE_SIZE;
+                S.skip += data.length;
+
+                renderTable();
+            } catch (e) {
+                if (!append) $('contentBody').innerHTML = '<div class="state-box"><p style="color:#fc8181;">加载失败: ' + esc(e.message) + '</p></div>';
+            }
+            S.loading = false;
+        }
+
+        /* ------- Bubble Directory Tree Builder ------- */
+        function buildFileTree(files) {
+            const root = { dirs: {}, files: [], path: '' };
+            if (!files || !files.length) return root;
+
+            files.forEach(f => {
+                const parts = f.relative_path.split('/');
+                let cur = root;
+                for (let i = 0; i < parts.length - 1; i++) {
+                    const p = parts[i];
+                    if (!cur.dirs[p]) {
+                        const curPath = cur.path ? cur.path + '/' + p : p;
+                        cur.dirs[p] = { name: p, path: curPath, dirs: {}, files: [] };
+                    }
+                    cur = cur.dirs[p];
+                }
+                cur.files.push(f);
+            });
+
+            // Compact single-child directories
+            function compact(node) {
+                Object.keys(node.dirs).forEach(k => compact(node.dirs[k]));
+                Object.keys(node.dirs).forEach(k => {
+                    let child = node.dirs[k];
+                    if (!child) return;
+                    let changed = true;
+                    while (changed) {
+                        changed = false;
+                        if (Object.keys(child.dirs).length === 1 && child.files.length === 0) {
+                            const onlyChildKey = Object.keys(child.dirs)[0];
+                            const onlyChild = child.dirs[onlyChildKey];
+                            child.name = child.name + '/' + onlyChild.name;
+                            child.path = onlyChild.path;
+                            child.dirs = onlyChild.dirs;
+                            child.files = onlyChild.files;
+                            changed = true;
+                        }
+                    }
+                });
+            }
+            compact(root);
+            return root;
+        }
+
+        function renderSubTree(node, depth) {
+            let h = '';
+            const dirKeys = Object.keys(node.dirs).sort((a, b) => a.localeCompare(b));
+            dirKeys.forEach(k => {
+                const d = node.dirs[k];
+                const gid = 'fg_' + Math.random().toString(36).substr(2, 6);
+                const padding = `padding-left: ${10 + depth * 14}px;`;
+                h += `
+                <div class="fg-header" style="${padding}" onclick="toggleBranch('${gid}')">
+                    <span class="fg-arrow" id="a_${gid}">${IC.chevron}</span>
+                    <span class="fg-icon">${IC.folder}</span>
+                    <span class="fg-name" title="${esc(d.path)}">${esc(d.name)}/</span>
+                </div>
+                <div class="tree-children open" id="${gid}" style="margin-left:0; padding-left:0; border-left:none;">
+                    ${renderSubTree(d, depth + 1)}
+                </div>`;
+            });
+
+            node.files.sort((a, b) => a.relative_path.localeCompare(b.relative_path)).forEach(f => {
+                let displayName = f.relative_path;
+                if (node.path && f.relative_path.startsWith(node.path + '/')) {
+                    displayName = f.relative_path.substring(node.path.length + 1);
+                } else if (f.relative_path.includes('/')) {
+                    displayName = f.relative_path.split('/').pop();
+                }
+                const padding = `padding-left: ${10 + depth * 14 + (dirKeys.length > 0 ? 18 : 0)}px;`;
+                h += `
+                <div class="file-row" style="${padding}">
+                    <div class="file-name-col" title="${esc(f.relative_path)}">
+                        <span class="f-icon">${IC.file}</span>
+                        <span class="f-name">${esc(displayName)}</span>
+                    </div>
+                    <a class="btn-dl" href="/files/${f.id}/content" download><svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M21 15v4a2 2 0 01-2 2H5a2 2 0 01-2-2v-4"/><polyline points="7 10 12 15 17 10"/><line x1="12" y1="15" x2="12" y2="3"/></svg></a>
+                </div>`;
+                if (f.extracted_value) {
+                    h += `<div style="padding-left: ${10 + depth * 14 + (dirKeys.length > 0 ? 18 : 0)}px; padding-right:10px; padding-bottom:6px; margin-top:-4px; color:var(--text-muted); font-size:11px; white-space:pre-wrap; word-break:break-all;">↳ Extract: ${esc(f.extracted_value)}</div>`;
+                }
+            });
+            return h;
+        }
+
+        function renderBubbleTree(files) {
+            if (!files || files.length === 0) return '<div class="empty-cell">无数据</div>';
+            const root = buildFileTree(files);
+            return `<div class="bubble-tree" style="margin-bottom:8px;">${renderSubTree(root, 0)}</div>`;
+        }
+
+        /* ------- Render Table ------- */
+        function renderTable() {
+            if (!S.records.length) {
+                $('contentBody').innerHTML = '<div class="state-box"><h2>暂无数据</h2><p>找不到符合要求的阶段记录数据</p></div>';
+                return;
+            }
+
+            // Extract dynamic columns
+            const inLabels = new Set();
+            const outLabels = new Set();
+
+            S.records.forEach(r => {
+                (r.inputs || []).forEach(f => inLabels.add(f.label || '未命名'));
+                (r.outputs || []).forEach(f => outLabels.add(f.label || '未命名'));
+            });
+
+            const sortedInLabels = Array.from(inLabels).sort();
+            const sortedOutLabels = Array.from(outLabels).sort();
+
+            let h = `<div style="overflow-x:auto;">
+            <table class="records-table">
+                <thead>
+                    <tr>
+                        <th>Metadata</th>`;
+            sortedInLabels.forEach(lbl => h += `<th>${esc(lbl)} (输入)</th>`);
+            sortedOutLabels.forEach(lbl => h += `<th>${esc(lbl)} (输出)</th>`);
+            h += `      </tr>
+                </thead>
+                <tbody>`;
+
+            S.records.forEach(r => {
+                h += `<tr><td class="meta-cell">
+                <div class="commit-badge">${IC.commit} ${esc(r.commit_id.substring(0, 8))}</div>
+                <div class="meta-text">By: ${esc(r.author || 'unknown')}</div>
+                <div class="meta-text">Time: ${relTime(r.created_at)}</div>
+                ${r.group_key ? `<div class="meta-text" style="color:var(--orange)">Grp: ${esc(r.group_key)}</div>` : ''}
+            </td>`;
+
+                sortedInLabels.forEach(lbl => {
+                    const groupFiles = (r.inputs || []).filter(f => (f.label || '未命名') === lbl);
+                    h += `<td>${renderBubbleTree(groupFiles)}</td>`;
+                });
+
+                sortedOutLabels.forEach(lbl => {
+                    const groupFiles = (r.outputs || []).filter(f => (f.label || '未命名') === lbl);
+                    h += `<td>${renderBubbleTree(groupFiles)}</td>`;
+                });
+                h += `</tr>`;
+            });
+
+            h += `  </tbody>
+            </table>
+        </div>`;
+
+            if (S.hasMore) {
+                h += '<div class="load-more"><button class="load-more-btn" onclick="loadRecords(true)">加载更多</button></div>';
+            }
+
+            $('contentBody').innerHTML = h;
+        }
+
+        loadAllStages();
+    </script>
+</body>
+
+</html>

+ 21 - 0
manifest.yaml.example

@@ -40,6 +40,23 @@ stages:
       # 示例 D:也可以指定单个文件
       # 示例 D:也可以指定单个文件
       - path: final_report.docx
       - path: final_report.docx
 
 
+  # ---------- 阶段 4:带有元数据与 JSON 值提取的数据 ----------
+  - name: enhanced_data
+    outputs:
+      # 示例 E:指定文件为“输入”,并打上标签
+      - path: data/input/article.md
+        direction: input        # 指定是 input 还是 output
+        label: 帖子输入         # 指定该数据的业务名称(标签)
+
+      # 示例 F:指定文件为“输出”,并且如果是 JSON 文件,可以提取特定 key 的值
+      #          同时指定深度 directory_depth: 2,这样如果文件在 `data/output/foo/bar.json`
+      #          它的 group_key 会被设置成 `data/output` 而不是默认的 `data/output/foo`
+      - path: data/output/
+        pattern: "*.json"
+        direction: output
+        label: 灵感点
+        extract_json_key: "data.idea_content"  # 会解析 JSON 并提取对应 key 的值保存
+        directory_depth: 2
 
 
 # ============================================================
 # ============================================================
 # 字段说明
 # 字段说明
@@ -59,6 +76,10 @@ stages:
 #     - exclude (可选) 文件排除规则,支持通配符或列表
 #     - exclude (可选) 文件排除规则,支持通配符或列表
 #                      示例: "*.tmp"
 #                      示例: "*.tmp"
 #                      示例: ["*.log", ".DS_Store"]
 #                      示例: ["*.log", ".DS_Store"]
+#     - direction (可选) 该文件的流入/流出方向(如 'input', 'output' 等)
+#     - label     (可选) 该文件的业务称呼/标签(如 '帖子输入', '灵感点' 等)
+#     - extract_json_key (可选) 针对 JSON 文件,配置要提取的 JSON key 路径(支持用 . 分隔的嵌套路径,例如 'data.content')。提取的值会被记录在数据库中。
+#     - directory_depth  (可选) 定义这组规则生成的文件关联用的父目录深度(如 1 或 2,用来将不同子目录的关联文件合并到一行展示)。
 #
 #
 # ============================================================
 # ============================================================
 # 工作流程
 # 工作流程