|
|
@@ -44,15 +44,55 @@ class StorageService:
|
|
|
return None
|
|
|
|
|
|
def rollback_version(self, version: DataVersion):
|
|
|
- """Remove a version and all its associated file records.
|
|
|
-
|
|
|
- Used when a push didn't produce any data file changes,
|
|
|
- meaning it was a code-only commit with no snapshot value.
|
|
|
- """
|
|
|
+ """Remove a version and all its associated file records."""
|
|
|
self.db.query(DataFile).filter(DataFile.version_id == version.id).delete()
|
|
|
self.db.delete(version)
|
|
|
self.db.commit()
|
|
|
- logger.info(f"Rolled back empty version {version.id}")
|
|
|
+ logger.info(f"Rolled back unchanged version {version.id}")
|
|
|
+
|
|
|
+ def is_snapshot_changed(self, version: DataVersion, has_new_uploads: bool) -> bool:
|
|
|
+ """
|
|
|
+ Determine if this version represents a meaningful change compared to the previous one.
|
|
|
+ A version is meaningful if:
|
|
|
+ 1. Any file content changed (has_new_uploads is True)
|
|
|
+ 2. The set of files (paths) is different from the previous version record for this stage.
|
|
|
+ """
|
|
|
+ # 1. If content changed, it's definitely a new version
|
|
|
+ if has_new_uploads:
|
|
|
+ return True
|
|
|
+
|
|
|
+ # 2. Get current file paths
|
|
|
+ current_files = self.db.query(DataFile).filter(DataFile.version_id == version.id).all()
|
|
|
+ if not current_files:
|
|
|
+ return False # No data files at all, don't keep
|
|
|
+
|
|
|
+ # 3. Find the most recent previous version for this project and stage
|
|
|
+ prev_version = (
|
|
|
+ self.db.query(DataVersion)
|
|
|
+ .filter(
|
|
|
+ DataVersion.project_id == version.project_id,
|
|
|
+ DataVersion.stage == version.stage,
|
|
|
+ DataVersion.id != version.id
|
|
|
+ )
|
|
|
+ .order_by(DataVersion.created_at.desc())
|
|
|
+ .first()
|
|
|
+ )
|
|
|
+
|
|
|
+ # 4. If there's no previous version, we keep this one as the baseline
|
|
|
+ if not prev_version:
|
|
|
+ return True
|
|
|
+
|
|
|
+ # 5. Compare the set of relative paths
|
|
|
+ prev_files = self.db.query(DataFile).filter(DataFile.version_id == prev_version.id).all()
|
|
|
+
|
|
|
+ prev_paths = {f.relative_path for f in prev_files}
|
|
|
+ curr_paths = {f.relative_path for f in current_files}
|
|
|
+
|
|
|
+ if prev_paths != curr_paths:
|
|
|
+ logger.info(f"Snapshot file set changed for stage '{version.stage}': {len(prev_paths)} -> {len(curr_paths)} files")
|
|
|
+ return True
|
|
|
+
|
|
|
+ return False
|
|
|
|
|
|
async def process_file_with_sha(
|
|
|
self,
|