"""Integration-style check of WebhookService using a mocked Gogs client.

Run directly as a script. The database schema is dropped and recreated, and
the ``test_storage`` directory is removed before and after the run.
"""
import asyncio
import os
import sys
import shutil
import json
from unittest.mock import MagicMock, AsyncMock, patch

# Add project root to path so the ``app`` package resolves when run as a script.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from app.database import Base, engine, SessionLocal  # noqa: E402
from app.services.webhook_service import WebhookService  # noqa: E402
from app.models import Project, DataVersion, DataFile  # noqa: E402


# Setup
def setup_db():
    """Drop and recreate every table registered on ``Base.metadata``."""
    Base.metadata.drop_all(bind=engine)
    Base.metadata.create_all(bind=engine)


def teardown_storage():
    """Delete the ``test_storage`` directory tree if it exists."""
    if os.path.exists("test_storage"):
        shutil.rmtree("test_storage")


# Mocks
# NOTE(review): the YAML indentation below was reconstructed (``pattern`` is
# assumed to belong to the ``images/`` output entry) -- confirm against the
# manifest schema the service expects.
manifest_yaml = """
project_name: "test_project"
stage: "test_stage"
outputs:
  - path: "results/data.csv"
  - path: "images/"
    pattern: "*.png"
"""

mock_tree_response = {
    "tree": [
        {"path": "results/data.csv", "type": "blob", "sha": "sha123", "mode": "100644"},
        {"path": "images/plot.png", "type": "blob", "sha": "sha456", "mode": "100644"},
        {"path": "images/ignore.txt", "type": "blob", "sha": "sha789", "mode": "100644"},
        {"path": "README.md", "type": "blob", "sha": "sha000", "mode": "100644"},
    ]
}


def _report(ok, pass_msg, fail_msg, failures):
    """Print a ``[PASS]``/``[FAIL]`` line; record *fail_msg* in *failures* on failure."""
    if ok:
        print(f"[PASS] {pass_msg}")
    else:
        print(f"[FAIL] {fail_msg}")
        failures.append(fail_msg)


async def run_test():
    """Process two mocked push webhooks and verify the stored results.

    The first webhook must create a Project, a DataVersion and DataFile rows
    for the manifest outputs. The second webhook reuses the same tree SHAs
    and is expected not to download any file content again.

    Raises:
        AssertionError: if a hard assertion fails, or if any soft check
            printed ``[FAIL]`` (previously these were reported but the run
            still ended with "All tests passed!").
    """
    print("Setting up test environment...")
    setup_db()
    teardown_storage()

    failures = []

    # Override settings for storage root
    with patch("app.config.settings.STORAGE_ROOT", "test_storage"):
        db = SessionLocal()
        try:
            service = WebhookService(db)

            # Mock GogsClient
            service.gogs.get_manifest = AsyncMock(return_value=manifest_yaml)
            service.gogs.get_recursive_tree = AsyncMock(return_value=mock_tree_response)

            async def mock_get_file(owner, repo, commit, path):
                if path == "results/data.csv":
                    return b"csv_data"
                if path == "images/plot.png":
                    return b"png_data"
                return b"other_data"

            service.gogs.get_file_content = AsyncMock(side_effect=mock_get_file)

            # Mock Payload
            payload = {
                "ref": "refs/heads/master",
                "after": "commit_sha_abc",
                "repository": {
                    "name": "my-repo",
                    "owner": {"username": "my-user"},
                },
                "pusher": {"username": "test-author"},
            }

            print("Processing webhook...")
            await service.process_webhook(payload)

            # Verification
            print("Verifying results...")

            # Check Project
            project = db.query(Project).filter_by(project_name="test_project").first()
            assert project is not None
            print("[PASS] Project created")

            # Check Version
            version = db.query(DataVersion).filter_by(commit_id="commit_sha_abc").first()
            assert version is not None
            assert version.stage == "test_stage"
            print("[PASS] Version created")

            # Check Files
            files = db.query(DataFile).filter_by(version_id=version.id).all()
            file_paths = [data_file.relative_path for data_file in files]
            print(f"Stored files: {file_paths}")

            assert "results/data.csv" in file_paths
            assert "images/plot.png" in file_paths

            # The manifest restricts the images/ directory to "*.png", so
            # images/ignore.txt must not have been stored.
            _report(
                "images/ignore.txt" not in file_paths,
                "Pattern filtering worked",
                "'images/ignore.txt' should not be included",
                failures,
            )

            # Check Physical Files
            for data_file in files:
                _report(
                    os.path.exists(data_file.storage_path),
                    f"File {data_file.relative_path} stored at {data_file.storage_path}",
                    f"{data_file.storage_path} does not exist",
                    failures,
                )

            # Test Deduplication
            print("\nTesting Deduplication...")
            # New payload, same file content (same SHA). Only the top-level
            # "after" key is replaced, so a shallow copy is sufficient.
            payload2 = payload.copy()
            payload2["after"] = "commit_sha_def"  # New commit

            # get_recursive_tree is still mocked to return the same tree SHAs,
            # which simulates a new commit with unchanged files.

            # Reset get_file_content mock to track calls
            service.gogs.get_file_content.reset_mock()

            await service.process_webhook(payload2)

            version2 = db.query(DataVersion).filter_by(commit_id="commit_sha_def").first()
            assert version2 is not None
            print("[PASS] Version 2 created")

            # get_file_content should NOT be called because the SHAs are the
            # same and the files already exist.
            downloaded_again = service.gogs.get_file_content.called
            _report(
                not downloaded_again,
                "Deduplication worked: download skipped",
                "Deduplication failed: files were downloaded again",
                failures,
            )
            if downloaded_again:
                print(service.gogs.get_file_content.mock_calls)
        finally:
            # Always release the session and remove stored files, even when
            # an assertion or the service itself raised.
            db.close()
            teardown_storage()

    if failures:
        raise AssertionError(
            f"{len(failures)} check(s) failed: " + "; ".join(failures)
        )

    print("\nAll tests passed!")


if __name__ == "__main__":
    asyncio.run(run_test())