"""SQLAlchemy ORM models for projects, data versions, data files and data records."""

from sqlalchemy import (
    BigInteger,
    Column,
    DateTime,
    ForeignKey,
    Integer,
    JSON,
    String,
    Text,
    UniqueConstraint,
)
from sqlalchemy.orm import relationship
from sqlalchemy.sql import func
from ulid import ULID

from app.database import Base


def generate_ulid() -> str:
    """Return a freshly generated ULID rendered as a string.

    Used as the Python-side ``default`` for the string primary keys below.
    """
    fresh_id = ULID()
    return str(fresh_id)


class Project(Base):
    """Row of the ``projects`` table: a uniquely named project.

    A project is the parent of any number of ``DataVersion`` rows, reachable
    through ``versions``.
    """

    __tablename__ = "projects"

    # String primary key populated by generate_ulid() when none is supplied.
    id = Column(String(26), primary_key=True, default=generate_ulid)
    # Required, unique and indexed project name.
    project_name = Column(
        String(100),
        unique=True,
        nullable=False,
        index=True,
    )
    description = Column(Text, nullable=True)
    # Timezone-aware timestamp; the default is evaluated by the database.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Paired with DataVersion.project.
    versions = relationship("DataVersion", back_populates="project")


class DataVersion(Base):
    """Row of the ``data_versions`` table: one commit of one stage of a project.

    The combination (project_id, stage, commit_id) is unique.
    """

    __tablename__ = "data_versions"
    __table_args__ = (
        UniqueConstraint(
            "project_id",
            "stage",
            "commit_id",
            name="_project_stage_commit_uc",
        ),
    )

    # String primary key populated by generate_ulid() when none is supplied.
    id = Column(String(26), primary_key=True, default=generate_ulid)
    # Owning project (projects.id).
    project_id = Column(String(26), ForeignKey("projects.id"))
    stage = Column(String(200), nullable=False)
    commit_id = Column(String(64), nullable=False)
    author = Column(String(50))
    commit_message = Column(Text)
    # Stored as free text; NOTE(review): format of the snapshot is not
    # visible in this module -- confirm with the writer of this column.
    manifest_snapshot = Column(Text)
    # Timezone-aware timestamp; the default is evaluated by the database.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Paired with Project.versions.
    project = relationship("Project", back_populates="versions")
    # Paired with DataFile.version.
    files = relationship("DataFile", back_populates="version")


class DataFile(Base):
    """Row of the ``data_files`` table: a single file attached to a version."""

    __tablename__ = "data_files"

    # Auto-incrementing integer key (unlike the ULID keys of the other tables).
    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
    # Version this file belongs to (data_versions.id).
    version_id = Column(String(26), ForeignKey("data_versions.id"))
    relative_path = Column(String(255))
    storage_path = Column(String(500))
    file_size = Column(BigInteger)
    file_type = Column(String(20))
    # Git blob SHA, used for deduplication.
    file_sha = Column(String(64), index=True)
    # e.g., 'input' or 'output'.
    direction = Column(String(20), nullable=True)
    # Human-readable label, e.g. 'post input'.
    label = Column(String(100), nullable=True)
    # Extracted JSON value, stored as text.
    extracted_value = Column(Text, nullable=True)
    # Used to group related inputs and outputs.
    group_key = Column(String(255), nullable=True)
    # Timezone-aware timestamp; the default is evaluated by the database.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Paired with DataVersion.files.
    version = relationship("DataVersion", back_populates="files")


class DataRecord(Base):
    """Row of the ``data_records`` table: JSON inputs/outputs tied to a version.

    Each record references both a project and a data version and repeats the
    stage / commit / author fields alongside its JSON payloads.
    """

    __tablename__ = "data_records"

    # String primary key populated by generate_ulid() when none is supplied.
    id = Column(String(26), primary_key=True, default=generate_ulid)
    # Owning project (projects.id).
    project_id = Column(String(26), ForeignKey("projects.id"))
    # Owning version (data_versions.id).
    version_id = Column(String(26), ForeignKey("data_versions.id"))
    stage = Column(String(200), index=True)
    commit_id = Column(String(64))
    commit_message = Column(Text)
    group_key = Column(String(255))
    # JSON payloads for the record.
    inputs = Column(JSON)
    outputs = Column(JSON)
    # NOTE(review): what exactly is hashed is not visible here -- confirm.
    content_hash = Column(String(64))
    author = Column(String(50))
    # Timezone-aware timestamp; the default is evaluated by the database.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # One-directional relationships (no back_populates on the other side).
    version = relationship("DataVersion")
    project = relationship("Project")