| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- from sqlalchemy import Column, Integer, String, Text, ForeignKey, DateTime, BigInteger, UniqueConstraint, JSON
- from sqlalchemy.orm import relationship
- from sqlalchemy.sql import func
- from ulid import ULID
- from app.database import Base
def generate_ulid() -> str:
    """Return a freshly created ULID rendered as a string.

    Used as the ``default`` callable for the string primary keys of the
    models in this module.
    """
    fresh_id = ULID()
    return str(fresh_id)
class Project(Base):
    """ORM mapping for the ``projects`` table.

    A project has a unique, indexed name and owns a collection of
    ``DataVersion`` rows reachable through ``versions``.
    """

    __tablename__ = "projects"

    # String primary key; filled in by generate_ulid when no value is given.
    id = Column(String(26), default=generate_ulid, primary_key=True)
    # Required and unique across all projects; indexed for lookups by name.
    project_name = Column(
        String(100),
        nullable=False,
        unique=True,
        index=True,
    )
    description = Column(Text, nullable=True)
    # Timestamp assigned by the database server at insert time.
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Paired with DataVersion.project.
    versions = relationship("DataVersion", back_populates="project")
class DataVersion(Base):
    """ORM mapping for the ``data_versions`` table.

    A row is identified by its own string primary key and is additionally
    constrained to be unique per (project_id, stage, commit_id).
    """

    __tablename__ = "data_versions"
    __table_args__ = (
        UniqueConstraint(
            "project_id",
            "stage",
            "commit_id",
            name="_project_stage_commit_uc",
        ),
    )

    # String primary key; filled in by generate_ulid when no value is given.
    id = Column(String(26), default=generate_ulid, primary_key=True)
    # Owning project.
    # NOTE(review): nullable, yet part of the unique constraint above; confirm
    # whether rows without a project are intended.
    project_id = Column(String(26), ForeignKey("projects.id"))
    stage = Column(String(200), nullable=False)
    commit_id = Column(String(64), nullable=False)
    author = Column(String(50))
    commit_message = Column(Text)
    # Stored as plain text; presumably a serialized manifest (verify against
    # the writer of this column).
    manifest_snapshot = Column(Text)
    # Timestamp assigned by the database server at insert time.
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Paired with Project.versions.
    project = relationship("Project", back_populates="versions")
    # Paired with DataFile.version.
    files = relationship("DataFile", back_populates="version")
class DataFile(Base):
    """ORM mapping for the ``data_files`` table.

    Each row describes one file attached to a ``DataVersion``: where it
    lives, its size and type, its SHA, and optional grouping metadata.
    """

    __tablename__ = "data_files"

    # Auto-incrementing integer key.
    # NOTE(review): index=True on a primary key is likely redundant; confirm
    # against existing migrations before removing it.
    id = Column(
        Integer,
        primary_key=True,
        index=True,
        autoincrement=True,
    )
    version_id = Column(String(26), ForeignKey("data_versions.id"))
    relative_path = Column(String(255))
    storage_path = Column(String(500))
    file_size = Column(BigInteger)
    file_type = Column(String(20))
    # Git blob SHA, used for deduplication.
    file_sha = Column(String(64), index=True)
    # For example 'input' or 'output'.
    direction = Column(String(20), nullable=True)
    # Free-form label; the original example value was Chinese for "post input".
    label = Column(String(100), nullable=True)
    # Extracted JSON value, stored as text.
    extracted_value = Column(Text, nullable=True)
    # Groups related inputs and outputs together.
    group_key = Column(String(255), nullable=True)
    # Timestamp assigned by the database server at insert time.
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Paired with DataVersion.files.
    version = relationship("DataVersion", back_populates="files")
class DataRecord(Base):
    """ORM mapping for the ``data_records`` table.

    A record references a project and a data version, repeats the stage and
    commit fields on the row itself, and stores its inputs and outputs as
    JSON columns.
    """

    __tablename__ = "data_records"

    # String primary key; filled in by generate_ulid when no value is given.
    id = Column(String(26), default=generate_ulid, primary_key=True)
    project_id = Column(String(26), ForeignKey("projects.id"))
    version_id = Column(String(26), ForeignKey("data_versions.id"))
    stage = Column(String(200), index=True)
    commit_id = Column(String(64))
    commit_message = Column(Text)
    group_key = Column(String(255))

    # JSON payloads for the record.
    inputs = Column(JSON)
    outputs = Column(JSON)

    # NOTE(review): presumably a hash over the record content; the hashing
    # scheme is not visible here, so verify against the writer.
    content_hash = Column(String(64))

    author = Column(String(50))
    # Timestamp assigned by the database server at insert time.
    created_at = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # One-directional links; no back_populates is declared on either side.
    version = relationship("DataVersion")
    project = relationship("Project")
|