models.py 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. from sqlalchemy import Column, Integer, String, Text, ForeignKey, DateTime, BigInteger, UniqueConstraint, JSON
  2. from sqlalchemy.orm import relationship
  3. from sqlalchemy.sql import func
  4. from ulid import ULID
  5. from app.database import Base
  6. def generate_ulid() -> str:
  7. """Generate a new ULID string."""
  8. return str(ULID())
  9. class Project(Base):
  10. __tablename__ = "projects"
  11. id = Column(String(26), primary_key=True, default=generate_ulid)
  12. project_name = Column(String(100), unique=True, nullable=False, index=True)
  13. description = Column(Text, nullable=True)
  14. created_at = Column(DateTime(timezone=True), server_default=func.now())
  15. versions = relationship("DataVersion", back_populates="project")
  16. class DataVersion(Base):
  17. __tablename__ = "data_versions"
  18. id = Column(String(26), primary_key=True, default=generate_ulid)
  19. project_id = Column(String(26), ForeignKey("projects.id"))
  20. stage = Column(String(200), nullable=False)
  21. commit_id = Column(String(64), nullable=False)
  22. author = Column(String(50))
  23. commit_message = Column(Text)
  24. manifest_snapshot = Column(Text)
  25. created_at = Column(DateTime(timezone=True), server_default=func.now())
  26. __table_args__ = (
  27. UniqueConstraint('project_id', 'stage', 'commit_id', name='_project_stage_commit_uc'),
  28. )
  29. project = relationship("Project", back_populates="versions")
  30. files = relationship("DataFile", back_populates="version")
  31. class DataFile(Base):
  32. __tablename__ = "data_files"
  33. id = Column(Integer, primary_key=True, index=True, autoincrement=True)
  34. version_id = Column(String(26), ForeignKey("data_versions.id"))
  35. relative_path = Column(String(255))
  36. storage_path = Column(String(500))
  37. file_size = Column(BigInteger)
  38. file_type = Column(String(20))
  39. file_sha = Column(String(64), index=True) # Git Blob SHA for deduplication
  40. direction = Column(String(20), nullable=True) # e.g., 'input' or 'output'
  41. label = Column(String(100), nullable=True) # e.g., '帖子输入'
  42. extracted_value = Column(Text, nullable=True) # extracted JSON value
  43. group_key = Column(String(255), nullable=True) # Used to group related inputs and outputs
  44. created_at = Column(DateTime(timezone=True), server_default=func.now())
  45. version = relationship("DataVersion", back_populates="files")
  46. class DataRecord(Base):
  47. __tablename__ = "data_records"
  48. id = Column(String(26), primary_key=True, default=generate_ulid)
  49. project_id = Column(String(26), ForeignKey("projects.id"))
  50. version_id = Column(String(26), ForeignKey("data_versions.id"))
  51. stage = Column(String(200), index=True)
  52. commit_id = Column(String(64))
  53. commit_message = Column(Text)
  54. group_key = Column(String(255))
  55. inputs = Column(JSON)
  56. outputs = Column(JSON)
  57. content_hash = Column(String(64))
  58. author = Column(String(50))
  59. created_at = Column(DateTime(timezone=True), server_default=func.now())
  60. version = relationship("DataVersion")
  61. project = relationship("Project")