| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297 |
- from datetime import datetime
- from sqlalchemy import Column, BigInteger, String, Text, Integer, Float, DateTime, Boolean, Enum as SQLEnum, JSON, Index
- from sqlalchemy.dialects.mysql import LONGTEXT
- from sqlalchemy.ext.declarative import declarative_base
- Base = declarative_base()
class Post(Base):
    """Post table - stores the basic information of each post.

    The ``comment=`` strings are emitted as column comments in the schema and
    are therefore kept verbatim; the ``#`` notes next to them are English
    glosses of those comments.
    """

    __tablename__ = 'post'

    # --- Identity and textual content ------------------------------------
    # Surrogate auto-increment primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    # Original post ID from the platform; required and unique.
    post_id = Column(
        String(100),
        nullable=False,
        unique=True,
        comment='帖子ID(平台原始ID)',
    )
    title = Column(String(500), comment='帖子标题')  # post title
    body_text = Column(Text, comment='正文内容')  # body text

    # --- Category labels ---------------------------------------------------
    merge_leve1 = Column(String(100), comment='一级品类')  # first-level category
    merge_leve2 = Column(String(100), comment='二级品类')  # second-level category

    # Creation time of the post itself.
    gmt_create = Column(DateTime, comment='帖子创建时间')

    # --- All-time counters (default 0) ------------------------------------
    # Total views; the column comment notes "after 20230601".
    view_all = Column(Integer, default=0, comment='总阅读量(20230601后)')
    share_all = Column(Integer, default=0, comment='总分享量')  # total shares
    return_all = Column(Integer, default=0, comment='总回流量')  # total return traffic

    # --- Last-30-day counters (default 0) ---------------------------------
    view_recent = Column(Integer, default=0, comment='近30天阅读量')
    share_recent = Column(Integer, default=0, comment='近30天分享量')
    return_recent = Column(Integer, default=0, comment='近30天回流量')

    # --- Engagement counters (default 0) ----------------------------------
    like_count = Column(Integer, default=0, comment='点赞数')  # likes
    comment_count = Column(Integer, default=0, comment='评论数')  # comments
    collect_count = Column(Integer, default=0, comment='收藏数')  # favourites

    # List of image/video URLs, stored as JSON.
    images = Column(JSON, comment='图片/视频URL列表')

    # --- Platform / author information ------------------------------------
    platform = Column(String(50), nullable=False, comment='平台标识')
    platform_account_id = Column(String(100), comment='平台作者ID')
    platform_account_name = Column(String(200), comment='平台作者名称')
    # Publish timestamp; the column comment gives the unit as milliseconds.
    publish_timestamp = Column(BigInteger, comment='发布时间戳(ms)')

    # Import time; filled by calling datetime.now at insert when not supplied.
    import_date = Column(
        DateTime,
        nullable=False,
        default=datetime.now,
        comment='导入日期',
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, post_id and title."""
        return f"<Post(id={self.id}, post_id={self.post_id}, title={self.title})>"
class PostDecodeResult(Base):
    """Post decode result table - stores the final topic result of a post."""

    __tablename__ = 'post_decode_result'

    # Surrogate auto-increment primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    # ID of the related post. Only a plain indexed string column: no
    # ForeignKey constraint is declared here.
    post_id = Column(String(100), nullable=False, comment='关联帖子ID')

    # --- Decode output ------------------------------------------------------
    topic = Column(Text, comment='最终选题结果')  # final topic result
    topic_type = Column(String(255), comment='选题类型')  # topic type
    core_attraction = Column(Text, comment='核心吸引力')  # core attraction
    main_purpose = Column(Text, comment='主要目的')  # main purpose
    # Confidence, stored as a string of up to 50 characters (not numeric).
    confidence = Column(String(50), comment='置信度')

    # Import time; filled by calling datetime.now at insert when not supplied.
    import_date = Column(
        DateTime,
        nullable=False,
        default=datetime.now,
        comment='导入日期',
    )

    __table_args__ = (
        Index('idx_post_decode_result_post_id', 'post_id'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id and post_id."""
        return f"<PostDecodeResult(id={self.id}, post_id={self.post_id})>"
class PostDecodeTopicPoint(Base):
    """Topic point table - stores the topic points found in a decode result."""

    __tablename__ = 'post_decode_topic_point'

    # Surrogate auto-increment primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')

    # --- Logical references (no ForeignKey constraints are declared) --------
    post_decode_result_id = Column(
        BigInteger,
        nullable=False,
        comment='关联解构结果ID',
    )
    post_id = Column(String(100), nullable=False, comment='关联帖子ID')

    # Kind of topic point; restricted to the three enum values below
    # (roughly: inspiration point / purpose point / key point).
    topic_point_type = Column(
        SQLEnum('灵感点', '目的点', '关键点', name='topic_point_type_enum'),
        nullable=False,
        comment='选题点类型:灵感点/目的点/关键点',
    )
    # The topic point itself (required) and an optional free-text description.
    topic_point_result = Column(String(500), nullable=False, comment='选题点')
    topic_point_description = Column(Text, comment='选题点描述信息')

    __table_args__ = (
        Index('idx_topic_point_post_decode_result_id', 'post_decode_result_id'),
        Index('idx_topic_point_post_id', 'post_id'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, type and result."""
        return f"<PostDecodeTopicPoint(id={self.id}, type={self.topic_point_type}, result={self.topic_point_result})>"
class PostDecodeTopicPointElement(Base):
    """Topic point element table - classification results of topic points."""

    __tablename__ = 'post_decode_topic_point_element'

    # Surrogate auto-increment primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')

    # --- Logical references (no ForeignKey constraints are declared) --------
    post_decode_result_id = Column(
        BigInteger,
        nullable=False,
        comment='关联解构结果ID',
    )
    post_id = Column(String(100), nullable=False, comment='关联帖子ID')
    topic_point_id = Column(BigInteger, nullable=False, comment='关联选题点ID')

    # --- Element classification --------------------------------------------
    # Element type; restricted to the three enum values below
    # (roughly: substance / form / intent).
    element_type = Column(
        SQLEnum('实质', '形式', '意图', name='element_type_enum'),
        nullable=False,
        comment='元素类型:实质/形式/意图',
    )
    # Free-form sub-type; the column comment lists example values.
    element_sub_type = Column(
        String(100),
        comment='元素子类型(如:具体元素/具象概念/抽象概念/整体形式等)',
    )
    element_name = Column(String(500), nullable=False, comment='元素名称')
    element_description = Column(Text, comment='元素说明')  # explanation
    element_source = Column(Text, comment='元素来源')  # source of the element
    element_reason = Column(Text, comment='分类理由')  # classification reason

    __table_args__ = (
        Index('idx_topic_point_element_topic_point_id', 'topic_point_id'),
        Index('idx_topic_point_element_post_id', 'post_id'),
        Index('idx_topic_point_element_type', 'element_type'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, type and name."""
        return f"<PostDecodeTopicPointElement(id={self.id}, type={self.element_type}, name={self.element_name})>"
# ============================================================================
# Global classification library V2 - time-versioned models
# ============================================================================
class ClassifyExecution(Base):
    """Classification execution record (one row per execution)."""

    __tablename__ = 'classify_execution'

    # Surrogate auto-increment primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')

    # --- What was executed --------------------------------------------------
    # Execution type; the column comment lists classify/optimize/rollback.
    execution_type = Column(
        String(50),
        nullable=False,
        default='classify',
        comment='执行类型: classify/optimize/rollback',
    )
    # Element type handled by this execution (plain string, not an enum).
    source_type = Column(String(50), comment='元素类型: 实质/形式/意图')
    # ID of the execution this one builds on; per the comment, 0 = initial.
    based_execution_id = Column(
        BigInteger,
        default=0,
        comment='基于哪次执行(执行链),0=初始',
    )
    # Status; the column comment lists running/success/failed/rolled_back.
    status = Column(
        String(50),
        nullable=False,
        default='running',
        comment='状态: running/success/failed/rolled_back',
    )
    # Batch information as JSON; the column comment sketches its shape.
    batch_info = Column(
        JSON,
        comment='批次信息: {"batch_id": "...", "element_count": N, "unique_count": M}',
    )

    # --- Model / agent context ---------------------------------------------
    model_name = Column(String(100), comment='模型名称')
    trigger_context = Column(Text, comment='触发上下文')
    execution_summary = Column(Text, comment='Agent执行总结')

    # --- Usage and cost -----------------------------------------------------
    input_tokens = Column(Integer, default=0, comment='输入Token数')
    output_tokens = Column(Integer, default=0, comment='输出Token数')
    cost_usd = Column(Float, default=0.0, comment='费用(USD)')

    # --- Timing and failure details ----------------------------------------
    # Start time; filled by calling datetime.now at insert when not supplied.
    start_time = Column(
        DateTime,
        nullable=False,
        default=datetime.now,
        comment='开始时间',
    )
    end_time = Column(DateTime, comment='结束时间')
    error_message = Column(Text, comment='错误信息')

    __table_args__ = (
        Index('idx_classify_exec_source_type', 'source_type'),
        Index('idx_classify_exec_status', 'status'),
        Index('idx_classify_exec_based_id', 'based_execution_id'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, type and status."""
        return f"<ClassifyExecution(id={self.id}, type={self.execution_type}, status={self.status})>"
class GlobalCategory(Base):
    """Global category library - rows carry execution-based version markers."""

    __tablename__ = 'global_category'

    # Row-level primary key; per the column comment, each modification
    # produces a new row.
    id = Column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
        comment='行级主键,每次修改产生新行',
    )
    # Logical ID that, per the column comment, stays the same for one
    # category across versions.
    stable_id = Column(
        BigInteger,
        nullable=False,
        comment='逻辑稳定ID,同一分类跨版本保持不变',
    )

    # --- Category description ----------------------------------------------
    name = Column(String(500), nullable=False, comment='分类名称')
    description = Column(Text, comment='分类说明')
    # stable_id of the parent category; per the comment, NULL = root node.
    parent_stable_id = Column(
        BigInteger,
        comment='父分类的stable_id,NULL=根节点',
    )
    source_type = Column(
        String(50),
        nullable=False,
        comment='元素类型: 实质/形式/意图',
    )
    category_nature = Column(String(50), comment='分类性质: 内容/维度/领域')
    level = Column(Integer, default=1, comment='层级深度')  # depth, default 1
    # Full path string; the column comment gives an example value.
    path = Column(String(1000), comment='完整路径,如 /主体/角色类型/人物角色')

    # --- Version lifecycle --------------------------------------------------
    # Execution ID that created this row (required).
    created_at_execution_id = Column(
        BigInteger,
        nullable=False,
        comment='创建此行的执行ID',
    )
    # Execution ID that retired this row; per the comment, NULL = still active.
    retired_at_execution_id = Column(
        BigInteger,
        comment='废弃此行的执行ID,NULL=当前有效',
    )

    # --- Metadata -----------------------------------------------------------
    create_reason = Column(Text, comment='创建/修改原因')

    __table_args__ = (
        Index('idx_gc_stable_id', 'stable_id'),
        Index('idx_gc_parent_stable_id', 'parent_stable_id'),
        Index('idx_gc_source_type', 'source_type'),
        Index('idx_gc_retired', 'retired_at_execution_id'),
        Index(
            'idx_gc_created_retired',
            'created_at_execution_id',
            'retired_at_execution_id',
        ),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, stable_id and name."""
        return f"<GlobalCategory(id={self.id}, stable_id={self.stable_id}, name={self.name})>"
class GlobalElement(Base):
    """Standardized element in the global library, with version markers."""

    __tablename__ = 'global_element'

    # Surrogate auto-increment primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')

    # --- Element description -----------------------------------------------
    name = Column(String(500), nullable=False, comment='标准化元素名称')
    description = Column(Text, comment='元素描述')
    # stable_id of the category this element belongs to (logical reference;
    # no ForeignKey constraint is declared).
    belong_category_stable_id = Column(
        BigInteger,
        nullable=False,
        comment='所属分类的stable_id',
    )
    source_type = Column(
        String(50),
        nullable=False,
        comment='元素类型: 实质/形式/意图',
    )
    element_sub_type = Column(String(100), comment='具体元素/具象概念/抽象概念等')
    # Occurrence count, default 1; the column comment describes it as
    # de-duplicated across posts.
    occurrence_count = Column(
        Integer,
        default=1,
        comment='出现次数(跨帖子去重计数)',
    )

    # --- Version lifecycle --------------------------------------------------
    # Execution ID that created this row (required).
    created_at_execution_id = Column(
        BigInteger,
        nullable=False,
        comment='创建此行的执行ID',
    )
    # Execution ID that retired this row; per the comment, NULL = still active.
    retired_at_execution_id = Column(
        BigInteger,
        comment='废弃此行的执行ID,NULL=当前有效',
    )

    __table_args__ = (
        Index('idx_ge_category_stable_id', 'belong_category_stable_id'),
        Index('idx_ge_source_type', 'source_type'),
        Index('idx_ge_retired', 'retired_at_execution_id'),
        Index('idx_ge_name', 'name'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id and name."""
        return f"<GlobalElement(id={self.id}, name={self.name})>"
class ElementClassificationMapping(Base):
    """Mapping from PostDecodeTopicPointElement to GlobalCategory/GlobalElement."""

    __tablename__ = 'element_classification_mapping'

    # Surrogate auto-increment primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')

    # --- Source element -----------------------------------------------------
    # The column comment calls this an FK to PostDecodeTopicPointElement.id,
    # but no ForeignKey constraint is declared on the column.
    post_decode_topic_point_element_id = Column(
        BigInteger,
        nullable=False,
        comment='FK → PostDecodeTopicPointElement.id',
    )
    # Redundant copy of the post ID, kept for easier querying (per comment).
    post_id = Column(String(100), comment='冗余帖子ID,便于查询')
    element_name = Column(String(500), comment='原始元素名称')  # original name
    element_type = Column(String(50), comment='实质/形式/意图')
    element_sub_type = Column(String(100), comment='具体子类型')

    # --- Classification target (logical references only) -------------------
    global_element_id = Column(BigInteger, comment='→ GlobalElement.id')
    global_category_stable_id = Column(
        BigInteger,
        comment='→ GlobalCategory.stable_id',
    )
    # Classification path string; the column comment gives an example value.
    classification_path = Column(
        String(1000),
        comment='分类路径,如 /主体/角色类型/人物角色',
    )

    # --- Provenance ---------------------------------------------------------
    # ID of the execution that produced this classification (required).
    classify_execution_id = Column(
        BigInteger,
        nullable=False,
        comment='哪次执行分类的',
    )
    # Creation time; filled by calling datetime.now at insert when not supplied.
    created_at = Column(
        DateTime,
        nullable=False,
        default=datetime.now,
        comment='创建时间',
    )

    __table_args__ = (
        Index('idx_ecm_element_id', 'post_decode_topic_point_element_id'),
        Index('idx_ecm_post_id', 'post_id'),
        Index('idx_ecm_global_element_id', 'global_element_id'),
        Index('idx_ecm_category_stable_id', 'global_category_stable_id'),
        Index('idx_ecm_execution_id', 'classify_execution_id'),
        Index('idx_ecm_element_type', 'element_type'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id and element name."""
        return f"<ElementClassificationMapping(id={self.id}, element={self.element_name})>"
class ClassifyExecutionLog(Base):
    """Classification execution log table - stores the full log content."""

    __tablename__ = 'classify_execution_log'

    # Surrogate auto-increment primary key (declared without a column comment).
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # ID of the related classification execution; the unique flag allows at
    # most one log row per execution ID.
    classify_execution_id = Column(
        BigInteger,
        nullable=False,
        unique=True,
        comment='关联的分类执行ID',
    )
    # Full log text; uses the MySQL-dialect LONGTEXT type.
    log_content = Column(
        LONGTEXT,
        nullable=False,
        comment='完整的执行日志内容',
    )
    # Log type; the column comment lists classify/optimize.
    log_type = Column(
        String(50),
        nullable=False,
        default='classify',
        comment='日志类型:classify/optimize',
    )
    # Save time; filled by calling datetime.now at insert when not supplied.
    created_at = Column(
        DateTime,
        nullable=False,
        default=datetime.now,
        comment='日志保存时间',
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, execution id and type."""
        return f"<ClassifyExecutionLog(id={self.id}, classify_execution_id={self.classify_execution_id}, log_type={self.log_type})>"
class ClassifyBatch(Base):
    """Batch tracking table."""

    __tablename__ = 'classify_batch'

    # Surrogate auto-increment primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')

    # --- Batch description --------------------------------------------------
    # Batch name; the column comment gives an example value.
    batch_name = Column(String(200), comment='批次名称,如 实质_batch_001')
    source_type = Column(String(50), nullable=False, comment='实质/形式/意图')
    # Element counts: total, and after de-duplication (both default 0).
    total_element_count = Column(Integer, default=0, comment='总元素数')
    unique_element_count = Column(Integer, default=0, comment='去重后元素数')

    # --- Progress -----------------------------------------------------------
    # Status; the column comment lists pending/running/success/failed.
    status = Column(
        String(50),
        nullable=False,
        default='pending',
        comment='状态: pending/running/success/failed',
    )
    # ID of the related execution record (logical reference; no ForeignKey
    # constraint is declared).
    classify_execution_id = Column(BigInteger, comment='关联执行记录ID')
    # Creation time; filled by calling datetime.now at insert when not supplied.
    created_at = Column(
        DateTime,
        nullable=False,
        default=datetime.now,
        comment='创建时间',
    )
    completed_at = Column(DateTime, comment='完成时间')  # completion time

    __table_args__ = (
        Index('idx_cb_source_type', 'source_type'),
        Index('idx_cb_status', 'status'),
        Index('idx_cb_execution_id', 'classify_execution_id'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, name and status."""
        return f"<ClassifyBatch(id={self.id}, name={self.batch_name}, status={self.status})>"
class PostClassificationStatus(Base):
    """Tracks the classification completion status of each post."""

    __tablename__ = 'post_classification_status'

    # Surrogate auto-increment primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')

    # --- Key: one row per (post_id, source_type) ---------------------------
    # Uniqueness of the pair is enforced by the unique index declared below.
    post_id = Column(String(100), nullable=False, comment='帖子ID')
    source_type = Column(
        String(50),
        nullable=False,
        comment='元素类型: 实质/形式/意图',
    )

    # --- Progress counters --------------------------------------------------
    # Total number of elements of this type for the post (default 0).
    total_elements = Column(
        Integer,
        nullable=False,
        default=0,
        comment='该帖子该类型的元素总数',
    )
    # Number of elements already classified (default 0).
    classified_elements = Column(
        Integer,
        nullable=False,
        default=0,
        comment='已分类的元素数',
    )
    # Whether every element has been classified (default False).
    is_completed = Column(
        Boolean,
        nullable=False,
        default=False,
        comment='是否全部分类完成',
    )

    # --- Bookkeeping --------------------------------------------------------
    # ID of the most recent execution that updated this row (default 0).
    last_updated_execution_id = Column(
        BigInteger,
        default=0,
        comment='最近更新的执行ID',
    )
    # Last update time; datetime.now is used both as the insert default and
    # as the on-update value.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.now,
        onupdate=datetime.now,
        comment='最后更新时间',
    )

    __table_args__ = (
        Index('uq_pcs_post_source', 'post_id', 'source_type', unique=True),
        Index('idx_pcs_source_completed', 'source_type', 'is_completed'),
        Index('idx_pcs_completed', 'is_completed'),
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, post, type and status."""
        return f"<PostClassificationStatus(id={self.id}, post_id={self.post_id}, source_type={self.source_type}, completed={self.is_completed})>"
|