"""SQLAlchemy ORM models for posts, post decode results and the global
classification library.

Every model defines a ``__repr__`` that shows its identifying columns so that
instances are readable in logs and debuggers.
"""
from datetime import datetime

from sqlalchemy import (
    Column,
    BigInteger,
    String,
    Text,
    Integer,
    Float,
    DateTime,
    Boolean,
    Enum as SQLEnum,
    JSON,
    Index,
)
from sqlalchemy.dialects.mysql import LONGTEXT
# NOTE(review): SQLAlchemy 1.4+ exposes declarative_base from sqlalchemy.orm
# and deprecates this import path - confirm the pinned version before moving.
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class Post(Base):
    """Post table - stores the basic information of a post."""

    __tablename__ = 'post'

    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    post_id = Column(String(100), nullable=False, unique=True, comment='帖子ID(平台原始ID)')
    title = Column(String(500), comment='帖子标题')
    body_text = Column(Text, comment='正文内容')
    merge_leve1 = Column(String(100), comment='一级品类')
    merge_leve2 = Column(String(100), comment='二级品类')
    gmt_create = Column(DateTime, comment='帖子创建时间')
    view_all = Column(Integer, default=0, comment='总阅读量(20230601后)')
    share_all = Column(Integer, default=0, comment='总分享量')
    return_all = Column(Integer, default=0, comment='总回流量')
    view_recent = Column(Integer, default=0, comment='近30天阅读量')
    share_recent = Column(Integer, default=0, comment='近30天分享量')
    return_recent = Column(Integer, default=0, comment='近30天回流量')
    like_count = Column(Integer, default=0, comment='点赞数')
    comment_count = Column(Integer, default=0, comment='评论数')
    collect_count = Column(Integer, default=0, comment='收藏数')
    images = Column(JSON, comment='图片/视频URL列表')
    platform = Column(String(50), nullable=False, comment='平台标识')
    platform_account_id = Column(String(100), comment='平台作者ID')
    platform_account_name = Column(String(200), comment='平台作者名称')
    publish_timestamp = Column(BigInteger, comment='发布时间戳(ms)')
    import_date = Column(DateTime, nullable=False, default=datetime.now, comment='导入日期')

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return (
            f"<Post(id={self.id!r}, post_id={self.post_id!r}, "
            f"platform={self.platform!r})>"
        )


class PostDecodeResult(Base):
    """Post decode result table - stores the final topic result of a post."""

    __tablename__ = 'post_decode_result'

    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    post_id = Column(String(100), nullable=False, comment='关联帖子ID')
    topic = Column(Text, comment='最终选题结果')
    topic_type = Column(String(255), comment='选题类型')
    core_attraction = Column(Text, comment='核心吸引力')
    main_purpose = Column(Text, comment='主要目的')
    confidence = Column(String(50), comment='置信度')
    import_date = Column(DateTime, nullable=False, default=datetime.now, comment='导入日期')

    __table_args__ = (
        Index('idx_post_decode_result_post_id', 'post_id'),
    )

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return f"<PostDecodeResult(id={self.id!r}, post_id={self.post_id!r})>"


class PostDecodeTopicPoint(Base):
    """Topic point table - stores the topic points of a decode result."""

    __tablename__ = 'post_decode_topic_point'

    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    post_decode_result_id = Column(BigInteger, nullable=False, comment='关联解构结果ID')
    post_id = Column(String(100), nullable=False, comment='关联帖子ID')
    topic_point_type = Column(
        SQLEnum('灵感点', '目的点', '关键点', name='topic_point_type_enum'),
        nullable=False,
        comment='选题点类型:灵感点/目的点/关键点',
    )
    topic_point_result = Column(String(500), nullable=False, comment='选题点')
    topic_point_description = Column(Text, comment='选题点描述信息')

    __table_args__ = (
        Index('idx_topic_point_post_decode_result_id', 'post_decode_result_id'),
        Index('idx_topic_point_post_id', 'post_id'),
    )

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return (
            f"<PostDecodeTopicPoint(id={self.id!r}, post_id={self.post_id!r}, "
            f"topic_point_type={self.topic_point_type!r})>"
        )


class PostDecodeTopicPointElement(Base):
    """Topic point element table - stores the classified elements of the
    topic points in a decode result."""

    __tablename__ = 'post_decode_topic_point_element'

    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    post_decode_result_id = Column(BigInteger, nullable=False, comment='关联解构结果ID')
    post_id = Column(String(100), nullable=False, comment='关联帖子ID')
    topic_point_id = Column(BigInteger, nullable=False, comment='关联选题点ID')
    element_type = Column(
        SQLEnum('实质', '形式', '意图', name='element_type_enum'),
        nullable=False,
        comment='元素类型:实质/形式/意图',
    )
    element_sub_type = Column(
        String(100),
        comment='元素子类型(如:具体元素/具象概念/抽象概念/整体形式等)',
    )
    element_name = Column(String(500), nullable=False, comment='元素名称')
    element_description = Column(Text, comment='元素说明')
    element_source = Column(Text, comment='元素来源')
    element_reason = Column(Text, comment='分类理由')

    __table_args__ = (
        Index('idx_topic_point_element_topic_point_id', 'topic_point_id'),
        Index('idx_topic_point_element_post_id', 'post_id'),
        Index('idx_topic_point_element_type', 'element_type'),
    )

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return (
            f"<PostDecodeTopicPointElement(id={self.id!r}, "
            f"element_type={self.element_type!r}, "
            f"element_name={self.element_name!r})>"
        )


# ============================================================================
# Global classification library V2 - time-versioned marker models
# ============================================================================

class ClassifyExecution(Base):
    """Classification execution record."""

    __tablename__ = 'classify_execution'

    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    execution_type = Column(
        String(50),
        nullable=False,
        default='classify',
        comment='执行类型: classify/optimize/rollback',
    )
    source_type = Column(String(50), comment='元素类型: 实质/形式/意图')
    based_execution_id = Column(BigInteger, default=0, comment='基于哪次执行(执行链),0=初始')
    status = Column(
        String(50),
        nullable=False,
        default='running',
        comment='状态: running/success/failed/rolled_back',
    )
    batch_info = Column(
        JSON,
        comment='批次信息: {"batch_id": "...", "element_count": N, "unique_count": M}',
    )
    model_name = Column(String(100), comment='模型名称')
    trigger_context = Column(Text, comment='触发上下文')
    execution_summary = Column(Text, comment='Agent执行总结')
    input_tokens = Column(Integer, default=0, comment='输入Token数')
    output_tokens = Column(Integer, default=0, comment='输出Token数')
    cost_usd = Column(Float, default=0.0, comment='费用(USD)')
    start_time = Column(DateTime, nullable=False, default=datetime.now, comment='开始时间')
    end_time = Column(DateTime, comment='结束时间')
    error_message = Column(Text, comment='错误信息')

    __table_args__ = (
        Index('idx_classify_exec_source_type', 'source_type'),
        Index('idx_classify_exec_status', 'status'),
        Index('idx_classify_exec_based_id', 'based_execution_id'),
    )

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return (
            f"<ClassifyExecution(id={self.id!r}, "
            f"execution_type={self.execution_type!r}, "
            f"source_type={self.source_type!r}, status={self.status!r})>"
        )


class GlobalCategory(Base):
    """Global category library - time-versioned rows."""

    __tablename__ = 'global_category'

    id = Column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
        comment='行级主键,每次修改产生新行',
    )
    stable_id = Column(
        BigInteger,
        nullable=False,
        comment='逻辑稳定ID,同一分类跨版本保持不变',
    )
    name = Column(String(500), nullable=False, comment='分类名称')
    description = Column(Text, comment='分类说明')
    parent_stable_id = Column(BigInteger, comment='父分类的stable_id,NULL=根节点')
    source_type = Column(String(50), nullable=False, comment='元素类型: 实质/形式/意图')
    category_nature = Column(String(50), comment='分类性质: 内容/维度/领域')
    level = Column(Integer, default=1, comment='层级深度')
    path = Column(String(1000), comment='完整路径,如 /主体/角色类型/人物角色')

    # Version lifecycle
    created_at_execution_id = Column(BigInteger, nullable=False, comment='创建此行的执行ID')
    retired_at_execution_id = Column(BigInteger, comment='废弃此行的执行ID,NULL=当前有效')

    # Metadata
    create_reason = Column(Text, comment='创建/修改原因')

    __table_args__ = (
        Index('idx_gc_stable_id', 'stable_id'),
        Index('idx_gc_parent_stable_id', 'parent_stable_id'),
        Index('idx_gc_source_type', 'source_type'),
        Index('idx_gc_retired', 'retired_at_execution_id'),
        Index('idx_gc_created_retired', 'created_at_execution_id', 'retired_at_execution_id'),
    )

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return (
            f"<GlobalCategory(id={self.id!r}, stable_id={self.stable_id!r}, "
            f"name={self.name!r})>"
        )


class GlobalElement(Base):
    """Standardized element in the global library - time-versioned rows."""

    __tablename__ = 'global_element'

    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    name = Column(String(500), nullable=False, comment='标准化元素名称')
    description = Column(Text, comment='元素描述')
    belong_category_stable_id = Column(BigInteger, nullable=False, comment='所属分类的stable_id')
    source_type = Column(String(50), nullable=False, comment='元素类型: 实质/形式/意图')
    element_sub_type = Column(String(100), comment='具体元素/具象概念/抽象概念等')
    occurrence_count = Column(Integer, default=1, comment='出现次数(跨帖子去重计数)')

    # Version lifecycle
    created_at_execution_id = Column(BigInteger, nullable=False, comment='创建此行的执行ID')
    retired_at_execution_id = Column(BigInteger, comment='废弃此行的执行ID,NULL=当前有效')

    __table_args__ = (
        Index('idx_ge_category_stable_id', 'belong_category_stable_id'),
        Index('idx_ge_source_type', 'source_type'),
        Index('idx_ge_retired', 'retired_at_execution_id'),
        Index('idx_ge_name', 'name'),
    )

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return (
            f"<GlobalElement(id={self.id!r}, name={self.name!r}, "
            f"belong_category_stable_id={self.belong_category_stable_id!r})>"
        )


class ElementClassificationMapping(Base):
    """Mapping from PostDecodeTopicPointElement to GlobalCategory/GlobalElement."""

    __tablename__ = 'element_classification_mapping'

    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    post_decode_topic_point_element_id = Column(
        BigInteger,
        nullable=False,
        comment='FK → PostDecodeTopicPointElement.id',
    )
    post_id = Column(String(100), comment='冗余帖子ID,便于查询')
    element_name = Column(String(500), comment='原始元素名称')
    element_type = Column(String(50), comment='实质/形式/意图')
    element_sub_type = Column(String(100), comment='具体子类型')
    global_element_id = Column(BigInteger, comment='→ GlobalElement.id')
    global_category_stable_id = Column(BigInteger, comment='→ GlobalCategory.stable_id')
    classification_path = Column(String(1000), comment='分类路径,如 /主体/角色类型/人物角色')
    classify_execution_id = Column(BigInteger, nullable=False, comment='哪次执行分类的')
    created_at = Column(DateTime, nullable=False, default=datetime.now, comment='创建时间')

    __table_args__ = (
        Index('idx_ecm_element_id', 'post_decode_topic_point_element_id'),
        Index('idx_ecm_post_id', 'post_id'),
        Index('idx_ecm_global_element_id', 'global_element_id'),
        Index('idx_ecm_category_stable_id', 'global_category_stable_id'),
        Index('idx_ecm_execution_id', 'classify_execution_id'),
        Index('idx_ecm_element_type', 'element_type'),
    )

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return (
            f"<ElementClassificationMapping(id={self.id!r}, "
            f"element_id={self.post_decode_topic_point_element_id!r}, "
            f"global_element_id={self.global_element_id!r}, "
            f"global_category_stable_id={self.global_category_stable_id!r})>"
        )


class ClassifyExecutionLog(Base):
    """Classification execution log table - stores the full log content of
    an execution."""

    __tablename__ = 'classify_execution_log'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    classify_execution_id = Column(
        BigInteger,
        nullable=False,
        unique=True,
        comment='关联的分类执行ID',
    )
    log_content = Column(LONGTEXT, nullable=False, comment='完整的执行日志内容')
    log_type = Column(
        String(50),
        nullable=False,
        default='classify',
        comment='日志类型:classify/optimize',
    )
    created_at = Column(DateTime, nullable=False, default=datetime.now, comment='日志保存时间')

    def __repr__(self):
        """Return a debug representation; the log body is deliberately
        omitted because it is a LONGTEXT column."""
        return (
            f"<ClassifyExecutionLog(id={self.id!r}, "
            f"classify_execution_id={self.classify_execution_id!r}, "
            f"log_type={self.log_type!r})>"
        )


class ClassifyBatch(Base):
    """Batch tracking."""

    __tablename__ = 'classify_batch'

    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    batch_name = Column(String(200), comment='批次名称,如 实质_batch_001')
    source_type = Column(String(50), nullable=False, comment='实质/形式/意图')
    total_element_count = Column(Integer, default=0, comment='总元素数')
    unique_element_count = Column(Integer, default=0, comment='去重后元素数')
    status = Column(
        String(50),
        nullable=False,
        default='pending',
        comment='状态: pending/running/success/failed',
    )
    classify_execution_id = Column(BigInteger, comment='关联执行记录ID')
    created_at = Column(DateTime, nullable=False, default=datetime.now, comment='创建时间')
    completed_at = Column(DateTime, comment='完成时间')

    __table_args__ = (
        Index('idx_cb_source_type', 'source_type'),
        Index('idx_cb_status', 'status'),
        Index('idx_cb_execution_id', 'classify_execution_id'),
    )

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return (
            f"<ClassifyBatch(id={self.id!r}, batch_name={self.batch_name!r}, "
            f"source_type={self.source_type!r}, status={self.status!r})>"
        )


class PostClassificationStatus(Base):
    """Tracking table for the classification completion status of a post."""

    __tablename__ = 'post_classification_status'

    id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
    post_id = Column(String(100), nullable=False, comment='帖子ID')
    source_type = Column(String(50), nullable=False, comment='元素类型: 实质/形式/意图')
    total_elements = Column(Integer, nullable=False, default=0, comment='该帖子该类型的元素总数')
    classified_elements = Column(Integer, nullable=False, default=0, comment='已分类的元素数')
    is_completed = Column(Boolean, nullable=False, default=False, comment='是否全部分类完成')
    last_updated_execution_id = Column(BigInteger, default=0, comment='最近更新的执行ID')
    updated_at = Column(
        DateTime,
        nullable=False,
        default=datetime.now,
        onupdate=datetime.now,
        comment='最后更新时间',
    )

    __table_args__ = (
        # Unique index: one status row per (post_id, source_type) pair.
        Index('uq_pcs_post_source', 'post_id', 'source_type', unique=True),
        Index('idx_pcs_source_completed', 'source_type', 'is_completed'),
        Index('idx_pcs_completed', 'is_completed'),
    )

    def __repr__(self):
        """Return a debug representation with the identifying columns."""
        return (
            f"<PostClassificationStatus(post_id={self.post_id!r}, "
            f"source_type={self.source_type!r}, "
            f"classified={self.classified_elements!r}/{self.total_elements!r}, "
            f"is_completed={self.is_completed!r})>"
        )