"""Pattern Mining 数据模型""" from sqlalchemy import Column, BigInteger, Integer, String, Float, Boolean, Text, DateTime, JSON, Index from sqlalchemy.ext.declarative import declarative_base Base = declarative_base() class TopicPatternExecution(Base): __tablename__ = 'topic_pattern_execution' id = Column(BigInteger, primary_key=True, autoincrement=True) # 筛选条件快照 cluster_name = Column(String(32), nullable=True) merge_leve2 = Column(String(100), nullable=True) platform = Column(String(50), nullable=True) account_name = Column(String(200), nullable=True) post_limit = Column(Integer, nullable=False, default=500) # 挖掘参数 min_absolute_support = Column(Integer, nullable=False, default=2) classify_execution_id = Column(BigInteger, nullable=True) mining_configs = Column(JSON, nullable=True) # [{dimension_mode, target_depths: [...]}, ...] # 结果摘要 post_count = Column(Integer, nullable=True) itemset_count = Column(Integer, nullable=True) # 所有 config 的项集总数 # 状态 status = Column(String(50), nullable=False, default='running') error_message = Column(Text, nullable=True) start_time = Column(DateTime, nullable=True) end_time = Column(DateTime, nullable=True) class TopicPatternMiningConfig(Base): """一次执行中的一个 (dimension_mode, target_depth) 挖掘配置""" __tablename__ = 'topic_pattern_mining_config' id = Column(BigInteger, primary_key=True, autoincrement=True) execution_id = Column(BigInteger, nullable=False, index=True) dimension_mode = Column(String(50), nullable=False) target_depth = Column(String(50), nullable=False) # 结果摘要 transaction_count = Column(Integer, nullable=True) itemset_count = Column(Integer, nullable=True) class TopicPatternCategory(Base): """分类树节点快照 - 记录执行时的分类结构""" __tablename__ = 'topic_pattern_category' id = Column(BigInteger, primary_key=True, autoincrement=True) execution_id = Column(BigInteger, nullable=False, index=True) source_stable_id = Column(BigInteger, nullable=True) # 原 GlobalCategory.stable_id source_type = Column(String(50), nullable=False) # 实质/形式/意图 name = Column(String(500), nullable=False) description = Column(Text, nullable=True) category_nature = Column(String(50), nullable=True) # 内容/维度/领域 path = Column(String(1000), nullable=True) # 完整路径 如 /食品/水果 level = Column(Integer, nullable=True) parent_id = Column(BigInteger, nullable=True) # FK → 本表 id(快照内的父节点) parent_source_stable_id = Column(BigInteger, nullable=True) # 原 parent_stable_id(方便建树) element_count = Column(Integer, default=0) # 该分类下直属元素数 class TopicPatternElement(Base): """帖子级元素记录 - 每行 = 一个帖子的一个点下的一个元素,代替 data_cache JSON""" __tablename__ = 'topic_pattern_element' id = Column(BigInteger, primary_key=True, autoincrement=True) execution_id = Column(BigInteger, nullable=False, index=True) # 帖子 & 选题点上下文 post_id = Column(String(100), nullable=False, index=True) point_type = Column(String(50), nullable=False) # 灵感点/目的点/关键点 point_text = Column(String(500), nullable=True) # 选题点内容("点" 字段) # 元素信息 element_type = Column(String(50), nullable=False) # 实质/形式/意图 name = Column(String(500), nullable=False) description = Column(Text, nullable=True) # 分类关联 category_id = Column(BigInteger, nullable=True, index=True) # FK → TopicPatternCategory.id category_path = Column(String(1000), nullable=True) # 冗余存储分类路径 如 "食品>水果" class Post(Base): """帖子元数据 - 全局通用,跨 execution,通过 post_id 关联筛选""" __tablename__ = 'post' id = Column(BigInteger, primary_key=True, autoincrement=True) post_id = Column(String(100), nullable=False, unique=True, index=True) account_name = Column(String(200), nullable=True, index=True) merge_leve2 = Column(String(100), nullable=True, index=True) platform = Column(String(50), nullable=True, index=True) class TopicPatternItemset(Base): """频繁项集""" __tablename__ = 'topic_pattern_itemset' id = Column(BigInteger, primary_key=True, autoincrement=True) execution_id = Column(BigInteger, nullable=False, index=True) mining_config_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternMiningConfig.id combination_type = Column(String(200), nullable=False, index=True) item_count = Column(Integer, nullable=False, index=True) support = Column(Float, nullable=False) absolute_support = Column(Integer, nullable=False, index=True) dimensions = Column(JSON, nullable=True) # ["实质","形式"] is_cross_point = Column(Boolean, nullable=False, default=False) matched_post_ids = Column(JSON, nullable=True) # ["post_id_1", "post_id_2", ...] 直接存 post_id class TopicPatternItemsetItem(Base): """项集中的单个 item - 关联到分类节点""" __tablename__ = 'topic_pattern_itemset_item' id = Column(BigInteger, primary_key=True, autoincrement=True) itemset_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternItemset.id point_type = Column(String(50), nullable=True) # 灵感点/目的点/关键点(substance_form_only 模式下为空) dimension = Column(String(50), nullable=True) # 实质/形式/意图(point_type_only 模式下为空) category_id = Column(BigInteger, nullable=True) # FK → TopicPatternCategory.id category_path = Column(String(1000), nullable=True) # 分类路径部分 如 "食品>水果" element_name = Column(String(500), nullable=True) # 若为 name 层 item(含||),提取的元素名称