| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- """Pattern Mining 数据模型"""
- from sqlalchemy import Column, BigInteger, Integer, String, Float, Boolean, Text, DateTime, JSON, Index
- from sqlalchemy.ext.declarative import declarative_base
# Declarative base class shared by every ORM model defined in this module.
# NOTE(review): SQLAlchemy 1.4+ exposes declarative_base from sqlalchemy.orm and
# deprecates the sqlalchemy.ext.declarative import path - confirm the target version.
Base = declarative_base()
class TopicPatternExecution(Base):
    """Pattern-mining execution record.

    Stores a snapshot of the filter conditions, the mining parameters,
    a summary of the results and the run status.
    """

    __tablename__ = 'topic_pattern_execution'

    id = Column(BigInteger, primary_key=True, autoincrement=True)

    # ---- Snapshot of the filter conditions ----
    cluster_name = Column(String(32), nullable=True)
    # NOTE(review): "leve2" looks like a misspelling of "level2"; the name is
    # kept because it is also the database column name.
    merge_leve2 = Column(String(100), nullable=True)
    platform = Column(String(50), nullable=True)
    account_name = Column(String(200), nullable=True)
    post_limit = Column(Integer, nullable=False, default=500)

    # ---- Mining parameters ----
    min_absolute_support = Column(Integer, nullable=False, default=2)
    classify_execution_id = Column(BigInteger, nullable=True)
    # Shape: [{dimension_mode, target_depths: [...]}, ...]
    mining_configs = Column(JSON, nullable=True)

    # ---- Result summary ----
    post_count = Column(Integer, nullable=True)
    # Total number of itemsets across all configs.
    itemset_count = Column(Integer, nullable=True)

    # ---- Status ----
    status = Column(String(50), nullable=False, default='running')
    error_message = Column(Text, nullable=True)
    start_time = Column(DateTime, nullable=True)
    end_time = Column(DateTime, nullable=True)
class TopicPatternMiningConfig(Base):
    """One (dimension_mode, target_depth) mining configuration within a single execution."""

    __tablename__ = 'topic_pattern_mining_config'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)

    dimension_mode = Column(String(50), nullable=False)
    target_depth = Column(String(50), nullable=False)

    # ---- Result summary ----
    transaction_count = Column(Integer, nullable=True)
    itemset_count = Column(Integer, nullable=True)
class TopicPatternCategory(Base):
    """Category-tree node snapshot: records the category structure at execution time."""

    __tablename__ = 'topic_pattern_category'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)

    # The original GlobalCategory.stable_id.
    source_stable_id = Column(BigInteger, nullable=True)
    # One of 实质 / 形式 / 意图 (substance / form / intent).
    source_type = Column(String(50), nullable=False)
    name = Column(String(500), nullable=False)
    description = Column(Text, nullable=True)
    # One of 内容 / 维度 / 领域 (content / dimension / domain).
    category_nature = Column(String(50), nullable=True)
    # Full path, e.g. /食品/水果
    path = Column(String(1000), nullable=True)
    level = Column(Integer, nullable=True)

    # Logical FK -> id of this same table (the parent node inside the snapshot);
    # no ForeignKey constraint is declared on the column.
    parent_id = Column(BigInteger, nullable=True)
    # The original parent_stable_id (kept to make tree building easier).
    parent_source_stable_id = Column(BigInteger, nullable=True)

    # Number of elements directly under this category.
    element_count = Column(Integer, default=0)
class TopicPatternElement(Base):
    """Post-level element record.

    Each row is one element under one point of one post; this table
    replaces the data_cache JSON.
    """

    __tablename__ = 'topic_pattern_element'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)

    # ---- Post and topic-point context ----
    post_id = Column(String(100), nullable=False, index=True)
    # One of 灵感点 / 目的点 / 关键点 (inspiration / purpose / key point).
    point_type = Column(String(50), nullable=False)
    # Topic-point content (the "点" field).
    point_text = Column(String(500), nullable=True)

    # ---- Element information ----
    # One of 实质 / 形式 / 意图 (substance / form / intent).
    element_type = Column(String(50), nullable=False)
    name = Column(String(500), nullable=False)
    description = Column(Text, nullable=True)

    # ---- Category association ----
    # Logical FK -> TopicPatternCategory.id
    category_id = Column(BigInteger, nullable=True, index=True)
    # Redundant copy of the category path, e.g. "食品>水果"
    category_path = Column(String(1000), nullable=True)
class Post(Base):
    """Post metadata: global and shared across executions, joined and filtered via post_id."""

    __tablename__ = 'post'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    post_id = Column(String(100), nullable=False, unique=True, index=True)

    account_name = Column(String(200), nullable=True, index=True)
    # NOTE(review): "leve2" looks like a misspelling of "level2"; the name is
    # kept because it is also the database column name.
    merge_leve2 = Column(String(100), nullable=True, index=True)
    platform = Column(String(50), nullable=True, index=True)
class TopicPatternItemset(Base):
    """Frequent itemset."""

    __tablename__ = 'topic_pattern_itemset'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)
    # Logical FK -> TopicPatternMiningConfig.id
    mining_config_id = Column(BigInteger, nullable=False, index=True)

    combination_type = Column(String(200), nullable=False, index=True)
    item_count = Column(Integer, nullable=False, index=True)
    support = Column(Float, nullable=False)
    absolute_support = Column(Integer, nullable=False, index=True)
    # Example value: ["实质","形式"]
    dimensions = Column(JSON, nullable=True)
    is_cross_point = Column(Boolean, nullable=False, default=False)

    # Stores post_id values directly: ["post_id_1", "post_id_2", ...]
    matched_post_ids = Column(JSON, nullable=True)
class TopicPatternItemsetItem(Base):
    """A single item inside an itemset, linked to a category node."""

    __tablename__ = 'topic_pattern_itemset_item'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Logical FK -> TopicPatternItemset.id
    itemset_id = Column(BigInteger, nullable=False, index=True)

    # One of 灵感点 / 目的点 / 关键点 (inspiration / purpose / key point);
    # empty in substance_form_only mode.
    point_type = Column(String(50), nullable=True)
    # One of 实质 / 形式 / 意图 (substance / form / intent);
    # empty in point_type_only mode.
    dimension = Column(String(50), nullable=True)

    # Logical FK -> TopicPatternCategory.id
    category_id = Column(BigInteger, nullable=True)
    # Category-path part of the item, e.g. "食品>水果"
    category_path = Column(String(1000), nullable=True)
    # For a name-level item (one containing "||"), the extracted element name.
    element_name = Column(String(500), nullable=True)
|