models.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. """Pattern Mining 数据模型"""
  2. from sqlalchemy import Column, BigInteger, Integer, String, Float, Boolean, Text, DateTime, JSON, Index
  3. from sqlalchemy.ext.declarative import declarative_base
  4. Base = declarative_base()
  5. class TopicPatternExecution(Base):
  6. __tablename__ = 'topic_pattern_execution'
  7. id = Column(BigInteger, primary_key=True, autoincrement=True)
  8. # 筛选条件快照
  9. cluster_name = Column(String(32), nullable=True)
  10. merge_leve2 = Column(String(100), nullable=True)
  11. platform = Column(String(50), nullable=True)
  12. account_name = Column(String(200), nullable=True)
  13. post_limit = Column(Integer, nullable=False, default=500)
  14. # 挖掘参数
  15. min_absolute_support = Column(Integer, nullable=False, default=2)
  16. classify_execution_id = Column(BigInteger, nullable=True)
  17. mining_configs = Column(JSON, nullable=True) # [{dimension_mode, target_depths: [...]}, ...]
  18. # 结果摘要
  19. post_count = Column(Integer, nullable=True)
  20. itemset_count = Column(Integer, nullable=True) # 所有 config 的项集总数
  21. # 状态
  22. status = Column(String(50), nullable=False, default='running')
  23. error_message = Column(Text, nullable=True)
  24. start_time = Column(DateTime, nullable=True)
  25. end_time = Column(DateTime, nullable=True)
  26. class TopicPatternMiningConfig(Base):
  27. """一次执行中的一个 (dimension_mode, target_depth) 挖掘配置"""
  28. __tablename__ = 'topic_pattern_mining_config'
  29. id = Column(BigInteger, primary_key=True, autoincrement=True)
  30. execution_id = Column(BigInteger, nullable=False, index=True)
  31. dimension_mode = Column(String(50), nullable=False)
  32. target_depth = Column(String(50), nullable=False)
  33. # 结果摘要
  34. transaction_count = Column(Integer, nullable=True)
  35. itemset_count = Column(Integer, nullable=True)
  36. class TopicPatternCategory(Base):
  37. """分类树节点快照 - 记录执行时的分类结构"""
  38. __tablename__ = 'topic_pattern_category'
  39. id = Column(BigInteger, primary_key=True, autoincrement=True)
  40. execution_id = Column(BigInteger, nullable=False, index=True)
  41. source_stable_id = Column(BigInteger, nullable=True) # 原 GlobalCategory.stable_id
  42. source_type = Column(String(50), nullable=False) # 实质/形式/意图
  43. name = Column(String(500), nullable=False)
  44. description = Column(Text, nullable=True)
  45. category_nature = Column(String(50), nullable=True) # 内容/维度/领域
  46. path = Column(String(1000), nullable=True) # 完整路径 如 /食品/水果
  47. level = Column(Integer, nullable=True)
  48. parent_id = Column(BigInteger, nullable=True) # FK → 本表 id(快照内的父节点)
  49. parent_source_stable_id = Column(BigInteger, nullable=True) # 原 parent_stable_id(方便建树)
  50. element_count = Column(Integer, default=0) # 该分类下直属元素数
  51. class TopicPatternElement(Base):
  52. """帖子级元素记录 - 每行 = 一个帖子的一个点下的一个元素,代替 data_cache JSON"""
  53. __tablename__ = 'topic_pattern_element'
  54. id = Column(BigInteger, primary_key=True, autoincrement=True)
  55. execution_id = Column(BigInteger, nullable=False, index=True)
  56. # 帖子 & 选题点上下文
  57. post_id = Column(String(100), nullable=False, index=True)
  58. point_type = Column(String(50), nullable=False) # 灵感点/目的点/关键点
  59. point_text = Column(String(500), nullable=True) # 选题点内容("点" 字段)
  60. # 元素信息
  61. element_type = Column(String(50), nullable=False) # 实质/形式/意图
  62. name = Column(String(500), nullable=False)
  63. description = Column(Text, nullable=True)
  64. # 分类关联
  65. category_id = Column(BigInteger, nullable=True, index=True) # FK → TopicPatternCategory.id
  66. category_path = Column(String(1000), nullable=True) # 冗余存储分类路径 如 "食品>水果"
  67. class Post(Base):
  68. """帖子元数据 - 全局通用,跨 execution,通过 post_id 关联筛选"""
  69. __tablename__ = 'post'
  70. id = Column(BigInteger, primary_key=True, autoincrement=True)
  71. post_id = Column(String(100), nullable=False, unique=True, index=True)
  72. account_name = Column(String(200), nullable=True, index=True)
  73. merge_leve2 = Column(String(100), nullable=True, index=True)
  74. platform = Column(String(50), nullable=True, index=True)
  75. class TopicPatternItemset(Base):
  76. """频繁项集"""
  77. __tablename__ = 'topic_pattern_itemset'
  78. id = Column(BigInteger, primary_key=True, autoincrement=True)
  79. execution_id = Column(BigInteger, nullable=False, index=True)
  80. mining_config_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternMiningConfig.id
  81. combination_type = Column(String(200), nullable=False, index=True)
  82. item_count = Column(Integer, nullable=False, index=True)
  83. support = Column(Float, nullable=False)
  84. absolute_support = Column(Integer, nullable=False, index=True)
  85. dimensions = Column(JSON, nullable=True) # ["实质","形式"]
  86. is_cross_point = Column(Boolean, nullable=False, default=False)
  87. matched_post_ids = Column(JSON, nullable=True) # ["post_id_1", "post_id_2", ...] 直接存 post_id
  88. class TopicPatternItemsetItem(Base):
  89. """项集中的单个 item - 关联到分类节点"""
  90. __tablename__ = 'topic_pattern_itemset_item'
  91. id = Column(BigInteger, primary_key=True, autoincrement=True)
  92. itemset_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternItemset.id
  93. point_type = Column(String(50), nullable=True) # 灵感点/目的点/关键点(substance_form_only 模式下为空)
  94. dimension = Column(String(50), nullable=True) # 实质/形式/意图(point_type_only 模式下为空)
  95. category_id = Column(BigInteger, nullable=True) # FK → TopicPatternCategory.id
  96. category_path = Column(String(1000), nullable=True) # 分类路径部分 如 "食品>水果"
  97. element_name = Column(String(500), nullable=True) # 若为 name 层 item(含||),提取的元素名称