models.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. """Pattern Mining 数据模型"""
  2. from sqlalchemy import Column, BigInteger, Integer, String, Float, Boolean, Text, DateTime, JSON, Index
  3. from sqlalchemy.ext.declarative import declarative_base
# Shared declarative base; the model classes in this module subclass it.
# NOTE(review): sqlalchemy.ext.declarative.declarative_base is the legacy import
# location (SQLAlchemy 1.4+ exposes it as sqlalchemy.orm.declarative_base) —
# confirm the project's SQLAlchemy version before switching.
Base = declarative_base()
  5. class TopicPatternExecution(Base):
  6. __tablename__ = 'topic_pattern_execution'
  7. id = Column(BigInteger, primary_key=True, autoincrement=True)
  8. # 筛选条件快照
  9. merge_leve2 = Column(String(100), nullable=True)
  10. platform = Column(String(50), nullable=True)
  11. account_name = Column(String(200), nullable=True)
  12. post_limit = Column(Integer, nullable=False, default=500)
  13. # 挖掘参数
  14. min_absolute_support = Column(Integer, nullable=False, default=2)
  15. classify_execution_id = Column(BigInteger, nullable=True)
  16. mining_configs = Column(JSON, nullable=True) # [{dimension_mode, target_depths: [...]}, ...]
  17. # 结果摘要
  18. post_count = Column(Integer, nullable=True)
  19. itemset_count = Column(Integer, nullable=True) # 所有 config 的项集总数
  20. # 状态
  21. status = Column(String(50), nullable=False, default='running')
  22. error_message = Column(Text, nullable=True)
  23. start_time = Column(DateTime, nullable=True)
  24. end_time = Column(DateTime, nullable=True)
  25. class TopicPatternMiningConfig(Base):
  26. """一次执行中的一个 (dimension_mode, target_depth) 挖掘配置"""
  27. __tablename__ = 'topic_pattern_mining_config'
  28. id = Column(BigInteger, primary_key=True, autoincrement=True)
  29. execution_id = Column(BigInteger, nullable=False, index=True)
  30. dimension_mode = Column(String(50), nullable=False)
  31. target_depth = Column(String(50), nullable=False)
  32. # 结果摘要
  33. transaction_count = Column(Integer, nullable=True)
  34. itemset_count = Column(Integer, nullable=True)
  35. class TopicPatternCategory(Base):
  36. """分类树节点快照 - 记录执行时的分类结构"""
  37. __tablename__ = 'topic_pattern_category'
  38. id = Column(BigInteger, primary_key=True, autoincrement=True)
  39. execution_id = Column(BigInteger, nullable=False, index=True)
  40. source_stable_id = Column(BigInteger, nullable=True) # 原 GlobalCategory.stable_id
  41. source_type = Column(String(50), nullable=False) # 实质/形式/意图
  42. name = Column(String(500), nullable=False)
  43. description = Column(Text, nullable=True)
  44. category_nature = Column(String(50), nullable=True) # 内容/维度/领域
  45. path = Column(String(1000), nullable=True) # 完整路径 如 /食品/水果
  46. level = Column(Integer, nullable=True)
  47. parent_id = Column(BigInteger, nullable=True) # FK → 本表 id(快照内的父节点)
  48. parent_source_stable_id = Column(BigInteger, nullable=True) # 原 parent_stable_id(方便建树)
  49. element_count = Column(Integer, default=0) # 该分类下直属元素数
  50. class TopicPatternElement(Base):
  51. """帖子级元素记录 - 每行 = 一个帖子的一个点下的一个元素,代替 data_cache JSON"""
  52. __tablename__ = 'topic_pattern_element'
  53. id = Column(BigInteger, primary_key=True, autoincrement=True)
  54. execution_id = Column(BigInteger, nullable=False, index=True)
  55. # 帖子 & 选题点上下文
  56. post_id = Column(String(100), nullable=False, index=True)
  57. point_type = Column(String(50), nullable=False) # 灵感点/目的点/关键点
  58. point_text = Column(String(500), nullable=True) # 选题点内容("点" 字段)
  59. # 元素信息
  60. element_type = Column(String(50), nullable=False) # 实质/形式/意图
  61. name = Column(String(500), nullable=False)
  62. description = Column(Text, nullable=True)
  63. # 分类关联
  64. category_id = Column(BigInteger, nullable=True, index=True) # FK → TopicPatternCategory.id
  65. category_path = Column(String(1000), nullable=True) # 冗余存储分类路径 如 "食品>水果"
  66. class Post(Base):
  67. """帖子元数据 - 全局通用,跨 execution,通过 post_id 关联筛选"""
  68. __tablename__ = 'post'
  69. id = Column(BigInteger, primary_key=True, autoincrement=True)
  70. post_id = Column(String(100), nullable=False, unique=True, index=True)
  71. account_name = Column(String(200), nullable=True, index=True)
  72. merge_leve2 = Column(String(100), nullable=True, index=True)
  73. platform = Column(String(50), nullable=True, index=True)
  74. class TopicPatternItemset(Base):
  75. """频繁项集"""
  76. __tablename__ = 'topic_pattern_itemset'
  77. id = Column(BigInteger, primary_key=True, autoincrement=True)
  78. execution_id = Column(BigInteger, nullable=False, index=True)
  79. mining_config_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternMiningConfig.id
  80. combination_type = Column(String(200), nullable=False, index=True)
  81. item_count = Column(Integer, nullable=False, index=True)
  82. support = Column(Float, nullable=False)
  83. absolute_support = Column(Integer, nullable=False, index=True)
  84. dimensions = Column(JSON, nullable=True) # ["实质","形式"]
  85. is_cross_point = Column(Boolean, nullable=False, default=False)
  86. matched_post_ids = Column(JSON, nullable=True) # ["post_id_1", "post_id_2", ...] 直接存 post_id
  87. class TopicPatternItemsetItem(Base):
  88. """项集中的单个 item - 关联到分类节点"""
  89. __tablename__ = 'topic_pattern_itemset_item'
  90. id = Column(BigInteger, primary_key=True, autoincrement=True)
  91. itemset_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternItemset.id
  92. point_type = Column(String(50), nullable=True) # 灵感点/目的点/关键点(substance_form_only 模式下为空)
  93. dimension = Column(String(50), nullable=True) # 实质/形式/意图(point_type_only 模式下为空)
  94. category_id = Column(BigInteger, nullable=True) # FK → TopicPatternCategory.id
  95. category_path = Column(String(1000), nullable=True) # 分类路径部分 如 "食品>水果"
  96. element_name = Column(String(500), nullable=True) # 若为 name 层 item(含||),提取的元素名称
# ============================================================================
# Topic building - a CRUD model centred on CompositionItem
#
# Data relationships:
#   TopicBuildRecord (one build session)
#   └── TopicBuildTopic (N topics)
#       └── TopicBuildCompositionItem (components of a topic, each with its own lifecycle)
#           ├── TopicBuildItemRelation (derivation edges between items)
#           └── TopicBuildItemSource (external evidence for each item, one polymorphic table)
#
# Core design:
#   The agent process = CRUD on CompositionItem.
#   Each item has its own reason, point_type and state lifecycle
#   (created → validated → refined → deactivated).
#   Each item's derivation is recorded in full: links to other CompositionItems
#   go in TopicBuildItemRelation; external sources (itemset, element, category,
#   post, user_input) go in TopicBuildItemSource.
# ============================================================================
  112. class TopicBuildRecord(Base):
  113. """选题构建执行记录 - 一次完整的构建会话"""
  114. __tablename__ = 'topic_build_record'
  115. id = Column(BigInteger, primary_key=True, autoincrement=True)
  116. execution_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternExecution.id
  117. # ── 需求描述 ──
  118. demand = Column(Text, nullable=True) # 用户的构建需求/目标描述
  119. demand_constraints = Column(JSON, nullable=True) # 约束条件 {"target_audience", "content_style", "focus_dimensions", ...}
  120. # ── Agent & 模型配置 ──
  121. agent_type = Column(String(50), nullable=True, default='ClaudeCodeAgent') # ClaudeCodeAgent / LangchainAgent
  122. agent_config = Column(JSON, nullable=True) # Agent 类型相关配置,结构因 agent_type 而异
  123. # ClaudeCodeAgent: {"model_name": "claude-sonnet-4-6", "effort": "high"}
  124. # LangchainAgent: {"model_name": "openrouter:google/gemini-3-flash-preview", ...}
  125. status = Column(String(50), nullable=False, default='running') # running/success/failed
  126. is_deleted = Column(Boolean, nullable=False, default=False) # 软删除
  127. summary = Column(Text, nullable=True) # Agent 生成的执行总结
  128. topic_count = Column(Integer, nullable=True) # 最终产出选题数
  129. # ── Token & 成本 ──
  130. input_tokens = Column(Integer, nullable=True)
  131. output_tokens = Column(Integer, nullable=True)
  132. cost_usd = Column(Float, nullable=True)
  133. error_message = Column(Text, nullable=True)
  134. start_time = Column(DateTime, nullable=True)
  135. end_time = Column(DateTime, nullable=True)
  136. # ── 策略配置 ──
  137. strategies_config = Column(JSON, nullable=True) # {"always_on": ["策略A"], "on_demand": ["策略B"]}
  138. class TopicBuildTopic(Base):
  139. """选题
  140. topic_understanding: 对用户需求的理解(聚焦需求本身,不做自由联想)
  141. result: 最终选题结果(从 CompositionItems 实例化生成,定稿时写入)
  142. status: seed → developing → mature / infeasible
  143. failure_reason: 当 status=infeasible 时,记录无法产生选题的原因
  144. """
  145. __tablename__ = 'topic_build_topic'
  146. id = Column(BigInteger, primary_key=True, autoincrement=True)
  147. build_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildRecord.id
  148. execution_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternExecution.id
  149. sort_order = Column(Integer, nullable=False, default=0)
  150. # ── 需求理解 ──
  151. topic_understanding = Column(Text, nullable=True) # 对用户需求的理解(聚焦需求本身)
  152. # ── 选题结果 ──
  153. result = Column(Text, nullable=True) # 最终选题结果(从 CompositionItems 实例化生成)
  154. # ── 状态 ──
  155. status = Column(String(50), nullable=False, default='seed') # seed/developing/mature/infeasible
  156. # ── 不可行原因 ──
  157. failure_reason = Column(Text, nullable=True) # 当 status=infeasible 时,记录无法产生选题的具体原因
  158. class TopicBuildCompositionItem(Base):
  159. """选题组成元素 — Agent 过程的核心实体
  160. 每个 CompositionItem 代表选题中的一个组成部分。
  161. 通过 item_level 区分层级:
  162. - category: 分类方向(如 "政治领袖/实质"),定义选题的宏观方向
  163. - element: 具体元素(如 "毛泽东/实质"),定义选题的具体内容落点
  164. 推导链通过 TopicBuildItemRelation 记录 item 间因果关系,
  165. 外部数据证据通过 TopicBuildItemSource 记录。
  166. """
  167. __tablename__ = 'topic_build_composition_item'
  168. id = Column(BigInteger, primary_key=True, autoincrement=True)
  169. topic_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildTopic.id
  170. build_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildRecord.id
  171. # ── 元素信息 ──
  172. item_level = Column(String(20), nullable=False, default='category') # category / element
  173. dimension = Column(String(50), nullable=True) # 实质/形式/意图
  174. point_type = Column(String(50), nullable=True) # 灵感点/目的点/关键点
  175. element_name = Column(String(500), nullable=False)
  176. category_path = Column(String(1000), nullable=True)
  177. category_id = Column(BigInteger, nullable=True) # FK → TopicPatternCategory.id
  178. # ── 推导 ──
  179. derivation_type = Column(String(50), nullable=True) # user_demand/itemset_pattern/hierarchy/co_occurrence/post_extract/agent_reasoning
  180. step = Column(Integer, nullable=False, default=0) # 创建批次号
  181. reason = Column(Text, nullable=True) # 为什么加入这个元素
  182. # ── 管理 ──
  183. sort_order = Column(Integer, nullable=False, default=0)
  184. is_active = Column(Boolean, nullable=False, default=True)
  185. note = Column(Text, nullable=True)
  186. created_at = Column(DateTime, nullable=True)
  187. updated_at = Column(DateTime, nullable=True)
  188. class TopicBuildItemRelation(Base):
  189. """Item 间推导关系 — 记录 CompositionItem 之间的因果链(多对多)
  190. "因为 source_item 已在选题中,Agent 才去探索并发现/产生了 target_item"。
  191. 一个 item 可以有多个上游 parent(多个已有 item 共同触发了它的发现)。
  192. 可视化 DAG 的边直接来自这张表。
  193. """
  194. __tablename__ = 'topic_build_item_relation'
  195. id = Column(BigInteger, primary_key=True, autoincrement=True)
  196. topic_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildTopic.id
  197. source_item_id = Column(BigInteger, nullable=False, index=True) # 上游 item(触发方)
  198. target_item_id = Column(BigInteger, nullable=False, index=True) # 下游 item(被发现/产生的)
  199. reason = Column(Text, nullable=True) # 为什么这个关系成立
  200. created_at = Column(DateTime, nullable=True)
  201. class TopicBuildItemSource(Base):
  202. """外部数据证据 — 记录 CompositionItem 引用的外部数据(一对多)
  203. 只记录外部数据来源(项集、分类、帖子、用户输入等),
  204. 不记录 item-to-item 关系(那个由 TopicBuildItemRelation 负责)。
  205. source_type: itemset / element / category / post / user_input
  206. derivation_type: user_demand / itemset_pattern / hierarchy / co_occurrence / post_extract / agent_reasoning
  207. """
  208. __tablename__ = 'topic_build_item_source'
  209. id = Column(BigInteger, primary_key=True, autoincrement=True)
  210. topic_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildTopic.id
  211. target_item_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildCompositionItem.id
  212. # ── 外部数据来源 ──
  213. source_type = Column(String(50), nullable=False) # itemset / element / category / post / user_input
  214. source_reference_id = Column(BigInteger, nullable=True) # 外部数据 ID(itemset_id / category_id 等)
  215. source_detail = Column(JSON, nullable=True) # 来源数据快照 JSON
  216. # ── 描述 ──
  217. reason = Column(Text, nullable=True) # 这条证据说明了什么
  218. # ── 管理 ──
  219. is_active = Column(Boolean, nullable=False, default=True)
  220. created_at = Column(DateTime, nullable=True)
  221. class TopicBuildStrategy(Base):
  222. """构建策略 — 可注入 Agent 的行为规则集(主表)
  223. 存储策略元数据,内容版本通过 TopicBuildStrategyVersion 管理。
  224. """
  225. __tablename__ = 'topic_build_strategy'
  226. id = Column(BigInteger, primary_key=True, autoincrement=True)
  227. name = Column(String(200), nullable=False, unique=True) # 策略名称(唯一)
  228. description = Column(String(500), nullable=False) # 一行描述,用于 Agent 判断是否需要加载
  229. tags = Column(JSON, nullable=True) # 标签 ["对比", "竞品"]
  230. is_active = Column(Boolean, nullable=False, default=True) # 软删除/禁用
  231. current_version = Column(Integer, nullable=False, default=1) # 当前版本号
  232. created_at = Column(DateTime, nullable=True)
  233. updated_at = Column(DateTime, nullable=True)
  234. class TopicBuildStrategyVersion(Base):
  235. """构建策略版本 — 保存策略内容的历史版本
  236. 每次更新 content 时创建新版本行,旧版本保留可追溯。
  237. """
  238. __tablename__ = 'topic_build_strategy_version'
  239. id = Column(BigInteger, primary_key=True, autoincrement=True)
  240. strategy_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildStrategy.id
  241. version = Column(Integer, nullable=False) # 版本号
  242. content = Column(Text, nullable=False) # 完整的策略规则文本(Markdown)
  243. change_note = Column(String(500), nullable=True) # 变更说明
  244. created_at = Column(DateTime, nullable=True)
  245. class TopicBuildLog(Base):
  246. """选题构建执行日志 - 完整的 Agent 运行输出"""
  247. __tablename__ = 'topic_build_log'
  248. id = Column(BigInteger, primary_key=True, autoincrement=True)
  249. build_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildRecord.id (unique)
  250. log_content = Column(Text(length=4294967295), nullable=True) # LONGTEXT
  251. created_at = Column(DateTime, nullable=True)