models2.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. """Pattern Mining 数据模型"""
  2. from sqlalchemy import Column, BigInteger, Integer, String, Float, Boolean, Text, DateTime, JSON, Index
  3. from sqlalchemy.ext.declarative import declarative_base
# Declarative base class that the model classes below inherit from.
Base = declarative_base()
  5. class TopicPatternExecution(Base):
  6. __tablename__ = 'topic_pattern_execution'
  7. id = Column(BigInteger, primary_key=True, autoincrement=True)
  8. # 筛选条件快照
  9. cluster_name = Column(String(32), nullable=True)
  10. merge_leve2 = Column(String(100), nullable=True)
  11. platform = Column(String(50), nullable=True)
  12. account_name = Column(String(200), nullable=True)
  13. post_limit = Column(Integer, nullable=False, default=500)
  14. # 挖掘参数
  15. min_absolute_support = Column(Integer, nullable=False, default=2)
  16. classify_execution_id = Column(BigInteger, nullable=True)
  17. mining_configs = Column(JSON, nullable=True) # [{dimension_mode, target_depths: [...]}, ...]
  18. # 结果摘要
  19. post_count = Column(Integer, nullable=True)
  20. itemset_count = Column(Integer, nullable=True) # 所有 config 的项集总数
  21. # 状态
  22. status = Column(String(50), nullable=False, default='running')
  23. error_message = Column(Text, nullable=True)
  24. start_time = Column(DateTime, nullable=True)
  25. end_time = Column(DateTime, nullable=True)
  26. class TopicPatternMiningConfig(Base):
  27. """一次执行中的一个 (dimension_mode, target_depth) 挖掘配置"""
  28. __tablename__ = 'topic_pattern_mining_config'
  29. id = Column(BigInteger, primary_key=True, autoincrement=True)
  30. execution_id = Column(BigInteger, nullable=False, index=True)
  31. dimension_mode = Column(String(50), nullable=False)
  32. target_depth = Column(String(50), nullable=False)
  33. # 结果摘要
  34. transaction_count = Column(Integer, nullable=True)
  35. itemset_count = Column(Integer, nullable=True)
  36. class TopicPatternCategory(Base):
  37. """分类树节点快照 - 记录执行时的分类结构"""
  38. __tablename__ = 'topic_pattern_category'
  39. id = Column(BigInteger, primary_key=True, autoincrement=True)
  40. execution_id = Column(BigInteger, nullable=False, index=True)
  41. source_stable_id = Column(BigInteger, nullable=True) # 原 GlobalCategory.stable_id
  42. source_type = Column(String(50), nullable=False) # 实质/形式/意图
  43. name = Column(String(500), nullable=False)
  44. description = Column(Text, nullable=True)
  45. category_nature = Column(String(50), nullable=True) # 内容/维度/领域
  46. path = Column(String(1000), nullable=True) # 完整路径 如 /食品/水果
  47. level = Column(Integer, nullable=True)
  48. parent_id = Column(BigInteger, nullable=True) # FK → 本表 id(快照内的父节点)
  49. parent_source_stable_id = Column(BigInteger, nullable=True) # 原 parent_stable_id(方便建树)
  50. element_count = Column(Integer, default=0) # 该分类下直属元素数
  51. class TopicPatternElement(Base):
  52. """帖子级元素记录 - 每行 = 一个帖子的一个点下的一个元素,代替 data_cache JSON"""
  53. __tablename__ = 'topic_pattern_element'
  54. id = Column(BigInteger, primary_key=True, autoincrement=True)
  55. execution_id = Column(BigInteger, nullable=False, index=True)
  56. # 帖子 & 选题点上下文
  57. post_id = Column(String(100), nullable=False, index=True)
  58. point_type = Column(String(50), nullable=False) # 灵感点/目的点/关键点
  59. point_text = Column(String(500), nullable=True) # 选题点内容("点" 字段)
  60. # 元素信息
  61. element_type = Column(String(50), nullable=False) # 实质/形式/意图
  62. name = Column(String(500), nullable=False)
  63. description = Column(Text, nullable=True)
  64. # 分类关联
  65. category_id = Column(BigInteger, nullable=True, index=True) # FK → TopicPatternCategory.id
  66. category_path = Column(String(1000), nullable=True) # 冗余存储分类路径 如 "食品>水果"
  67. class Post(Base):
  68. """帖子元数据 - 全局通用,跨 execution,通过 post_id 关联筛选"""
  69. __tablename__ = 'post'
  70. id = Column(BigInteger, primary_key=True, autoincrement=True)
  71. post_id = Column(String(100), nullable=False, unique=True, index=True)
  72. account_name = Column(String(200), nullable=True, index=True)
  73. merge_leve2 = Column(String(100), nullable=True, index=True)
  74. platform = Column(String(50), nullable=True, index=True)
  75. class TopicPatternItemset(Base):
  76. """频繁项集"""
  77. __tablename__ = 'topic_pattern_itemset'
  78. id = Column(BigInteger, primary_key=True, autoincrement=True)
  79. execution_id = Column(BigInteger, nullable=False, index=True)
  80. mining_config_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternMiningConfig.id
  81. combination_type = Column(String(200), nullable=False, index=True)
  82. item_count = Column(Integer, nullable=False, index=True)
  83. support = Column(Float, nullable=False)
  84. absolute_support = Column(Integer, nullable=False, index=True)
  85. dimensions = Column(JSON, nullable=True) # ["实质","形式"]
  86. is_cross_point = Column(Boolean, nullable=False, default=False)
  87. matched_post_ids = Column(JSON, nullable=True) # ["post_id_1", "post_id_2", ...] 直接存 post_id
  88. class TopicPatternItemsetItem(Base):
  89. """项集中的单个 item - 关联到分类节点"""
  90. __tablename__ = 'topic_pattern_itemset_item'
  91. id = Column(BigInteger, primary_key=True, autoincrement=True)
  92. itemset_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternItemset.id
  93. point_type = Column(String(50), nullable=True) # 灵感点/目的点/关键点(substance_form_only 模式下为空)
  94. dimension = Column(String(50), nullable=True) # 实质/形式/意图(point_type_only 模式下为空)
  95. category_id = Column(BigInteger, nullable=True) # FK → TopicPatternCategory.id
  96. category_path = Column(String(1000), nullable=True) # 分类路径部分 如 "食品>水果"
  97. element_name = Column(String(500), nullable=True) # 若为 name 层 item(含||),提取的元素名称
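# ============================================================================
# Topic building - a CRUD model centered on CompositionItem
#
# Data relationships:
#   TopicBuildRecord (one build session)
#   └── TopicBuildTopic (N topics)
#       └── TopicBuildCompositionItem (topic composition element, with its own lifecycle)
#           ├── TopicBuildItemSource (derivation sources of each element; one polymorphic table)
#           └── TopicBuildItemRelation (item-to-item derivation edges)
#
# Core design:
#   The Agent process = CRUD operations on CompositionItem.
#   Each item has its own reason, point_type and status lifecycle
#   (created → validated → refined → deactivated).
#   NOTE(review): TopicBuildCompositionItem below declares only `is_active`,
#   not a status column - confirm where the lifecycle state is stored.
#   The derivation sources of each item are recorded in full through
#   TopicBuildItemSource.
#   A source can be: another CompositionItem, itemset, element, category,
#   post, user_input.
#   NOTE(review): per the class docstrings below, links to other
#   CompositionItems are recorded in TopicBuildItemRelation, while
#   TopicBuildItemSource holds external data evidence only.
# ============================================================================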
  113. class TopicBuildRecord(Base):
  114. """选题构建执行记录 - 一次完整的构建会话"""
  115. __tablename__ = 'topic_build_record'
  116. id = Column(BigInteger, primary_key=True, autoincrement=True)
  117. execution_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternExecution.id
  118. # ── 需求描述 ──
  119. demand = Column(Text, nullable=True) # 用户的构建需求/目标描述
  120. demand_constraints = Column(JSON, nullable=True) # 约束条件 {"target_audience", "content_style", "focus_dimensions", ...}
  121. # ── Agent & 模型配置 ──
  122. agent_type = Column(String(50), nullable=True, default='ClaudeCodeAgent') # ClaudeCodeAgent / LangchainAgent
  123. agent_config = Column(JSON, nullable=True) # Agent 类型相关配置,结构因 agent_type 而异
  124. # ClaudeCodeAgent: {"model_name": "claude-sonnet-4-6", "effort": "high"}
  125. # LangchainAgent: {"model_name": "openrouter:google/gemini-3-flash-preview", ...}
  126. status = Column(String(50), nullable=False, default='running') # running/success/failed
  127. is_deleted = Column(Boolean, nullable=False, default=False) # 软删除
  128. summary = Column(Text, nullable=True) # Agent 生成的执行总结
  129. topic_count = Column(Integer, nullable=True) # 最终产出选题数
  130. # ── Token & 成本 ──
  131. input_tokens = Column(Integer, nullable=True)
  132. output_tokens = Column(Integer, nullable=True)
  133. cost_usd = Column(Float, nullable=True)
  134. error_message = Column(Text, nullable=True)
  135. start_time = Column(DateTime, nullable=True)
  136. end_time = Column(DateTime, nullable=True)
  137. # ── 策略配置 ──
  138. strategies_config = Column(JSON, nullable=True) # {"always_on": ["策略A"], "on_demand": ["策略B"]}
  139. class TopicBuildTopic(Base):
  140. """选题
  141. topic_understanding: 对用户需求的理解(聚焦需求本身,不做自由联想)
  142. result: 最终选题结果(从 CompositionItems 实例化生成,定稿时写入)
  143. status: seed → developing → mature / infeasible
  144. failure_reason: 当 status=infeasible 时,记录无法产生选题的原因
  145. """
  146. __tablename__ = 'topic_build_topic'
  147. id = Column(BigInteger, primary_key=True, autoincrement=True)
  148. build_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildRecord.id
  149. execution_id = Column(BigInteger, nullable=False, index=True) # FK → TopicPatternExecution.id
  150. sort_order = Column(Integer, nullable=False, default=0)
  151. # ── 需求理解 ──
  152. topic_understanding = Column(Text, nullable=True) # 对用户需求的理解(聚焦需求本身)
  153. # ── 选题结果 ──
  154. result = Column(Text, nullable=True) # 最终选题结果(从 CompositionItems 实例化生成)
  155. # ── 状态 ──
  156. status = Column(String(50), nullable=False, default='seed') # seed/developing/mature/infeasible
  157. # ── 不可行原因 ──
  158. failure_reason = Column(Text, nullable=True) # 当 status=infeasible 时,记录无法产生选题的具体原因
  159. class TopicBuildCompositionItem(Base):
  160. """选题组成元素 — Agent 过程的核心实体
  161. 每个 CompositionItem 代表选题中的一个组成部分。
  162. 通过 item_level 区分层级:
  163. - category: 分类方向(如 "政治领袖/实质"),定义选题的宏观方向
  164. - element: 具体元素(如 "毛泽东/实质"),定义选题的具体内容落点
  165. 推导链通过 TopicBuildItemRelation 记录 item 间因果关系,
  166. 外部数据证据通过 TopicBuildItemSource 记录。
  167. """
  168. __tablename__ = 'topic_build_composition_item'
  169. id = Column(BigInteger, primary_key=True, autoincrement=True)
  170. topic_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildTopic.id
  171. build_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildRecord.id
  172. # ── 元素信息 ──
  173. item_level = Column(String(20), nullable=False, default='category') # category / element
  174. dimension = Column(String(50), nullable=True) # 实质/形式/意图
  175. point_type = Column(String(50), nullable=True) # 灵感点/目的点/关键点
  176. element_name = Column(String(500), nullable=False)
  177. category_path = Column(String(1000), nullable=True)
  178. category_id = Column(BigInteger, nullable=True) # FK → TopicPatternCategory.id
  179. # ── 推导 ──
  180. derivation_type = Column(String(50), nullable=True) # user_demand/itemset_pattern/hierarchy/co_occurrence/post_extract/agent_reasoning
  181. step = Column(Integer, nullable=False, default=0) # 创建批次号
  182. reason = Column(Text, nullable=True) # 为什么加入这个元素
  183. # ── 管理 ──
  184. sort_order = Column(Integer, nullable=False, default=0)
  185. is_active = Column(Boolean, nullable=False, default=True)
  186. note = Column(Text, nullable=True)
  187. created_at = Column(DateTime, nullable=True)
  188. updated_at = Column(DateTime, nullable=True)
  189. class TopicBuildItemRelation(Base):
  190. """Item 间推导关系 — 记录 CompositionItem 之间的因果链(多对多)
  191. "因为 source_item 已在选题中,Agent 才去探索并发现/产生了 target_item"。
  192. 一个 item 可以有多个上游 parent(多个已有 item 共同触发了它的发现)。
  193. 可视化 DAG 的边直接来自这张表。
  194. """
  195. __tablename__ = 'topic_build_item_relation'
  196. id = Column(BigInteger, primary_key=True, autoincrement=True)
  197. topic_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildTopic.id
  198. source_item_id = Column(BigInteger, nullable=False, index=True) # 上游 item(触发方)
  199. target_item_id = Column(BigInteger, nullable=False, index=True) # 下游 item(被发现/产生的)
  200. reason = Column(Text, nullable=True) # 为什么这个关系成立
  201. created_at = Column(DateTime, nullable=True)
  202. class TopicBuildItemSource(Base):
  203. """外部数据证据 — 记录 CompositionItem 引用的外部数据(一对多)
  204. 只记录外部数据来源(项集、分类、帖子、用户输入等),
  205. 不记录 item-to-item 关系(那个由 TopicBuildItemRelation 负责)。
  206. source_type: itemset / element / category / post / user_input
  207. derivation_type: user_demand / itemset_pattern / hierarchy / co_occurrence / post_extract / agent_reasoning
  208. """
  209. __tablename__ = 'topic_build_item_source'
  210. id = Column(BigInteger, primary_key=True, autoincrement=True)
  211. topic_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildTopic.id
  212. target_item_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildCompositionItem.id
  213. # ── 外部数据来源 ──
  214. source_type = Column(String(50), nullable=False) # itemset / element / category / post / user_input
  215. source_reference_id = Column(BigInteger, nullable=True) # 外部数据 ID(itemset_id / category_id 等)
  216. source_detail = Column(JSON, nullable=True) # 来源数据快照 JSON
  217. # ── 描述 ──
  218. reason = Column(Text, nullable=True) # 这条证据说明了什么
  219. # ── 管理 ──
  220. is_active = Column(Boolean, nullable=False, default=True)
  221. created_at = Column(DateTime, nullable=True)
  222. class TopicBuildStrategy(Base):
  223. """构建策略 — 可注入 Agent 的行为规则集(主表)
  224. 存储策略元数据,内容版本通过 TopicBuildStrategyVersion 管理。
  225. """
  226. __tablename__ = 'topic_build_strategy'
  227. id = Column(BigInteger, primary_key=True, autoincrement=True)
  228. name = Column(String(200), nullable=False, unique=True) # 策略名称(唯一)
  229. description = Column(String(500), nullable=False) # 一行描述,用于 Agent 判断是否需要加载
  230. tags = Column(JSON, nullable=True) # 标签 ["对比", "竞品"]
  231. is_active = Column(Boolean, nullable=False, default=True) # 软删除/禁用
  232. current_version = Column(Integer, nullable=False, default=1) # 当前版本号
  233. created_at = Column(DateTime, nullable=True)
  234. updated_at = Column(DateTime, nullable=True)
  235. class TopicBuildStrategyVersion(Base):
  236. """构建策略版本 — 保存策略内容的历史版本
  237. 每次更新 content 时创建新版本行,旧版本保留可追溯。
  238. """
  239. __tablename__ = 'topic_build_strategy_version'
  240. id = Column(BigInteger, primary_key=True, autoincrement=True)
  241. strategy_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildStrategy.id
  242. version = Column(Integer, nullable=False) # 版本号
  243. content = Column(Text, nullable=False) # 完整的策略规则文本(Markdown)
  244. change_note = Column(String(500), nullable=True) # 变更说明
  245. created_at = Column(DateTime, nullable=True)
  246. class TopicBuildLog(Base):
  247. """选题构建执行日志 - 完整的 Agent 运行输出"""
  248. __tablename__ = 'topic_build_log'
  249. id = Column(BigInteger, primary_key=True, autoincrement=True)
  250. build_id = Column(BigInteger, nullable=False, index=True) # FK → TopicBuildRecord.id (unique)
  251. log_content = Column(Text(length=4294967295), nullable=True) # LONGTEXT
  252. created_at = Column(DateTime, nullable=True)