| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308 |
- """Pattern Mining 数据模型"""
- from sqlalchemy import Column, BigInteger, Integer, String, Float, Boolean, Text, DateTime, JSON, Index
- from sqlalchemy.ext.declarative import declarative_base
# Shared declarative base class; every ORM model in this module inherits from it.
Base = declarative_base()
class TopicPatternExecution(Base):
    """Record of one pattern-mining execution.

    Holds a snapshot of the filter conditions, the mining parameters,
    a summary of the results and the run status.
    """

    __tablename__ = "topic_pattern_execution"

    id = Column(BigInteger, primary_key=True, autoincrement=True)

    # -- Snapshot of the filter conditions --
    cluster_name = Column(String(32), nullable=True)
    merge_leve2 = Column(String(100), nullable=True)
    platform = Column(String(50), nullable=True)
    account_name = Column(String(200), nullable=True)
    post_limit = Column(Integer, nullable=False, default=500)

    # -- Mining parameters --
    min_absolute_support = Column(Integer, nullable=False, default=2)
    classify_execution_id = Column(BigInteger, nullable=True)
    # JSON list of configs: [{dimension_mode, target_depths: [...]}, ...]
    mining_configs = Column(JSON, nullable=True)

    # -- Result summary --
    post_count = Column(Integer, nullable=True)
    # Total number of itemsets across all configs.
    itemset_count = Column(Integer, nullable=True)

    # -- Status --
    status = Column(String(50), nullable=False, default="running")
    error_message = Column(Text, nullable=True)
    start_time = Column(DateTime, nullable=True)
    end_time = Column(DateTime, nullable=True)
class TopicPatternMiningConfig(Base):
    """One (dimension_mode, target_depth) mining configuration within an execution."""

    __tablename__ = "topic_pattern_mining_config"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)
    dimension_mode = Column(String(50), nullable=False)
    target_depth = Column(String(50), nullable=False)

    # -- Result summary --
    transaction_count = Column(Integer, nullable=True)
    itemset_count = Column(Integer, nullable=True)
class TopicPatternCategory(Base):
    """Snapshot of a category-tree node, recording the category structure at execution time."""

    __tablename__ = "topic_pattern_category"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)
    # stable_id of the originating GlobalCategory.
    source_stable_id = Column(BigInteger, nullable=True)
    # One of 实质 / 形式 / 意图 (substance / form / intent).
    source_type = Column(String(50), nullable=False)
    name = Column(String(500), nullable=False)
    description = Column(Text, nullable=True)
    # One of 内容 / 维度 / 领域 (content / dimension / domain).
    category_nature = Column(String(50), nullable=True)
    # Full path, e.g. /食品/水果
    path = Column(String(1000), nullable=True)
    level = Column(Integer, nullable=True)
    # Logical reference to this table's own id (the parent node inside the snapshot);
    # no ForeignKey constraint is declared on the column.
    parent_id = Column(BigInteger, nullable=True)
    # The original parent_stable_id, kept to make tree building easier.
    parent_source_stable_id = Column(BigInteger, nullable=True)
    # Number of elements directly under this category.
    element_count = Column(Integer, default=0)
class TopicPatternElement(Base):
    """Post-level element record.

    Each row is one element under one point of one post; this table replaces
    the data_cache JSON.
    """

    __tablename__ = "topic_pattern_element"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)

    # -- Post & topic-point context --
    post_id = Column(String(100), nullable=False, index=True)
    # One of 灵感点 / 目的点 / 关键点 (inspiration point / purpose point / key point).
    point_type = Column(String(50), nullable=False)
    # Text of the topic point (the "点" field).
    point_text = Column(String(500), nullable=True)

    # -- Element information --
    # One of 实质 / 形式 / 意图 (substance / form / intent).
    element_type = Column(String(50), nullable=False)
    name = Column(String(500), nullable=False)
    description = Column(Text, nullable=True)

    # -- Category association --
    # Logical reference to TopicPatternCategory.id (no ForeignKey constraint declared).
    category_id = Column(BigInteger, nullable=True, index=True)
    # Denormalized copy of the category path, e.g. "食品>水果"
    category_path = Column(String(1000), nullable=True)
class Post(Base):
    """Post metadata.

    Global and shared across executions; rows are associated and filtered
    through post_id.
    """

    __tablename__ = "post"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    post_id = Column(String(100), nullable=False, unique=True, index=True)
    account_name = Column(String(200), nullable=True, index=True)
    merge_leve2 = Column(String(100), nullable=True, index=True)
    platform = Column(String(50), nullable=True, index=True)
class TopicPatternItemset(Base):
    """A frequent itemset."""

    __tablename__ = "topic_pattern_itemset"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)
    # Logical reference to TopicPatternMiningConfig.id (no ForeignKey constraint declared).
    mining_config_id = Column(BigInteger, nullable=False, index=True)
    combination_type = Column(String(200), nullable=False, index=True)
    item_count = Column(Integer, nullable=False, index=True)
    support = Column(Float, nullable=False)
    absolute_support = Column(Integer, nullable=False, index=True)
    # JSON list of dimensions, e.g. ["实质","形式"]
    dimensions = Column(JSON, nullable=True)
    is_cross_point = Column(Boolean, nullable=False, default=False)
    # JSON list storing the post_id values directly: ["post_id_1", "post_id_2", ...]
    matched_post_ids = Column(JSON, nullable=True)
class TopicPatternItemsetItem(Base):
    """A single item inside an itemset, associated with a category node."""

    __tablename__ = "topic_pattern_itemset_item"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Logical reference to TopicPatternItemset.id (no ForeignKey constraint declared).
    itemset_id = Column(BigInteger, nullable=False, index=True)
    # One of 灵感点 / 目的点 / 关键点; empty in substance_form_only mode.
    point_type = Column(String(50), nullable=True)
    # One of 实质 / 形式 / 意图; empty in point_type_only mode.
    dimension = Column(String(50), nullable=True)
    # Logical reference to TopicPatternCategory.id (no ForeignKey constraint declared).
    category_id = Column(BigInteger, nullable=True)
    # Category-path part of the item, e.g. "食品>水果"
    category_path = Column(String(1000), nullable=True)
    # For name-level items (those containing "||"), the extracted element name.
    element_name = Column(String(500), nullable=True)
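# ============================================================================
# Topic building - a CRUD model centred on CompositionItem
#
# Data relationships:
#   TopicBuildRecord (one build session)
#   └── TopicBuildTopic (N topics)
#       └── TopicBuildCompositionItem (topic composition elements, each with its own lifecycle)
#           └── TopicBuildItemSource (derivation sources of each element, one polymorphic table)
#
# Core design:
#   The Agent process = CRUD operations on CompositionItem.
#   Each item has its own reason, point_type and state lifecycle
#   (created → validated → refined → deactivated).
#   The derivation sources of each item are fully recorded through TopicBuildItemSource.
#   A source can be: another CompositionItem, itemset, element, category, post, user_input.
#
# NOTE(review): this overview does not mention TopicBuildItemRelation, whose docstring
# says item-to-item derivation is recorded there rather than in TopicBuildItemSource;
# the models below also expose only an is_active flag, not a status column for the
# lifecycle named above. Confirm which description is current.
# ============================================================================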
class TopicBuildRecord(Base):
    """Execution record of a topic build: one complete build session."""

    __tablename__ = "topic_build_record"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Logical reference to TopicPatternExecution.id (no ForeignKey constraint declared).
    execution_id = Column(BigInteger, nullable=False, index=True)

    # -- Demand description --
    # The user's build demand / goal description.
    demand = Column(Text, nullable=True)
    # Constraints, e.g. {"target_audience", "content_style", "focus_dimensions", ...}
    demand_constraints = Column(JSON, nullable=True)

    # -- Agent & model configuration --
    # ClaudeCodeAgent / LangchainAgent
    agent_type = Column(String(50), nullable=True, default="ClaudeCodeAgent")
    # Agent-type-specific configuration; its structure varies with agent_type:
    #   ClaudeCodeAgent: {"model_name": "claude-sonnet-4-6", "effort": "high"}
    #   LangchainAgent: {"model_name": "openrouter:google/gemini-3-flash-preview", ...}
    agent_config = Column(JSON, nullable=True)

    # running / success / failed
    status = Column(String(50), nullable=False, default="running")
    # Soft-delete flag.
    is_deleted = Column(Boolean, nullable=False, default=False)
    # Execution summary generated by the Agent.
    summary = Column(Text, nullable=True)
    # Number of topics finally produced.
    topic_count = Column(Integer, nullable=True)

    # -- Tokens & cost --
    input_tokens = Column(Integer, nullable=True)
    output_tokens = Column(Integer, nullable=True)
    cost_usd = Column(Float, nullable=True)

    error_message = Column(Text, nullable=True)
    start_time = Column(DateTime, nullable=True)
    end_time = Column(DateTime, nullable=True)

    # -- Strategy configuration --
    # {"always_on": ["策略A"], "on_demand": ["策略B"]}
    strategies_config = Column(JSON, nullable=True)
class TopicBuildTopic(Base):
    """A topic within a build.

    topic_understanding: understanding of the user's demand (focused on the
        demand itself, without free association).
    result: final topic result (instantiated from CompositionItems and written
        when the topic is finalized).
    status: seed → developing → mature / infeasible
    failure_reason: when status=infeasible, records why no topic could be produced.
    """

    __tablename__ = "topic_build_topic"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Logical reference to TopicBuildRecord.id (no ForeignKey constraint declared).
    build_id = Column(BigInteger, nullable=False, index=True)
    # Logical reference to TopicPatternExecution.id (no ForeignKey constraint declared).
    execution_id = Column(BigInteger, nullable=False, index=True)
    sort_order = Column(Integer, nullable=False, default=0)

    # -- Demand understanding --
    # Understanding of the user's demand (focused on the demand itself).
    topic_understanding = Column(Text, nullable=True)

    # -- Topic result --
    # Final topic result (instantiated from CompositionItems).
    result = Column(Text, nullable=True)

    # -- Status --
    # seed / developing / mature / infeasible
    status = Column(String(50), nullable=False, default="seed")

    # -- Infeasibility reason --
    # When status=infeasible, the specific reason no topic could be produced.
    failure_reason = Column(Text, nullable=True)
class TopicBuildCompositionItem(Base):
    """Topic composition element: the core entity of the Agent process.

    Each CompositionItem represents one component of a topic.
    The level is distinguished through item_level:
    - category: a category direction (e.g. "政治领袖/实质"), defining the
      macro direction of the topic
    - element: a concrete element (e.g. "毛泽东/实质"), defining the specific
      content the topic lands on
    The derivation chain records causal relations between items through
    TopicBuildItemRelation; external data evidence is recorded through
    TopicBuildItemSource.
    """

    __tablename__ = "topic_build_composition_item"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Logical reference to TopicBuildTopic.id (no ForeignKey constraint declared).
    topic_id = Column(BigInteger, nullable=False, index=True)
    # Logical reference to TopicBuildRecord.id (no ForeignKey constraint declared).
    build_id = Column(BigInteger, nullable=False, index=True)

    # -- Element information --
    # category / element
    item_level = Column(String(20), nullable=False, default="category")
    # One of 实质 / 形式 / 意图 (substance / form / intent).
    dimension = Column(String(50), nullable=True)
    # One of 灵感点 / 目的点 / 关键点 (inspiration point / purpose point / key point).
    point_type = Column(String(50), nullable=True)
    element_name = Column(String(500), nullable=False)
    category_path = Column(String(1000), nullable=True)
    # Logical reference to TopicPatternCategory.id (no ForeignKey constraint declared).
    category_id = Column(BigInteger, nullable=True)

    # -- Derivation --
    # user_demand / itemset_pattern / hierarchy / co_occurrence / post_extract /
    # agent_reasoning
    derivation_type = Column(String(50), nullable=True)
    # Creation batch number.
    step = Column(Integer, nullable=False, default=0)
    # Why this element was added.
    reason = Column(Text, nullable=True)

    # -- Management --
    sort_order = Column(Integer, nullable=False, default=0)
    is_active = Column(Boolean, nullable=False, default=True)
    note = Column(Text, nullable=True)
    created_at = Column(DateTime, nullable=True)
    updated_at = Column(DateTime, nullable=True)
class TopicBuildItemRelation(Base):
    """Derivation relation between items: the causal chain among CompositionItems.

    Many-to-many. Meaning: "because source_item was already in the topic, the
    Agent went on to explore and discovered/produced target_item".
    One item may have several upstream parents (several existing items jointly
    triggered its discovery).
    The edges of the visualized DAG come directly from this table.
    """

    __tablename__ = "topic_build_item_relation"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Logical reference to TopicBuildTopic.id (no ForeignKey constraint declared).
    topic_id = Column(BigInteger, nullable=False, index=True)
    # Upstream item (the triggering side).
    source_item_id = Column(BigInteger, nullable=False, index=True)
    # Downstream item (the one discovered / produced).
    target_item_id = Column(BigInteger, nullable=False, index=True)
    # Why this relation holds.
    reason = Column(Text, nullable=True)
    created_at = Column(DateTime, nullable=True)
class TopicBuildItemSource(Base):
    """External data evidence referenced by a CompositionItem (one-to-many).

    Records only external data sources (itemsets, categories, posts, user
    input, etc.); item-to-item relations are not recorded here, they are the
    responsibility of TopicBuildItemRelation.

    source_type: itemset / element / category / post / user_input
    derivation_type: user_demand / itemset_pattern / hierarchy / co_occurrence /
        post_extract / agent_reasoning
    """

    __tablename__ = "topic_build_item_source"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Logical reference to TopicBuildTopic.id (no ForeignKey constraint declared).
    topic_id = Column(BigInteger, nullable=False, index=True)
    # Logical reference to TopicBuildCompositionItem.id (no ForeignKey constraint declared).
    target_item_id = Column(BigInteger, nullable=False, index=True)

    # -- External data source --
    # itemset / element / category / post / user_input
    source_type = Column(String(50), nullable=False)
    # ID of the external data (itemset_id / category_id, etc.).
    source_reference_id = Column(BigInteger, nullable=True)
    # JSON snapshot of the source data.
    source_detail = Column(JSON, nullable=True)

    # -- Description --
    # What this piece of evidence shows.
    reason = Column(Text, nullable=True)

    # -- Management --
    is_active = Column(Boolean, nullable=False, default=True)
    created_at = Column(DateTime, nullable=True)
class TopicBuildStrategy(Base):
    """Build strategy: a set of behaviour rules that can be injected into the Agent (main table).

    Stores the strategy metadata; content versions are managed through
    TopicBuildStrategyVersion.
    """

    __tablename__ = "topic_build_strategy"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Strategy name (unique).
    name = Column(String(200), nullable=False, unique=True)
    # One-line description the Agent uses to decide whether to load the strategy.
    description = Column(String(500), nullable=False)
    # Tags, e.g. ["对比", "竞品"]
    tags = Column(JSON, nullable=True)
    # Soft delete / disable flag.
    is_active = Column(Boolean, nullable=False, default=True)
    # Current version number.
    current_version = Column(Integer, nullable=False, default=1)
    created_at = Column(DateTime, nullable=True)
    updated_at = Column(DateTime, nullable=True)
class TopicBuildStrategyVersion(Base):
    """Build strategy version: keeps the historical versions of a strategy's content.

    A new version row is created every time content is updated; old versions
    are retained for traceability.
    """

    __tablename__ = "topic_build_strategy_version"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Logical reference to TopicBuildStrategy.id (no ForeignKey constraint declared).
    strategy_id = Column(BigInteger, nullable=False, index=True)
    # Version number.
    version = Column(Integer, nullable=False)
    # Full text of the strategy rules (Markdown).
    content = Column(Text, nullable=False)
    # Change note.
    change_note = Column(String(500), nullable=True)
    created_at = Column(DateTime, nullable=True)
class TopicBuildLog(Base):
    """Execution log of a topic build: the complete output of the Agent run."""

    __tablename__ = "topic_build_log"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Logical reference to TopicBuildRecord.id (no ForeignKey constraint declared).
    # NOTE(review): the original comment marks this column "(unique)", but only a
    # plain index is declared here - confirm whether uniqueness is enforced elsewhere.
    build_id = Column(BigInteger, nullable=False, index=True)
    # Length is 2**32 - 1; the original comment labels this LONGTEXT
    # (presumably the MySQL type this length maps to - confirm for the target dialect).
    log_content = Column(Text(length=4294967295), nullable=True)
    created_at = Column(DateTime, nullable=True)
|