"""Pattern Mining data models.

SQLAlchemy declarative models for pattern-mining executions and for the
topic-build workflow that consumes the mined patterns.

Conventions used in the comments of this module:

* ``FK -> X.id`` marks a *logical* reference. The columns are plain
  ``BigInteger`` columns; no ``ForeignKey`` constraint is declared in this
  module.
* Label values (point types, dimensions, category natures, example paths and
  tags) are given as English glosses. The original comments listed them in
  Chinese, which is presumably how they are stored -- verify against the
  actual data before relying on the English spellings.
"""

from sqlalchemy import (
    Column,
    BigInteger,
    Integer,
    String,
    Float,
    Boolean,
    Text,
    DateTime,
    JSON,
    Index,
)

try:
    # SQLAlchemy 1.4+ provides declarative_base in sqlalchemy.orm; the
    # sqlalchemy.ext.declarative location is deprecated from that release on.
    from sqlalchemy.orm import declarative_base
except ImportError:  # SQLAlchemy < 1.4
    from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class TopicPatternExecution(Base):
    """One pattern-mining execution.

    Stores a snapshot of the filter criteria, the mining parameters, a result
    summary and the run status.
    """

    __tablename__ = 'topic_pattern_execution'

    id = Column(BigInteger, primary_key=True, autoincrement=True)

    # -- Filter criteria snapshot --
    cluster_name = Column(String(32), nullable=True)
    merge_leve2 = Column(String(100), nullable=True)
    platform = Column(String(50), nullable=True)
    account_name = Column(String(200), nullable=True)
    post_limit = Column(Integer, nullable=False, default=500)

    # -- Mining parameters --
    min_absolute_support = Column(Integer, nullable=False, default=2)
    classify_execution_id = Column(BigInteger, nullable=True)
    # [{dimension_mode, target_depths: [...]}, ...]
    mining_configs = Column(JSON, nullable=True)

    # -- Result summary --
    post_count = Column(Integer, nullable=True)
    # Total number of itemsets across all configs.
    itemset_count = Column(Integer, nullable=True)

    # -- Status --
    status = Column(String(50), nullable=False, default='running')
    error_message = Column(Text, nullable=True)
    start_time = Column(DateTime, nullable=True)
    end_time = Column(DateTime, nullable=True)


class TopicPatternMiningConfig(Base):
    """One (dimension_mode, target_depth) mining config within an execution."""

    __tablename__ = 'topic_pattern_mining_config'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)
    dimension_mode = Column(String(50), nullable=False)
    target_depth = Column(String(50), nullable=False)

    # -- Result summary --
    transaction_count = Column(Integer, nullable=True)
    itemset_count = Column(Integer, nullable=True)


class TopicPatternCategory(Base):
    """Category-tree node snapshot.

    Records the category structure as it was at execution time.
    """

    __tablename__ = 'topic_pattern_category'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)
    # Original GlobalCategory.stable_id.
    source_stable_id = Column(BigInteger, nullable=True)
    # substance / form / intent
    source_type = Column(String(50), nullable=False)
    name = Column(String(500), nullable=False)
    description = Column(Text, nullable=True)
    # content / dimension / domain
    category_nature = Column(String(50), nullable=True)
    # Full path, e.g. /food/fruit
    path = Column(String(1000), nullable=True)
    level = Column(Integer, nullable=True)
    # FK -> this table's id (the parent node inside the snapshot).
    parent_id = Column(BigInteger, nullable=True)
    # Original parent_stable_id (kept to make tree building easier).
    parent_source_stable_id = Column(BigInteger, nullable=True)
    # Number of elements directly under this category.
    element_count = Column(Integer, default=0)


class TopicPatternElement(Base):
    """Post-level element record.

    Each row is one element under one point of one post; this table replaces
    the data_cache JSON.
    """

    __tablename__ = 'topic_pattern_element'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)

    # -- Post & topic-point context --
    post_id = Column(String(100), nullable=False, index=True)
    # inspiration point / purpose point / key point
    point_type = Column(String(50), nullable=False)
    # Topic-point content (the "point" field of the source record).
    point_text = Column(String(500), nullable=True)

    # -- Element info --
    # substance / form / intent
    element_type = Column(String(50), nullable=False)
    name = Column(String(500), nullable=False)
    description = Column(Text, nullable=True)

    # -- Category link --
    # FK -> TopicPatternCategory.id
    category_id = Column(BigInteger, nullable=True, index=True)
    # Denormalized copy of the category path, e.g. "food>fruit".
    category_path = Column(String(1000), nullable=True)


class Post(Base):
    """Post metadata.

    Global and shared across executions; joined / filtered through post_id.
    """

    __tablename__ = 'post'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    post_id = Column(String(100), nullable=False, unique=True, index=True)
    account_name = Column(String(200), nullable=True, index=True)
    merge_leve2 = Column(String(100), nullable=True, index=True)
    platform = Column(String(50), nullable=True, index=True)


class TopicPatternItemset(Base):
    """Frequent itemset."""

    __tablename__ = 'topic_pattern_itemset'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    execution_id = Column(BigInteger, nullable=False, index=True)
    # FK -> TopicPatternMiningConfig.id
    mining_config_id = Column(BigInteger, nullable=False, index=True)
    combination_type = Column(String(200), nullable=False, index=True)
    item_count = Column(Integer, nullable=False, index=True)
    support = Column(Float, nullable=False)
    absolute_support = Column(Integer, nullable=False, index=True)
    # JSON list of dimension labels, e.g. the substance and form dimensions.
    dimensions = Column(JSON, nullable=True)
    is_cross_point = Column(Boolean, nullable=False, default=False)
    # ["post_id_1", "post_id_2", ...] -- post_id values are stored directly.
    matched_post_ids = Column(JSON, nullable=True)


class TopicPatternItemsetItem(Base):
    """A single item of an itemset, linked to a category node."""

    __tablename__ = 'topic_pattern_itemset_item'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # FK -> TopicPatternItemset.id
    itemset_id = Column(BigInteger, nullable=False, index=True)
    # inspiration point / purpose point / key point
    # (empty in substance_form_only mode).
    point_type = Column(String(50), nullable=True)
    # substance / form / intent (empty in point_type_only mode).
    dimension = Column(String(50), nullable=True)
    # FK -> TopicPatternCategory.id
    category_id = Column(BigInteger, nullable=True)
    # Category-path part of the item, e.g. "food>fruit".
    category_path = Column(String(1000), nullable=True)
    # Element name extracted when this is a name-level item (contains "||").
    element_name = Column(String(500), nullable=True)


# ============================================================================
# Topic building - a CRUD model centred on CompositionItem
#
# Data relationships:
#   TopicBuildRecord (one build session)
#     └── TopicBuildTopic (N topics)
#           └── TopicBuildCompositionItem (topic components, each with its
#                                          own lifecycle)
#                 └── TopicBuildItemSource (derivation sources of each item,
#                                           one unified polymorphic table)
#
# Core design:
#   The agent process = CRUD on CompositionItem.
#   Each item has its own reason, point_type and status lifecycle
#   (created → validated → refined → deactivated).
#   The derivation sources of each item are fully recorded through
#   TopicBuildItemSource.
#   A source can be: another CompositionItem, itemset, element, category,
#   post, user_input.
#
# NOTE(review): the TopicBuildItemSource docstring states that item-to-item
# links are recorded by TopicBuildItemRelation instead, so the "another
# CompositionItem" source above may be outdated -- confirm.
# ============================================================================


class TopicBuildRecord(Base):
    """Topic-build execution record: one complete build session."""

    __tablename__ = 'topic_build_record'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # FK -> TopicPatternExecution.id
    execution_id = Column(BigInteger, nullable=False, index=True)

    # -- Demand description --
    # The user's build demand / goal description.
    demand = Column(Text, nullable=True)
    # Constraints:
    # {"target_audience", "content_style", "focus_dimensions", ...}
    demand_constraints = Column(JSON, nullable=True)

    # -- Agent & model configuration --
    # ClaudeCodeAgent / LangchainAgent
    agent_type = Column(String(50), nullable=True, default='ClaudeCodeAgent')
    # Agent-specific configuration; its structure depends on agent_type:
    #   ClaudeCodeAgent: {"model_name": "claude-sonnet-4-6", "effort": "high"}
    #   LangchainAgent: {"model_name": "openrouter:google/gemini-3-flash-preview", ...}
    agent_config = Column(JSON, nullable=True)

    # running / success / failed
    status = Column(String(50), nullable=False, default='running')
    # Soft-delete flag.
    is_deleted = Column(Boolean, nullable=False, default=False)
    # Execution summary generated by the agent.
    summary = Column(Text, nullable=True)
    # Number of topics finally produced.
    topic_count = Column(Integer, nullable=True)

    # -- Tokens & cost --
    input_tokens = Column(Integer, nullable=True)
    output_tokens = Column(Integer, nullable=True)
    cost_usd = Column(Float, nullable=True)

    error_message = Column(Text, nullable=True)
    start_time = Column(DateTime, nullable=True)
    end_time = Column(DateTime, nullable=True)

    # -- Strategy configuration --
    # {"always_on": ["strategy A"], "on_demand": ["strategy B"]}
    strategies_config = Column(JSON, nullable=True)


class TopicBuildTopic(Base):
    """A topic.

    topic_understanding: understanding of the user's demand (focused on the
        demand itself, no free association).
    result: final topic result (instantiated from the CompositionItems and
        written when the topic is finalized).
    status: seed → developing → mature / infeasible
    failure_reason: when status=infeasible, why no topic could be produced.
    """

    __tablename__ = 'topic_build_topic'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # FK -> TopicBuildRecord.id
    build_id = Column(BigInteger, nullable=False, index=True)
    # FK -> TopicPatternExecution.id
    execution_id = Column(BigInteger, nullable=False, index=True)
    sort_order = Column(Integer, nullable=False, default=0)

    # -- Demand understanding --
    # Understanding of the user's demand (focused on the demand itself).
    topic_understanding = Column(Text, nullable=True)

    # -- Topic result --
    # Final topic result (instantiated from the CompositionItems).
    result = Column(Text, nullable=True)

    # -- Status --
    # seed / developing / mature / infeasible
    status = Column(String(50), nullable=False, default='seed')

    # -- Infeasibility reason --
    # When status=infeasible, the concrete reason no topic could be produced.
    failure_reason = Column(Text, nullable=True)


class TopicBuildCompositionItem(Base):
    """Topic composition item: the core entity of the agent process.

    Each CompositionItem represents one component of a topic. item_level
    distinguishes two levels:
    - category: a category direction (e.g. "political leaders / substance")
      that defines the macro direction of the topic
    - element: a concrete element (e.g. "Mao Zedong / substance") that
      defines where the topic's content concretely lands

    The derivation chain (causal relations between items) is recorded by
    TopicBuildItemRelation; external data evidence is recorded by
    TopicBuildItemSource.
    """

    __tablename__ = 'topic_build_composition_item'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # FK -> TopicBuildTopic.id
    topic_id = Column(BigInteger, nullable=False, index=True)
    # FK -> TopicBuildRecord.id
    build_id = Column(BigInteger, nullable=False, index=True)

    # -- Element info --
    # category / element
    item_level = Column(String(20), nullable=False, default='category')
    # substance / form / intent
    dimension = Column(String(50), nullable=True)
    # inspiration point / purpose point / key point
    point_type = Column(String(50), nullable=True)
    element_name = Column(String(500), nullable=False)
    category_path = Column(String(1000), nullable=True)
    # FK -> TopicPatternCategory.id
    category_id = Column(BigInteger, nullable=True)

    # -- Derivation --
    # user_demand / itemset_pattern / hierarchy / co_occurrence /
    # post_extract / agent_reasoning
    derivation_type = Column(String(50), nullable=True)
    # Creation batch number.
    step = Column(Integer, nullable=False, default=0)
    # Why this element was added.
    reason = Column(Text, nullable=True)

    # -- Management --
    sort_order = Column(Integer, nullable=False, default=0)
    is_active = Column(Boolean, nullable=False, default=True)
    note = Column(Text, nullable=True)
    created_at = Column(DateTime, nullable=True)
    updated_at = Column(DateTime, nullable=True)


class TopicBuildItemRelation(Base):
    """Derivation relation between items (many-to-many causal chain).

    "Because source_item was already part of the topic, the agent went on to
    explore and discovered / produced target_item."
    One item can have several upstream parents (several existing items
    jointly triggered its discovery).
    The edges of the visualized DAG come directly from this table.
    """

    __tablename__ = 'topic_build_item_relation'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # FK -> TopicBuildTopic.id
    topic_id = Column(BigInteger, nullable=False, index=True)
    # Upstream item (the trigger).
    source_item_id = Column(BigInteger, nullable=False, index=True)
    # Downstream item (the one discovered / produced).
    target_item_id = Column(BigInteger, nullable=False, index=True)
    # Why this relation holds.
    reason = Column(Text, nullable=True)
    created_at = Column(DateTime, nullable=True)


class TopicBuildItemSource(Base):
    """External data evidence referenced by a CompositionItem (one-to-many).

    Records only external data sources (itemsets, categories, posts, user
    input, ...); item-to-item relations are not recorded here (that is the
    job of TopicBuildItemRelation).

    source_type: itemset / element / category / post / user_input
    derivation_type: user_demand / itemset_pattern / hierarchy /
        co_occurrence / post_extract / agent_reasoning

    NOTE(review): this model declares no derivation_type column; the values
    listed above match the comment on
    TopicBuildCompositionItem.derivation_type -- confirm which is intended.
    """

    __tablename__ = 'topic_build_item_source'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # FK -> TopicBuildTopic.id
    topic_id = Column(BigInteger, nullable=False, index=True)
    # FK -> TopicBuildCompositionItem.id
    target_item_id = Column(BigInteger, nullable=False, index=True)

    # -- External data source --
    # itemset / element / category / post / user_input
    source_type = Column(String(50), nullable=False)
    # ID of the external data (itemset_id / category_id, etc.).
    source_reference_id = Column(BigInteger, nullable=True)
    # JSON snapshot of the source data.
    source_detail = Column(JSON, nullable=True)

    # -- Description --
    # What this piece of evidence shows.
    reason = Column(Text, nullable=True)

    # -- Management --
    is_active = Column(Boolean, nullable=False, default=True)
    created_at = Column(DateTime, nullable=True)


class TopicBuildStrategy(Base):
    """Build strategy: a rule set that can be injected into the agent.

    This is the main table holding strategy metadata; content versions are
    managed through TopicBuildStrategyVersion.
    """

    __tablename__ = 'topic_build_strategy'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Strategy name (unique).
    name = Column(String(200), nullable=False, unique=True)
    # One-line description the agent uses to decide whether to load it.
    description = Column(String(500), nullable=False)
    # Tags, e.g. ["comparison", "competitor"].
    tags = Column(JSON, nullable=True)
    # Soft delete / disable flag.
    is_active = Column(Boolean, nullable=False, default=True)
    # Current version number.
    current_version = Column(Integer, nullable=False, default=1)
    created_at = Column(DateTime, nullable=True)
    updated_at = Column(DateTime, nullable=True)


class TopicBuildStrategyVersion(Base):
    """Build strategy version: history of a strategy's content.

    Every content update creates a new version row; older versions are kept
    for traceability.
    """

    __tablename__ = 'topic_build_strategy_version'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # FK -> TopicBuildStrategy.id
    strategy_id = Column(BigInteger, nullable=False, index=True)
    # Version number.
    version = Column(Integer, nullable=False)
    # Full strategy rule text (Markdown).
    content = Column(Text, nullable=False)
    # Change note.
    change_note = Column(String(500), nullable=True)
    created_at = Column(DateTime, nullable=True)


class TopicBuildLog(Base):
    """Topic-build execution log: the complete output of an agent run."""

    __tablename__ = 'topic_build_log'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # FK -> TopicBuildRecord.id. The original comment marks this "(unique)",
    # but only a plain index is declared.
    # NOTE(review): confirm whether a unique constraint is intended.
    build_id = Column(BigInteger, nullable=False, index=True)
    # The explicit length requests a LONGTEXT-sized column.
    log_content = Column(Text(length=4294967295), nullable=True)
    created_at = Column(DateTime, nullable=True)