| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272 |
- -- =========================================================
- -- Search Agent 数据库表结构
- -- 数据库名称: search_agent
- -- 字符集: utf8mb4
- -- =========================================================
- SET NAMES utf8mb4;
- SET FOREIGN_KEY_CHECKS = 0;
- -- =========================================================
- -- 1) 上游需求快照表:冻结需求上下文,便于回溯
- -- =========================================================
- CREATE TABLE IF NOT EXISTS supply_demand_snapshot (
- id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增主键',
- demand_id BIGINT NOT NULL COMMENT '上游需求ID(业务侧主键)',
- demand_code VARCHAR(64) DEFAULT NULL COMMENT '上游需求编码(可选)',
- query VARCHAR(255) NOT NULL COMMENT '核心搜索词',
- query_expansion JSON DEFAULT NULL COMMENT '扩展词列表(同义词/场景词/禁用词等)',
- platform VARCHAR(32) NOT NULL DEFAULT 'weixin' COMMENT '目标平台,当前为 weixin',
- expected_count INT NOT NULL DEFAULT 10 COMMENT '期望返回条数',
- audience_profile JSON DEFAULT NULL COMMENT '目标人群画像(如50+)',
- quality_constraints JSON DEFAULT NULL COMMENT '质量约束(最低阅读量、黑名单词等)',
- source_payload JSON DEFAULT NULL COMMENT '上游原始请求快照',
- version INT NOT NULL DEFAULT 1 COMMENT '快照版本号(同一 demand_id 可多版本)',
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
- PRIMARY KEY (id),
- KEY idx_demand_id (demand_id),
- KEY idx_platform (platform),
- KEY idx_created_at (created_at)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='上游需求快照表';
- -- =========================================================
- -- 2) 任务主表:一个 demand 可触发多次任务(重跑/补跑)
- -- =========================================================
- CREATE TABLE IF NOT EXISTS supply_task (
- id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增主键',
- task_id VARCHAR(64) NOT NULL COMMENT '任务唯一ID(建议UUID/雪花ID)',
- demand_snapshot_id BIGINT UNSIGNED NOT NULL COMMENT '关联 supply_demand_snapshot.id',
- demand_id BIGINT NOT NULL COMMENT '冗余字段:上游需求ID',
- trace_id VARCHAR(64) DEFAULT NULL COMMENT 'Agent trace_id,便于追踪模型执行',
- status VARCHAR(32) NOT NULL COMMENT '任务状态:PENDING/RUNNING/RECALL_DONE/ENRICHED/RANKED/PLAN_CREATED/COMPLETED/FAILED/CANCELLED',
- current_stage VARCHAR(32) DEFAULT NULL COMMENT '当前阶段:INIT/RECALL/ACCOUNT_ENRICH/ACCOUNT_PROFILE/CONTENT_ENRICH/RANK/CREATE_PLAN/FINALIZE',
- priority TINYINT NOT NULL DEFAULT 5 COMMENT '优先级(1最高,9最低)',
- retry_count INT NOT NULL DEFAULT 0 COMMENT '已重试次数',
- max_retry INT NOT NULL DEFAULT 3 COMMENT '最大重试次数',
- is_idempotent TINYINT NOT NULL DEFAULT 1 COMMENT '是否启用幂等(1是0否)',
- idempotency_key VARCHAR(128) DEFAULT NULL COMMENT '幂等键(可由 demand_id+query_hash 组成)',
- started_at DATETIME DEFAULT NULL COMMENT '任务启动时间',
- finished_at DATETIME DEFAULT NULL COMMENT '任务完成时间',
- duration_ms BIGINT DEFAULT NULL COMMENT '总耗时(毫秒)',
- error_code VARCHAR(64) DEFAULT NULL COMMENT '错误码(网络超时/参数错误/平台限流等)',
- error_message TEXT COMMENT '错误详情',
- operator VARCHAR(64) DEFAULT 'agent' COMMENT '执行方(agent/manual/cron)',
- ext JSON DEFAULT NULL COMMENT '扩展字段(灰度标记、AB实验信息等)',
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
- PRIMARY KEY (id),
- UNIQUE KEY uk_task_id (task_id),
- UNIQUE KEY uk_idempotency_key (idempotency_key),
- KEY idx_demand_id (demand_id),
- KEY idx_status_priority (status, priority),
- KEY idx_current_stage (current_stage),
- KEY idx_created_at (created_at)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='供给任务主表(状态机核心)';
- -- =========================================================
- -- 3) 阶段执行表:记录每个阶段的执行情况
- -- =========================================================
- CREATE TABLE IF NOT EXISTS supply_task_stage (
- id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增主键',
- task_id VARCHAR(64) NOT NULL COMMENT '关联任务ID',
- stage_name VARCHAR(32) NOT NULL COMMENT '阶段名:RECALL/ACCOUNT_ENRICH/ACCOUNT_PROFILE/CONTENT_ENRICH/RANK/CREATE_PLAN',
- stage_status VARCHAR(32) NOT NULL COMMENT '阶段状态:PENDING/RUNNING/SUCCESS/FAILED/SKIPPED',
- attempt_no INT NOT NULL DEFAULT 1 COMMENT '该阶段第几次执行',
- input_payload JSON DEFAULT NULL COMMENT '阶段输入快照',
- output_payload JSON DEFAULT NULL COMMENT '阶段输出快照(摘要)',
- started_at DATETIME DEFAULT NULL COMMENT '阶段开始时间',
- finished_at DATETIME DEFAULT NULL COMMENT '阶段结束时间',
- duration_ms BIGINT DEFAULT NULL COMMENT '阶段耗时(毫秒)',
- error_code VARCHAR(64) DEFAULT NULL COMMENT '阶段错误码',
- error_message TEXT COMMENT '阶段错误信息',
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
- PRIMARY KEY (id),
- KEY idx_task_stage (task_id, stage_name),
- KEY idx_stage_status (stage_status),
- KEY idx_started_at (started_at)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='任务阶段执行明细表';
- -- =========================================================
- -- 4) 候选内容池:搜索召回到的原始候选
- -- =========================================================
- CREATE TABLE IF NOT EXISTS supply_candidate_content (
- id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增主键',
- task_id VARCHAR(64) NOT NULL COMMENT '关联任务ID',
- source_keyword VARCHAR(255) DEFAULT NULL COMMENT '触发召回的关键词',
- recall_round INT NOT NULL DEFAULT 1 COMMENT '第几轮召回(核心词/扩展词/场景词)',
- recall_page INT DEFAULT NULL COMMENT '召回分页页码/游标序号',
- platform VARCHAR(32) NOT NULL DEFAULT 'weixin' COMMENT '来源平台',
- content_id VARCHAR(128) DEFAULT NULL COMMENT '平台内容ID(若可提取)',
- title VARCHAR(512) NOT NULL COMMENT '文章标题',
- url VARCHAR(1024) NOT NULL COMMENT '文章链接(业务唯一候选)',
- digest VARCHAR(1000) DEFAULT NULL COMMENT '文章摘要',
- cover_url VARCHAR(1024) DEFAULT NULL COMMENT '封面图链接',
- publish_time DATETIME DEFAULT NULL COMMENT '发布时间',
- account_id VARCHAR(128) DEFAULT NULL COMMENT '公众号ID(wx_gh)',
- account_name VARCHAR(255) DEFAULT NULL COMMENT '公众号名称',
- biz_info VARCHAR(128) DEFAULT NULL COMMENT '公众号 biz 标识',
- view_count BIGINT DEFAULT NULL COMMENT '阅读量(若有)',
- like_count BIGINT DEFAULT NULL COMMENT '点赞量(若有)',
- comment_count BIGINT DEFAULT NULL COMMENT '评论量(若有)',
- favorite_count BIGINT DEFAULT NULL COMMENT '收藏量(若有)',
- share_count BIGINT DEFAULT NULL COMMENT '分享量(若有)',
- raw_statistics JSON DEFAULT NULL COMMENT '原始统计字段',
- raw_payload JSON DEFAULT NULL COMMENT '原始API返回(单条)',
- dedup_hash CHAR(64) DEFAULT NULL COMMENT '去重哈希(建议 title+url 归一化后sha256)',
- quality_flag VARCHAR(32) DEFAULT 'UNKNOWN' COMMENT '初步质量标记:UNKNOWN/PASS/REJECT',
- reject_reason VARCHAR(255) DEFAULT NULL COMMENT '初筛淘汰原因',
- is_deleted TINYINT NOT NULL DEFAULT 0 COMMENT '软删除标记(1删除)',
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
- PRIMARY KEY (id),
- UNIQUE KEY uk_task_url (task_id, url(255)),
- KEY idx_task_round (task_id, recall_round),
- KEY idx_task_account (task_id, account_id),
- KEY idx_publish_time (publish_time),
- KEY idx_quality_flag (quality_flag),
- KEY idx_dedup_hash (dedup_hash)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='候选内容池(召回原始结果)';
- -- =========================================================
- -- 5) 账号画像表:账号维度的补全与打分输入
- -- =========================================================
- CREATE TABLE IF NOT EXISTS supply_account_profile (
- id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增主键',
- task_id VARCHAR(64) NOT NULL COMMENT '关联任务ID',
- account_id VARCHAR(128) NOT NULL COMMENT '公众号ID(wx_gh)',
- account_name VARCHAR(255) DEFAULT NULL COMMENT '账号名称',
- biz_info VARCHAR(128) DEFAULT NULL COMMENT '账号biz标识',
- intro TEXT COMMENT '账号简介',
- verify_status TINYINT DEFAULT NULL COMMENT '认证状态(0未认证1认证,按平台定义)',
- category VARCHAR(64) DEFAULT NULL COMMENT '账号分类(健康/资讯/情感等)',
- update_frequency VARCHAR(32) DEFAULT NULL COMMENT '更新频率(daily/weekly/irregular)',
- recent_article_count INT DEFAULT 0 COMMENT '近周期发文数',
- median_view_count BIGINT DEFAULT NULL COMMENT '历史阅读量中位数',
- median_like_count BIGINT DEFAULT NULL COMMENT '历史点赞量中位数',
- vertical_score DECIMAL(5,2) DEFAULT 0 COMMENT '垂直度评分(0-100)',
- stability_score DECIMAL(5,2) DEFAULT 0 COMMENT '稳定更新评分(0-100)',
- credibility_score DECIMAL(5,2) DEFAULT 0 COMMENT '可信度评分(0-100)',
- risk_tags JSON DEFAULT NULL COMMENT '风险标签(标题党/营销号/违规高风险)',
- raw_profile_payload JSON DEFAULT NULL COMMENT '账号详情原始返回',
- raw_history_payload JSON DEFAULT NULL COMMENT '历史文章原始返回(摘要)',
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
- PRIMARY KEY (id),
- UNIQUE KEY uk_task_account (task_id, account_id),
- KEY idx_task_credibility (task_id, credibility_score),
- KEY idx_category (category)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='账号画像表(账号维度特征)';
- -- =========================================================
- -- 6) 内容特征表:文章详情解析后的质量特征
- -- =========================================================
- CREATE TABLE IF NOT EXISTS supply_content_feature (
- id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增主键',
- task_id VARCHAR(64) NOT NULL COMMENT '关联任务ID',
- candidate_id BIGINT UNSIGNED NOT NULL COMMENT '关联候选内容ID',
- content_length INT DEFAULT NULL COMMENT '正文长度(字符数)',
- image_count INT DEFAULT NULL COMMENT '图片数量',
- video_count INT DEFAULT NULL COMMENT '视频数量',
- has_source_reference TINYINT DEFAULT 0 COMMENT '是否包含来源引用(0否1是)',
- readability_score DECIMAL(5,2) DEFAULT 0 COMMENT '可读性评分(0-100)',
- information_density_score DECIMAL(5,2) DEFAULT 0 COMMENT '信息密度评分(0-100)',
- elder_friendly_score DECIMAL(5,2) DEFAULT 0 COMMENT '老年友好评分(0-100)',
- sentiment_score DECIMAL(5,2) DEFAULT NULL COMMENT '情绪倾向分(可选)',
- risk_score DECIMAL(5,2) DEFAULT 0 COMMENT '风险分(越高风险越大)',
- risk_reasons JSON DEFAULT NULL COMMENT '风险原因明细(夸张词/伪科学词/诱导词)',
- keyword_coverage JSON DEFAULT NULL COMMENT 'query关键词覆盖明细',
- summary TEXT COMMENT '文章摘要(抽取)',
- raw_detail_payload JSON DEFAULT NULL COMMENT '文章详情原始返回',
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
- PRIMARY KEY (id),
- UNIQUE KEY uk_task_candidate_feature (task_id, candidate_id),
- KEY idx_task_quality (task_id, readability_score, information_density_score)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='内容特征表(文章维度特征)';
- -- =========================================================
- -- 7) 评分与筛选结果表:最终排序与入选
- -- =========================================================
- CREATE TABLE IF NOT EXISTS supply_content_score (
- id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增主键',
- task_id VARCHAR(64) NOT NULL COMMENT '关联任务ID',
- candidate_id BIGINT UNSIGNED NOT NULL COMMENT '关联候选内容ID',
- relevance_score DECIMAL(5,2) NOT NULL DEFAULT 0 COMMENT '相关性分(0-100)',
- popularity_score DECIMAL(5,2) NOT NULL DEFAULT 0 COMMENT '热度分(0-100)',
- quality_score DECIMAL(5,2) NOT NULL DEFAULT 0 COMMENT '内容质量分(0-100)',
- account_score DECIMAL(5,2) NOT NULL DEFAULT 0 COMMENT '账号可信分(0-100)',
- elder_fit_score DECIMAL(5,2) NOT NULL DEFAULT 0 COMMENT '老年适配分(0-100)',
- diversity_penalty DECIMAL(5,2) NOT NULL DEFAULT 0 COMMENT '多样性惩罚分(防止同账号过多)',
- total_score DECIMAL(6,2) NOT NULL DEFAULT 0 COMMENT '综合分',
- filter_status VARCHAR(32) NOT NULL DEFAULT 'PENDING' COMMENT '过滤状态:PENDING/PASS/REJECT',
- filter_reason VARCHAR(255) DEFAULT NULL COMMENT '过滤原因',
- is_selected TINYINT NOT NULL DEFAULT 0 COMMENT '是否入选最终结果(1是0否)',
- rank_no INT DEFAULT NULL COMMENT '最终排序名次(从1开始)',
- score_version VARCHAR(32) DEFAULT 'v1' COMMENT '评分模型版本',
- score_detail JSON DEFAULT NULL COMMENT '评分明细(权重、中间分)',
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
- PRIMARY KEY (id),
- UNIQUE KEY uk_task_candidate_score (task_id, candidate_id),
- KEY idx_task_selected_rank (task_id, is_selected, rank_no),
- KEY idx_task_total_score (task_id, total_score)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='内容评分与筛选结果表';
- -- =========================================================
- -- 8) 抓取计划映射表:与AIGC平台交互记录
- -- =========================================================
- CREATE TABLE IF NOT EXISTS supply_crawler_plan (
- id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增主键',
- task_id VARCHAR(64) NOT NULL COMMENT '关联任务ID',
- platform VARCHAR(32) NOT NULL DEFAULT 'weixin' COMMENT '平台',
- plan_type VARCHAR(32) NOT NULL COMMENT '建计划方式:BY_CONTENT/BY_ACCOUNT',
- input_count INT NOT NULL DEFAULT 0 COMMENT '输入内容/账号数量',
- crawler_plan_id VARCHAR(128) DEFAULT NULL COMMENT 'AIGC爬取计划ID',
- crawler_plan_name VARCHAR(255) DEFAULT NULL COMMENT 'AIGC爬取计划名称',
- produce_plan_id VARCHAR(128) DEFAULT NULL COMMENT '绑定的生成计划ID(可空)',
- produce_bind_status VARCHAR(32) DEFAULT NULL COMMENT '绑定状态:NOT_REQUIRED/SUCCESS/FAILED',
- request_payload JSON NOT NULL COMMENT '创建计划请求体',
- response_payload JSON DEFAULT NULL COMMENT '创建计划响应体',
- plan_status VARCHAR(32) NOT NULL COMMENT '计划状态:CREATED/FAILED/BIND_SUCCESS/BIND_FAILED',
- error_code VARCHAR(64) DEFAULT NULL COMMENT '失败错误码',
- error_message TEXT COMMENT '失败错误信息',
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
- PRIMARY KEY (id),
- KEY idx_task_id (task_id),
- KEY idx_crawler_plan_id (crawler_plan_id),
- KEY idx_plan_status (plan_status)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='AIGC抓取计划映射表';
- -- =========================================================
- -- 9) 事件日志表:完整可观测性与审计
- -- =========================================================
- CREATE TABLE IF NOT EXISTS supply_task_event (
- id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '自增主键',
- task_id VARCHAR(64) NOT NULL COMMENT '关联任务ID',
- stage_name VARCHAR(32) DEFAULT NULL COMMENT '所属阶段',
- event_type VARCHAR(64) NOT NULL COMMENT '事件类型:STATE_CHANGE/TOOL_CALL/TOOL_RESULT/RETRY/ERROR/MANUAL_INTERVENTION',
- event_level VARCHAR(16) NOT NULL DEFAULT 'INFO' COMMENT '日志级别:DEBUG/INFO/WARN/ERROR',
- tool_name VARCHAR(64) DEFAULT NULL COMMENT '工具名(如 weixin_search)',
- request_id VARCHAR(64) DEFAULT NULL COMMENT '外部请求ID(可用于链路追踪)',
- event_payload JSON DEFAULT NULL COMMENT '事件详情(入参/出参摘要)',
- message VARCHAR(1000) DEFAULT NULL COMMENT '事件描述',
- created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
- PRIMARY KEY (id),
- KEY idx_task_created (task_id, created_at),
- KEY idx_stage_type (stage_name, event_type),
- KEY idx_tool_name (tool_name),
- KEY idx_event_level (event_level)
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='任务事件日志表';
- SET FOREIGN_KEY_CHECKS = 1;
|