story_training_data_framework.md 15 KB

故事创作训练数据构造框架

一、核心环节拆解

1. 故事构思环节

输入(Input)

{
  "task": "story_conception",
  "constraints": {
    "genre": "玄幻/都市/科幻等",
    "target_audience": "目标读者群",
    "length": "短篇/中篇/长篇",
    "core_theme": "核心主题"
  },
  "inspiration": "灵感来源或关键词"
}

思维链(Chain of Thought)

{
  "reasoning_steps": [
    {
      "step": 1,
      "thought": "分析题材特点和读者期待",
      "consideration": "玄幻题材需要完整的世界观和修炼体系"
    },
    {
      "step": 2,
      "thought": "确定核心冲突",
      "consideration": "主角与世界规则的冲突,个人成长与命运的对抗"
    },
    {
      "step": 3,
      "thought": "设计独特卖点",
      "consideration": "打更人身份 + 破案推理 + 修仙升级的融合"
    }
  ]
}

输出(Output)

{
  "story_concept": {
    "premise": "核心设定一句话概括",
    "unique_selling_point": "独特卖点",
    "world_building_outline": "世界观框架",
    "protagonist_concept": "主角人设概念",
    "core_conflict": "核心冲突"
  }
}

2. 角色设计环节

输入(Input)

{
  "task": "character_design",
  "story_context": {
    "world_setting": "世界观设定",
    "plot_requirements": "剧情需求",
    "existing_characters": "已有角色列表"
  },
  "character_role": "主角/配角/反派",
  "requirements": "特定需求"
}

思维链(Chain of Thought)

{
  "reasoning_steps": [
    {
      "step": 1,
      "thought": "角色在故事中的功能定位",
      "analysis": "许七安需要同时承担推理和战斗两条线"
    },
    {
      "step": 2,
      "thought": "性格设计的合理性",
      "analysis": "现代灵魂穿越,保留理性思维和现代知识"
    },
    {
      "step": 3,
      "thought": "成长空间设计",
      "analysis": "从底层打更人到高层修士的完整路径"
    },
    {
      "step": 4,
      "thought": "与其他角色的关系网络",
      "analysis": "需要设计导师、盟友、对手等多层关系"
    }
  ]
}

输出(Output)

{
  "character_profile": {
    "basic_info": {
      "name": "许七安",
      "age": "21岁",
      "identity": "打更人"
    },
    "personality": {
      "core_traits": ["理性", "谨慎", "幽默"],
      "motivation": "生存并找到回家的路",
      "fears": "失去自我,被世界规则吞噬"
    },
    "abilities": {
      "current": "基础武力,现代知识",
      "potential": "修炼天赋,推理能力"
    },
    "relationships": {
      "mentor": "周老",
      "allies": ["李玉春"],
      "rivals": []
    },
    "arc": "从被动适应到主动改变世界"
  }
}

3. 情节设计环节

输入(Input)

{
  "task": "plot_design",
  "context": {
    "current_chapter": 4,
    "previous_events": "前3章发生的事件摘要",
    "character_states": "当前角色状态",
    "world_state": "当前世界状态"
  },
  "plot_goals": {
    "immediate": "本章要达成的目标",
    "mid_term": "本卷要达成的目标",
    "long_term": "全书要达成的目标"
  }
}

思维链(Chain of Thought)

{
  "reasoning_steps": [
    {
      "step": 1,
      "thought": "确定本章的核心事件",
      "analysis": "需要推进主线(修炼体系揭示),同时保持节奏(日常案件)"
    },
    {
      "step": 2,
      "thought": "设计冲突和转折",
      "analysis": "通过税银案引出更大的阴谋,制造悬念"
    },
    {
      "step": 3,
      "thought": "角色成长的体现",
      "analysis": "许七安运用现代知识破案,展示独特优势"
    },
    {
      "step": 4,
      "thought": "伏笔和铺垫",
      "analysis": "埋下关于修炼体系的线索,为后续展开做准备"
    },
    {
      "step": 5,
      "thought": "情绪节奏控制",
      "analysis": "紧张调查 → 推理高潮 → 轻松日常,张弛有度"
    }
  ]
}

输出(Output)

{
  "chapter_plot": {
    "title": "章节标题",
    "summary": "一句话概括",
    "scenes": [
      {
        "scene_id": 1,
        "location": "打更人衙门",
        "characters": ["许七安", "李玉春"],
        "event": "接到税银失窃案",
        "purpose": "引入主线冲突",
        "emotional_tone": "紧张"
      }
    ],
    "plot_points": {
      "hook": "开篇钩子",
      "development": "情节发展",
      "climax": "高潮",
      "resolution": "结局"
    },
    "foreshadowing": ["伏笔1", "伏笔2"]
  }
}

4. 场景描写环节

输入(Input)

{
  "task": "scene_writing",
  "scene_context": {
    "location": "打更人衙门大堂",
    "time": "清晨",
    "weather": "阴天",
    "characters_present": ["许七安", "李玉春", "周老"]
  },
  "scene_purpose": "展示打更人的工作环境,推进案件调查",
  "emotional_tone": "严肃、紧张",
  "pov": "许七安第一人称"
}

思维链(Chain of Thought)

{
  "reasoning_steps": [
    {
      "step": 1,
      "thought": "选择描写重点",
      "decision": "重点描写环境氛围和人物状态,而非细节堆砌"
    },
    {
      "step": 2,
      "thought": "感官细节的选择",
      "decision": "视觉:昏暗的光线;听觉:急促的脚步声;嗅觉:潮湿的霉味"
    },
    {
      "step": 3,
      "thought": "如何体现POV角色的视角",
      "decision": "通过许七安的现代思维对比古代环境,产生反差"
    },
    {
      "step": 4,
      "thought": "节奏控制",
      "decision": "快速带过环境,重点放在对话和行动上"
    }
  ]
}

输出(Output)

{
  "scene_text": "完整的场景文本",
  "writing_techniques": {
    "sensory_details": ["视觉", "听觉", "嗅觉"],
    "pov_consistency": "第一人称限制视角",
    "pacing": "快节奏,对话为主",
    "show_vs_tell": "70%展示,30%叙述"
  }
}

5. 对话生成环节

输入(Input)

{
  "task": "dialogue_generation",
  "context": {
    "characters": [
      {
        "name": "许七安",
        "personality": "理性、谨慎、略带幽默",
        "current_emotion": "警惕",
        "goal": "获取案件信息"
      },
      {
        "name": "李玉春",
        "personality": "正直、严肃、经验丰富",
        "current_emotion": "焦虑",
        "goal": "尽快破案"
      }
    ],
    "situation": "讨论税银失窃案的调查方向",
    "subtext": "李玉春对许七安的能力有所怀疑"
  },
  "dialogue_purpose": "推进情节,展示人物关系,揭示信息"
}

思维链(Chain of Thought)

{
  "reasoning_steps": [
    {
      "step": 1,
      "thought": "每个角色的说话方式",
      "analysis": "许七安:简洁、逻辑性强;李玉春:直接、带有命令口吻"
    },
    {
      "step": 2,
      "thought": "对话的潜台词",
      "analysis": "表面讨论案件,实际是李玉春在试探许七安的能力"
    },
    {
      "step": 3,
      "thought": "信息揭示的节奏",
      "analysis": "逐步透露案件细节,保持悬念"
    },
    {
      "step": 4,
      "thought": "对话的自然性",
      "analysis": "加入停顿、重复、未完成的句子,避免过于书面化"
    }
  ]
}

输出(Output)

{
  "dialogue": [
    {
      "speaker": "李玉春",
      "text": "许七安,这次的案子不简单。",
      "action": "皱眉看着卷宗",
      "subtext": "在观察许七安的反应"
    },
    {
      "speaker": "许七安",
      "text": "我看过卷宗了,确实疑点很多。",
      "action": "平静地回应",
      "subtext": "展示自己已经做了功课"
    }
  ],
  "dialogue_analysis": {
    "character_voice_consistency": "符合人物性格",
    "information_revealed": "案件的复杂性",
    "relationship_development": "建立初步信任",
    "subtext_effectiveness": "成功传达潜台词"
  }
}

二、训练数据集构造策略

1. 数据分层

Level 1: 宏观层(故事构思、大纲设计)
Level 2: 中观层(章节规划、情节设计)
Level 3: 微观层(场景描写、对话生成)

2. 数据增强方法

同一输入的多样化输出

{
  "input": "设计一个玄幻世界的修炼体系",
  "outputs": [
    {
      "version": "A",
      "style": "传统仙侠",
      "system": "炼气→筑基→金丹→元婴"
    },
    {
      "version": "B",
      "style": "创新融合",
      "system": "儒道佛武巫五大体系并存"
    }
  ]
}

同一输出的多样化输入

{
  "output": "许七安运用现代推理方法破案",
  "inputs": [
    {
      "version": "A",
      "context": "税银失窃案,需要找出内鬼"
    },
    {
      "version": "B",
      "context": "命案调查,需要还原作案过程"
    }
  ]
}

3. 思维链标注策略

显式思维链

{
  "type": "explicit_cot",
  "format": "step_by_step",
  "content": "第一步...第二步...第三步..."
}

隐式思维链

{
  "type": "implicit_cot",
  "format": "embedded_reasoning",
  "content": "在描写中自然融入推理过程"
}

4. 质量控制维度

{
  "quality_metrics": {
    "consistency": "前后一致性检查",
    "creativity": "创新性评分",
    "readability": "可读性评分",
    "plot_logic": "情节逻辑性",
    "character_depth": "人物深度",
    "pacing": "节奏控制"
  }
}

三、具体训练任务设计

Task 1: 情节续写

{
  "task_type": "plot_continuation",
  "input": {
    "previous_chapters": "前N章内容",
    "current_state": "当前状态",
    "constraints": "必须包含的元素"
  },
  "cot_required": true,
  "output_format": "下一章的详细情节"
}

Task 2: 角色对话生成

{
  "task_type": "dialogue_generation",
  "input": {
    "characters": "角色信息",
    "situation": "场景设定",
    "goal": "对话目标"
  },
  "cot_required": true,
  "output_format": "完整对话及动作描写"
}

Task 3: 世界观扩展

{
  "task_type": "worldbuilding_expansion",
  "input": {
    "existing_lore": "已有设定",
    "expansion_direction": "扩展方向"
  },
  "cot_required": true,
  "output_format": "新的世界观元素及其与现有设定的关联"
}

Task 4: 情节修复

{
  "task_type": "plot_hole_fixing",
  "input": {
    "plot_summary": "情节摘要",
    "identified_issues": "发现的问题"
  },
  "cot_required": true,
  "output_format": "修复方案及修改后的情节"
}

Task 5: 风格迁移

{
  "task_type": "style_transfer",
  "input": {
    "source_text": "原始文本",
    "target_style": "目标风格"
  },
  "cot_required": true,
  "output_format": "改写后的文本"
}

四、数据标注示例

完整示例:第4章情节设计

Input

{
  "task": "plot_design",
  "chapter_number": 4,
  "previous_summary": "许七安穿越到大奉王朝,成为打更人,初步了解世界观",
  "character_state": {
    "protagonist": "刚适应新身份,渴望了解修炼体系",
    "supporting": "李玉春对许七安的能力仍有所怀疑"
  },
  "plot_requirements": {
    "main_quest": "揭示修炼体系的基本框架",
    "sub_quest": "通过案件展示主角能力",
    "character_development": "许七安获得初步认可"
  }
}

Chain of Thought

{
  "reasoning_steps": [
    {
      "step": 1,
      "thought": "如何自然地引入修炼体系?",
      "options": [
        "直接讲解(生硬)",
        "通过战斗展示(太早)",
        "通过案件调查接触(自然)"
      ],
      "decision": "选择通过案件让主角接触到修炼者,产生好奇",
      "reason": "符合主角性格,推进自然"
    },
    {
      "step": 2,
      "thought": "案件应该是什么类型?",
      "options": [
        "简单盗窃(缺乏张力)",
        "命案(过于沉重)",
        "涉及官员的税银案(合适)"
      ],
      "decision": "税银失窃案,涉及官场,有悬念",
      "reason": "既能展示推理,又能引出更大的世界"
    },
    {
      "step": 3,
      "thought": "如何设计情节节奏?",
      "structure": "接案(快)→ 调查(中)→ 推理(慢)→ 揭示(快)",
      "reason": "张弛有度,保持读者兴趣"
    },
    {
      "step": 4,
      "thought": "本章需要埋下什么伏笔?",
      "foreshadowing": [
        "修炼体系的复杂性",
        "官场的黑暗",
        "主角的特殊能力"
      ],
      "reason": "为后续章节做铺垫"
    }
  ]
}

Output

{
  "chapter_4_plot": {
    "title": "税银案",
    "structure": {
      "opening": "接到税银失窃的紧急案件",
      "development": "调查过程中发现疑点重重",
      "climax": "运用现代推理方法锁定嫌疑人",
      "resolution": "案件告破,但引出更大的谜团"
    },
    "scenes": [
      {
        "scene_id": 1,
        "event": "衙门接案,了解案情"
      },
      {
        "scene_id": 2,
        "event": "现场勘查,收集证据"
      },
      {
        "scene_id": 3,
        "event": "审讯嫌疑人,推理分析"
      },
      {
        "scene_id": 4,
        "event": "真相揭晓,埋下伏笔"
      }
    ],
    "character_arcs": {
      "许七安": "从新人到展示能力,获得认可",
      "李玉春": "从怀疑到信任"
    },
    "worldbuilding_reveals": [
      "打更人的职责范围",
      "修炼者在社会中的地位",
      "官场的复杂性"
    ]
  }
}

五、训练数据的组织结构

目录结构建议

story_training_data/
├── macro_level/          # 宏观层数据
│   ├── story_conception/
│   ├── outline_design/
│   └── worldbuilding/
├── meso_level/           # 中观层数据
│   ├── chapter_planning/
│   ├── plot_design/
│   └── character_arcs/
├── micro_level/          # 微观层数据
│   ├── scene_writing/
│   ├── dialogue_generation/
│   └── description/
└── cross_level/          # 跨层级任务
    ├── consistency_check/
    ├── plot_hole_fixing/
    └── style_refinement/

单条数据格式

{
  "id": "unique_identifier",
  "task_type": "plot_design",
  "level": "meso",
  "input": {},
  "chain_of_thought": {},
  "output": {},
  "metadata": {
    "source": "大奉打更人_第4章",
    "annotator": "human/ai",
    "quality_score": 0.95,
    "tags": ["推理", "世界观", "人物成长"]
  }
}

六、评估指标

自动评估

  • 一致性检查: 前后设定是否矛盾
  • 完整性检查: 是否包含所有必需元素
  • 逻辑性检查: 因果关系是否合理

人工评估

  • 创意性: 1-5分
  • 可读性: 1-5分
  • 情感共鸣: 1-5分
  • 情节吸引力: 1-5分

混合评估

  • A/B测试: 读者偏好
  • 续读率: 读者是否继续阅读
  • 互动数据: 评论、点赞等