故事创作训练数据构造框架
一、核心环节拆解
1. 故事构思环节
输入(Input)
{
"task": "story_conception",
"constraints": {
"genre": "玄幻/都市/科幻等",
"target_audience": "目标读者群",
"length": "短篇/中篇/长篇",
"core_theme": "核心主题"
},
"inspiration": "灵感来源或关键词"
}
思维链(Chain of Thought)
{
"reasoning_steps": [
{
"step": 1,
"thought": "分析题材特点和读者期待",
"consideration": "玄幻题材需要完整的世界观和修炼体系"
},
{
"step": 2,
"thought": "确定核心冲突",
"consideration": "主角与世界规则的冲突,个人成长与命运的对抗"
},
{
"step": 3,
"thought": "设计独特卖点",
"consideration": "打更人身份 + 破案推理 + 修仙升级的融合"
}
]
}
输出(Output)
{
"story_concept": {
"premise": "核心设定一句话概括",
"unique_selling_point": "独特卖点",
"world_building_outline": "世界观框架",
"protagonist_concept": "主角人设概念",
"core_conflict": "核心冲突"
}
}
2. 角色设计环节
输入(Input)
{
"task": "character_design",
"story_context": {
"world_setting": "世界观设定",
"plot_requirements": "剧情需求",
"existing_characters": "已有角色列表"
},
"character_role": "主角/配角/反派",
"requirements": "特定需求"
}
思维链(Chain of Thought)
{
"reasoning_steps": [
{
"step": 1,
"thought": "角色在故事中的功能定位",
"analysis": "许七安需要同时承担推理和战斗两条线"
},
{
"step": 2,
"thought": "性格设计的合理性",
"analysis": "现代灵魂穿越,保留理性思维和现代知识"
},
{
"step": 3,
"thought": "成长空间设计",
"analysis": "从底层打更人到高层修士的完整路径"
},
{
"step": 4,
"thought": "与其他角色的关系网络",
"analysis": "需要设计导师、盟友、对手等多层关系"
}
]
}
输出(Output)
{
"character_profile": {
"basic_info": {
"name": "许七安",
"age": "21岁",
"identity": "打更人"
},
"personality": {
"core_traits": ["理性", "谨慎", "幽默"],
"motivation": "生存并找到回家的路",
"fears": "失去自我,被世界规则吞噬"
},
"abilities": {
"current": "基础武力,现代知识",
"potential": "修炼天赋,推理能力"
},
"relationships": {
"mentor": "周老",
"allies": ["李玉春"],
"rivals": []
},
"arc": "从被动适应到主动改变世界"
}
}
3. 情节设计环节
输入(Input)
{
"task": "plot_design",
"context": {
"current_chapter": 4,
"previous_events": "前3章发生的事件摘要",
"character_states": "当前角色状态",
"world_state": "当前世界状态"
},
"plot_goals": {
"immediate": "本章要达成的目标",
"mid_term": "本卷要达成的目标",
"long_term": "全书要达成的目标"
}
}
思维链(Chain of Thought)
{
"reasoning_steps": [
{
"step": 1,
"thought": "确定本章的核心事件",
"analysis": "需要推进主线(修炼体系揭示)同时保持节奏(日常案件)"
},
{
"step": 2,
"thought": "设计冲突和转折",
"analysis": "通过税银案引出更大的阴谋,制造悬念"
},
{
"step": 3,
"thought": "角色成长的体现",
"analysis": "许七安运用现代知识破案,展示独特优势"
},
{
"step": 4,
"thought": "伏笔和铺垫",
"analysis": "埋下关于修炼体系的线索,为后续展开做准备"
},
{
"step": 5,
"thought": "情绪节奏控制",
"analysis": "紧张调查 → 推理高潮 → 轻松日常,张弛有度"
}
]
}
输出(Output)
{
"chapter_plot": {
"title": "章节标题",
"summary": "一句话概括",
"scenes": [
{
"scene_id": 1,
"location": "打更人衙门",
"characters": ["许七安", "李玉春"],
"event": "接到税银失窃案",
"purpose": "引入主线冲突",
"emotional_tone": "紧张"
}
],
"plot_points": {
"hook": "开篇钩子",
"development": "情节发展",
"climax": "高潮",
"resolution": "结局"
},
"foreshadowing": ["伏笔1", "伏笔2"]
}
}
4. 场景描写环节
输入(Input)
{
"task": "scene_writing",
"scene_context": {
"location": "打更人衙门大堂",
"time": "清晨",
"weather": "阴天",
"characters_present": ["许七安", "李玉春", "周老"]
},
"scene_purpose": "展示打更人的工作环境,推进案件调查",
"emotional_tone": "严肃、紧张",
"pov": "许七安第一人称"
}
思维链(Chain of Thought)
{
"reasoning_steps": [
{
"step": 1,
"thought": "选择描写重点",
"decision": "重点描写环境氛围和人物状态,而非细节堆砌"
},
{
"step": 2,
"thought": "感官细节的选择",
"decision": "视觉:昏暗的光线;听觉:急促的脚步声;嗅觉:潮湿的霉味"
},
{
"step": 3,
"thought": "如何体现POV角色的视角",
"decision": "通过许七安的现代思维对比古代环境,产生反差"
},
{
"step": 4,
"thought": "节奏控制",
"decision": "快速带过环境,重点放在对话和行动上"
}
]
}
输出(Output)
{
"scene_text": "完整的场景文本",
"writing_techniques": {
"sensory_details": ["视觉", "听觉", "嗅觉"],
"pov_consistency": "第一人称限制视角",
"pacing": "快节奏,对话为主",
"show_vs_tell": "70%展示,30%叙述"
}
}
5. 对话生成环节
输入(Input)
{
"task": "dialogue_generation",
"context": {
"characters": [
{
"name": "许七安",
"personality": "理性、谨慎、略带幽默",
"current_emotion": "警惕",
"goal": "获取案件信息"
},
{
"name": "李玉春",
"personality": "正直、严肃、经验丰富",
"current_emotion": "焦虑",
"goal": "尽快破案"
}
],
"situation": "讨论税银失窃案的调查方向",
"subtext": "李玉春对许七安的能力有所怀疑"
},
"dialogue_purpose": "推进情节,展示人物关系,揭示信息"
}
思维链(Chain of Thought)
{
"reasoning_steps": [
{
"step": 1,
"thought": "每个角色的说话方式",
"analysis": "许七安:简洁、逻辑性强;李玉春:直接、带有命令口吻"
},
{
"step": 2,
"thought": "对话的潜台词",
"analysis": "表面讨论案件,实际是李玉春在试探许七安的能力"
},
{
"step": 3,
"thought": "信息揭示的节奏",
"analysis": "逐步透露案件细节,保持悬念"
},
{
"step": 4,
"thought": "对话的自然性",
"analysis": "加入停顿、重复、未完成的句子,避免过于书面化"
}
]
}
输出(Output)
{
"dialogue": [
{
"speaker": "李玉春",
"text": "许七安,这次的案子不简单。",
"action": "皱眉看着卷宗",
"subtext": "在观察许七安的反应"
},
{
"speaker": "许七安",
"text": "我看过卷宗了,确实疑点很多。",
"action": "平静地回应",
"subtext": "展示自己已经做了功课"
}
],
"dialogue_analysis": {
"character_voice_consistency": "符合人物性格",
"information_revealed": "案件的复杂性",
"relationship_development": "建立初步信任",
"subtext_effectiveness": "成功传达潜台词"
}
}
二、训练数据集构造策略
1. 数据分层
Level 1: 宏观层(故事构思、大纲设计)
Level 2: 中观层(章节规划、情节设计)
Level 3: 微观层(场景描写、对话生成)
2. 数据增强方法
同一输入的多样化输出
{
"input": "设计一个玄幻世界的修炼体系",
"outputs": [
{
"version": "A",
"style": "传统仙侠",
"system": "炼气→筑基→金丹→元婴"
},
{
"version": "B",
"style": "创新融合",
"system": "儒道佛武巫五大体系并存"
}
]
}
同一输出的多样化输入
{
"output": "许七安运用现代推理方法破案",
"inputs": [
{
"version": "A",
"context": "税银失窃案,需要找出内鬼"
},
{
"version": "B",
"context": "命案调查,需要还原作案过程"
}
]
}
3. 思维链标注策略
显式思维链
{
"type": "explicit_cot",
"format": "step_by_step",
"content": "第一步...第二步...第三步..."
}
隐式思维链
{
"type": "implicit_cot",
"format": "embedded_reasoning",
"content": "在描写中自然融入推理过程"
}
4. 质量控制维度
{
"quality_metrics": {
"consistency": "前后一致性检查",
"creativity": "创新性评分",
"readability": "可读性评分",
"plot_logic": "情节逻辑性",
"character_depth": "人物深度",
"pacing": "节奏控制"
}
}
三、具体训练任务设计
Task 1: 情节续写
{
"task_type": "plot_continuation",
"input": {
"previous_chapters": "前N章内容",
"current_state": "当前状态",
"constraints": "必须包含的元素"
},
"cot_required": true,
"output_format": "下一章的详细情节"
}
Task 2: 角色对话生成
{
"task_type": "dialogue_generation",
"input": {
"characters": "角色信息",
"situation": "场景设定",
"goal": "对话目标"
},
"cot_required": true,
"output_format": "完整对话及动作描写"
}
Task 3: 世界观扩展
{
"task_type": "worldbuilding_expansion",
"input": {
"existing_lore": "已有设定",
"expansion_direction": "扩展方向"
},
"cot_required": true,
"output_format": "新的世界观元素及其与现有设定的关联"
}
Task 4: 情节修复
{
"task_type": "plot_hole_fixing",
"input": {
"plot_summary": "情节摘要",
"identified_issues": "发现的问题"
},
"cot_required": true,
"output_format": "修复方案及修改后的情节"
}
Task 5: 风格迁移
{
"task_type": "style_transfer",
"input": {
"source_text": "原始文本",
"target_style": "目标风格"
},
"cot_required": true,
"output_format": "改写后的文本"
}
四、数据标注示例
完整示例:第4章情节设计
Input
{
"task": "plot_design",
"chapter_number": 4,
"previous_summary": "许七安穿越到大奉王朝,成为打更人,初步了解世界观",
"character_state": {
"protagonist": "刚适应新身份,渴望了解修炼体系",
"supporting": "李玉春对许七安的能力仍有所怀疑"
},
"plot_requirements": {
"main_quest": "揭示修炼体系的基本框架",
"sub_quest": "通过案件展示主角能力",
"character_development": "许七安获得初步认可"
}
}
Chain of Thought
{
"reasoning_steps": [
{
"step": 1,
"thought": "如何自然地引入修炼体系?",
"options": [
"直接讲解(生硬)",
"通过战斗展示(太早)",
"通过案件调查接触(自然)"
],
"decision": "选择通过案件让主角接触到修炼者,产生好奇",
"reason": "符合主角性格,推进自然"
},
{
"step": 2,
"thought": "案件应该是什么类型?",
"options": [
"简单盗窃(缺乏张力)",
"命案(过于沉重)",
"涉及官员的税银案(合适)"
],
"decision": "税银失窃案,涉及官场,有悬念",
"reason": "既能展示推理,又能引出更大的世界"
},
{
"step": 3,
"thought": "如何设计情节节奏?",
"structure": "接案(快)→ 调查(中)→ 推理(慢)→ 揭示(快)",
"reason": "张弛有度,保持读者兴趣"
},
{
"step": 4,
"thought": "本章需要埋下什么伏笔?",
"foreshadowing": [
"修炼体系的复杂性",
"官场的黑暗",
"主角的特殊能力"
],
"reason": "为后续章节做铺垫"
}
]
}
Output
{
"chapter_4_plot": {
"title": "税银案",
"structure": {
"opening": "接到税银失窃的紧急案件",
"development": "调查过程中发现疑点重重",
"climax": "运用现代推理方法锁定嫌疑人",
"resolution": "案件告破,但引出更大的谜团"
},
"scenes": [
{
"scene_id": 1,
"event": "衙门接案,了解案情"
},
{
"scene_id": 2,
"event": "现场勘查,收集证据"
},
{
"scene_id": 3,
"event": "审讯嫌疑人,推理分析"
},
{
"scene_id": 4,
"event": "真相揭晓,埋下伏笔"
}
],
"character_arcs": {
"许七安": "从新人到展示能力,获得认可",
"李玉春": "从怀疑到信任"
},
"worldbuilding_reveals": [
"打更人的职责范围",
"修炼者在社会中的地位",
"官场的复杂性"
]
}
}
五、训练数据的组织结构
目录结构建议
story_training_data/
├── macro_level/ # 宏观层数据
│ ├── story_conception/
│ ├── outline_design/
│ └── worldbuilding/
├── meso_level/ # 中观层数据
│ ├── chapter_planning/
│ ├── plot_design/
│ └── character_arcs/
├── micro_level/ # 微观层数据
│ ├── scene_writing/
│ ├── dialogue_generation/
│ └── description/
└── cross_level/ # 跨层级任务
├── consistency_check/
├── plot_hole_fixing/
└── style_refinement/
单条数据格式
{
"id": "unique_identifier",
"task_type": "plot_design",
"level": "meso",
"input": {},
"chain_of_thought": {},
"output": {},
"metadata": {
"source": "大奉打更人_第4章",
"annotator": "human/ai",
"quality_score": 0.95,
"tags": ["推理", "世界观", "人物成长"]
}
}
六、评估指标
自动评估
- 一致性检查: 前后设定是否矛盾
- 完整性检查: 是否包含所有必需元素
- 逻辑性检查: 因果关系是否合理
人工评估
- 创意性: 1-5分
- 可读性: 1-5分
- 情感共鸣: 1-5分
- 情节吸引力: 1-5分
混合评估
- A/B测试: 读者偏好
- 续读率: 读者是否继续阅读
- 互动数据: 评论、点赞等