|
|
@@ -43,10 +43,12 @@ import asyncio
|
|
|
import argparse
|
|
|
from copy import deepcopy
|
|
|
from pathlib import Path
|
|
|
-from openai import AsyncOpenAI
|
|
|
+from openai import AsyncOpenAI, BadRequestError, RateLimitError, APIError
|
|
|
+from typing import Optional, List, Set
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
load_dotenv()
|
|
|
+load_dotenv(Path(__file__).parent.parent / ".env") # 项目根目录 .env
|
|
|
|
|
|
client = AsyncOpenAI(
|
|
|
api_key=os.getenv("ALI_API_KEY"),
|
|
|
@@ -60,6 +62,10 @@ client = AsyncOpenAI(
|
|
|
# ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
class ContentFilterError(Exception):
    """Raised when the provider's content moderation rejects a request.

    The sample that triggered it is meant to be skipped rather than retried.
    """
|
|
|
+
|
|
|
+
|
|
|
def load_text(path: str) -> str:
|
|
|
for enc in ["utf-8", "gbk", "gb2312", "gb18030"]:
|
|
|
try:
|
|
|
@@ -74,14 +80,30 @@ async def llm_call(
|
|
|
model: str,
|
|
|
temperature: float = 0.6,
|
|
|
max_tokens: int = 4096,
|
|
|
+ max_retries: int = 3,
|
|
|
) -> str:
|
|
|
- resp = await client.chat.completions.create(
|
|
|
- model=model,
|
|
|
- messages=messages,
|
|
|
- temperature=temperature,
|
|
|
- max_tokens=max_tokens,
|
|
|
- )
|
|
|
- return resp.choices[0].message.content
|
|
|
+ delay = 5.0
|
|
|
+ for attempt in range(1, max_retries + 2): # +1 for the final attempt
|
|
|
+ try:
|
|
|
+ resp = await client.chat.completions.create(
|
|
|
+ model=model,
|
|
|
+ messages=messages,
|
|
|
+ temperature=temperature,
|
|
|
+ max_tokens=max_tokens,
|
|
|
+ )
|
|
|
+ return resp.choices[0].message.content
|
|
|
+ except BadRequestError as e:
|
|
|
+ err_code = getattr(e, "code", "") or ""
|
|
|
+ # 阿里云内容审查:data_inspection_failed / content_filter 等
|
|
|
+ if "data_inspection_failed" in str(e) or "content_filter" in err_code:
|
|
|
+ raise ContentFilterError(f"内容审查不通过: {e}") from e
|
|
|
+ raise # 其他 400 错误直接抛出
|
|
|
+ except (RateLimitError, APIError) as e:
|
|
|
+ if attempt > max_retries:
|
|
|
+ raise
|
|
|
+ print(f" [重试 {attempt}/{max_retries}] {type(e).__name__}: {e},{delay:.0f}s 后重试...")
|
|
|
+ await asyncio.sleep(delay)
|
|
|
+ delay = min(delay * 2, 60)
|
|
|
|
|
|
|
|
|
def extract_json_block(text: str) -> dict:
|
|
|
@@ -94,7 +116,7 @@ def extract_json_block(text: str) -> dict:
|
|
|
return json.loads(json_str)
|
|
|
|
|
|
|
|
|
-def write_jsonl(samples: list[dict], path: Path) -> None:
|
|
|
+def write_jsonl(samples: List[dict], path: Path) -> None:
|
|
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
with open(path, "w", encoding="utf-8") as f:
|
|
|
for s in samples:
|
|
|
@@ -109,41 +131,59 @@ def write_jsonl(samples: list[dict], path: Path) -> None:
|
|
|
# ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
-def apply_state_changes(state: dict, changes: dict) -> dict:
|
|
|
- """将一个 beat 的 state_changes 应用到状态快照,返回新快照"""
|
|
|
- state = deepcopy(state)
|
|
|
- for pl in changes.get("plot_lines", []):
|
|
|
- for line in state["plot_lines"]:
|
|
|
- if line["name"] == pl["name"]:
|
|
|
- line["status"] = pl["new_state"]
|
|
|
- break
|
|
|
- else:
|
|
|
- state["plot_lines"].append(
|
|
|
- {"name": pl["name"], "status": pl["new_state"],
|
|
|
- "mice_type": "?", "description": pl.get("new_state", "")}
|
|
|
- )
|
|
|
- for ch in changes.get("characters", []):
|
|
|
- for char in state["characters"]:
|
|
|
- if char["name"] == ch["name"]:
|
|
|
- char.setdefault("recent_changes", []).append(ch["change"])
|
|
|
- # 只保留最近 3 条变化
|
|
|
- char["recent_changes"] = char["recent_changes"][-3:]
|
|
|
- break
|
|
|
- return state
|
|
|
-
|
|
|
-
|
|
|
def build_state_snapshot(analysis: dict, beat_index: int) -> dict:
    """Return the story state as it stands just before beat ``beat_index``.

    The snapshot starts from deep copies of ``analysis["outline"]["plot_lines"]``
    and ``analysis["characters"]`` and replays the ``state_changes`` of every
    beat in ``analysis["beats"][:beat_index]``. ``analysis`` is not mutated.

    Returns:
        A dict with four keys:
          - ``plot_lines``: plot-line dicts whose ``status`` reflects the
            replayed changes; a change naming an unknown line appends a new
            entry with ``mice_type`` "?".
          - ``characters``: character dicts; every character hit by a change
            carries a ``recent_changes`` list capped at its 3 latest entries.
          - ``plot_line_events``: {plot-line name -> ["old → new", ...]}.
          - ``recent_beats``: id/type/summary/outcome records of the last 5
            replayed beats.

    Raises:
        KeyError: if a plot-line change lacks ``name`` or ``new_state``, or a
            character change lacks ``name`` (or ``change`` once matched).
    """
    # `or` fallbacks also cover explicit JSON nulls, not only missing keys.
    outline = analysis.get("outline") or {}
    state = {
        "plot_lines": deepcopy(outline.get("plot_lines") or []),
        "characters": deepcopy(analysis.get("characters") or []),
        "plot_line_events": {},  # name -> [str]
        "recent_beats": [],
    }

    for b in (analysis.get("beats") or [])[:beat_index]:
        changes = b.get("state_changes") or {}

        # Update plot-line status and record the transition history.
        for pl in changes.get("plot_lines") or []:
            name, new_state = pl["name"], pl["new_state"]
            existing = next(
                (line for line in state["plot_lines"] if line.get("name") == name),
                None,
            )
            if existing is not None:
                existing["status"] = new_state
            else:
                # Unknown thread: register it; its description starts out as
                # the state it was introduced with.
                state["plot_lines"].append(
                    {"name": name, "status": new_state,
                     "mice_type": "?", "description": new_state}
                )
            event = f"{pl.get('old_state', '?')} → {new_state}"
            state["plot_line_events"].setdefault(name, []).append(event)

        # Append character changes, keeping only the 3 most recent.
        for ch in changes.get("characters") or []:
            target = next(
                (c for c in state["characters"] if c.get("name") == ch["name"]),
                None,
            )
            if target is not None:
                history = target.setdefault("recent_changes", [])
                history.append(ch["change"])
                target["recent_changes"] = history[-3:]

        # Record the beat itself, keeping only the 5 most recent.
        beat_type = b.get("type", "")
        outcome_key = "disaster" if beat_type == "scene" else "decision"
        state["recent_beats"].append({
            "id": b.get("id", ""),
            "type": beat_type,
            "summary": b.get("summary", ""),
            "outcome": b.get(outcome_key, ""),
        })
        state["recent_beats"] = state["recent_beats"][-5:]

    return state
|
|
|
|
|
|
|
|
|
-def get_last_disaster_decision(beats: list[dict], before_index: int) -> tuple[str, str]:
|
|
|
+def get_last_disaster_decision(beats: List[dict], before_index: int) -> tuple:
|
|
|
"""返回 beat_index 之前最后一个 scene 的 disaster 和 最后一个 sequel 的 decision"""
|
|
|
last_disaster = "无(故事开局)"
|
|
|
last_decision = "无(故事开局)"
|
|
|
@@ -155,24 +195,104 @@ def get_last_disaster_decision(beats: list[dict], before_index: int) -> tuple[st
|
|
|
return last_disaster, last_decision
|
|
|
|
|
|
|
|
|
-def format_mice_threads(plot_lines: list[dict]) -> str:
|
|
|
- active = [pl for pl in plot_lines if pl.get("status") not in ["已解决", "已关闭"]]
|
|
|
- if not active:
|
|
|
- return "(无活跃线程)"
|
|
|
- lines = []
|
|
|
- for pl in active:
|
|
|
- mice = pl.get("mice_type", "?")
|
|
|
- lines.append(f" [{mice}] {pl['name']}({pl['status']}):{pl.get('description', '')}")
|
|
|
- return "\n".join(lines)
|
|
|
def _as_text_list(value) -> list:
    """Normalise a str / list / tuple / None analysis field into a list of strings."""
    if value is None or value == "":
        return []
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value]
    return [str(value)]


def format_story_notes(
    analysis: dict,
    state: dict,
    last_disaster: str,
    last_decision: str,
) -> str:
    """Render the story notes that are embedded in the task prompts.

    Sections are emitted in this order, each only when it has content, and
    joined by blank lines:
      1. main plot (``analysis["outline"]["main_plot"]``)
      2. active plot lines, each with its last 3 recorded events,
         ``core_question`` and ``next_steps``; resolved lines are listed by
         name at the end of this section
      3. characters (goal, traits, speaking style, current state, up to 4
         relationships, recent changes)
      4. recent beats from ``state["recent_beats"]``
      5. writing insights (``analysis["writing_insights"]``)
      6. the last scene disaster and last sequel decision (always present)

    Args:
        analysis: The parsed analysis document.
        state: A snapshot shaped like the result of ``build_state_snapshot``.
        last_disaster: Disaster of the last scene before the current beat.
        last_decision: Decision of the last sequel before the current beat.

    Returns:
        The notes as one Markdown-flavoured string. No length limit is
        enforced here.

    Raises:
        KeyError: if a plot line or character has no ``name``.
    """
    closed_statuses = ("已解决", "已关闭")
    parts = []

    # 1. Main plot summary
    main_plot = (analysis.get("outline") or {}).get("main_plot", "")
    if main_plot:
        parts.append(f"**主线**:{main_plot}")

    # 2. Active plot lines (with core_question, next_steps, event history)
    plot_lines = state.get("plot_lines") or []
    active = [pl for pl in plot_lines if pl.get("status") not in closed_statuses]
    resolved = [pl for pl in plot_lines if pl.get("status") in closed_statuses]
    if active:
        events_by_name = state.get("plot_line_events") or {}
        lines = ["**活跃线索**:"]
        for pl in active:
            mice = pl.get("mice_type", "?")
            events = events_by_name.get(pl["name"], [])
            ev_str = f"(进展:{';'.join(events[-3:])})" if events else ""
            extra = ""
            cq = pl.get("core_question", "")
            if cq:
                extra += f" 核心问:{cq}"
            # next_steps may be a single string or a list of steps.
            ns = ";".join(_as_text_list(pl.get("next_steps")))
            if ns:
                extra += f" 待推进:{ns}"
            # A line without a status passes the "active" filter above, so the
            # status must be read tolerantly here as well.
            lines.append(
                f"- [{mice}] {pl['name']}({pl.get('status', '?')}):"
                f"{pl.get('description', '')}{ev_str}{extra}"
            )
        if resolved:
            lines.append(f"- 已结:{'、'.join(p['name'] for p in resolved)}")
        parts.append("\n".join(lines))

    # 3. Characters
    characters = state.get("characters") or []
    if characters:
        lines = ["**人物**:"]
        for c in characters:
            segs = [f"{c['name']}({c.get('role', '?')})目标:{c.get('goal', '')}"]
            traits = _as_text_list(c.get("traits"))
            if traits:
                segs.append(f"性格:{'、'.join(traits)}")
            style = _as_text_list(c.get("speaking_style"))
            if style:
                segs.append(f"说话风格:{','.join(style)}")
            cur_state = c.get("current_state", "")
            if cur_state:
                segs.append(f"当前处境:{cur_state}")
            rels = c.get("relationships")
            if isinstance(rels, dict):
                rel_items = [f"{k}→{v}" for k, v in list(rels.items())[:4]]
            else:
                rel_items = _as_text_list(rels)[:4]
            if rel_items:
                segs.append(f"关系:{';'.join(rel_items)}")
            recent = _as_text_list(c.get("recent_changes"))
            if recent:
                segs.append(f"近期:{';'.join(recent)}")
            lines.append("- " + "。".join(segs))
        parts.append("\n".join(lines))

    # 4. Recent beats
    recent_beats = state.get("recent_beats") or []
    if recent_beats:
        lines = ["**近期节拍**:"]
        for b in recent_beats:
            is_scene = b.get("type") == "scene"
            tag = "场景" if is_scene else "后续"
            outcome_label = "结局" if is_scene else "决定"
            outcome = f" → {outcome_label}:{b['outcome']}" if b.get("outcome") else ""
            lines.append(f"- [{b.get('id', '')}·{tag}] {b.get('summary', '')}{outcome}")
        parts.append("\n".join(lines))

    # 5. Writing insights (window-level)
    wi = analysis.get("writing_insights")
    if isinstance(wi, dict):
        labelled_keys = (
            ("techniques", "技巧"),
            ("shuang_designs", "爽点设计"),
            ("pacing", "节奏"),
        )
        wi_lines = [
            f"- {label}:{item}"
            for key, label in labelled_keys
            for item in _as_text_list(wi.get(key))
        ]
        if wi_lines:
            parts.append("**写作亮点**:\n" + "\n".join(wi_lines))

    # 6. Still unresolved
    parts.append(
        f"**待解决**:上一场景结局:{last_disaster};上一个决定:{last_decision}"
    )

    return "\n\n".join(parts)
|
|
|
|
|
|
|
|
|
def calc_position_percent(beat: dict, total_chars: int) -> float:
|
|
|
@@ -183,24 +303,99 @@ def calc_position_percent(beat: dict, total_chars: int) -> float:
|
|
|
# Task 1:结构规划(Structure Planning)
|
|
|
# ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
-TASK1_SYSTEM = (
|
|
|
- "你是一位专业的长篇小说结构规划师,精通 Scene-Sequel 结构、MICE 线程理论、"
|
|
|
- "以及中国网文爽点与钩子设计。请严格按指定格式输出。"
|
|
|
-)
|
|
|
+TASK1_SYSTEM = """\
|
|
|
+你是资深网文作者,擅长基于故事笔记规划场景。
|
|
|
+
|
|
|
+## 核心能力
|
|
|
+1. **分析笔记**:理解当前故事状态、活跃线索、人物动态
|
|
|
+2. **规划场景**:基于笔记设计下一个场景的结构
|
|
|
+3. **更新笔记**:记录场景对故事状态的改变
|
|
|
+
|
|
|
+## 工作流程
|
|
|
+1. 仔细阅读故事笔记(当前状态、活跃线索、待办事项)
|
|
|
+2. 在 `<think>` 中展示你的思考过程(800-1500字)
|
|
|
+3. 输出场景规划(JSON 格式)
|
|
|
+4. 输出笔记更新(Markdown 格式)
|
|
|
+
|
|
|
+---
|
|
|
+
|
|
|
+## Think 要求
|
|
|
+
|
|
|
+在 `<think>` 标签中,展示你真实的创作思维过程。**不要求固定格式**,但需要包含以下核心要素:
|
|
|
+
|
|
|
+必须包含的要素:
|
|
|
+1. **笔记分析**:当前故事进行到哪里?哪些线索在推进?主要角色的目标、冲突、关系状态;笔记中标记的待推进事项和风险点
|
|
|
+2. **方案推演**:至少考虑 2-3 种不同的场景设计方案;对比各方案的优缺点;说明为什么选择某个方案
|
|
|
+3. **笔记更新计划**:这个场景会推进哪些线索?哪些人物状态会变化?需要新增或完成哪些待推进事项?
|
|
|
+
|
|
|
+鼓励的思维方式:
|
|
|
+- **跳跃联想**:从笔记的某个细节突然想到类似案例
|
|
|
+- **自我质疑**:推翻之前的想法,重新思考
|
|
|
+- **细节推敲**:对某个对话、动作、道具的反复打磨
|
|
|
+- **灵感闪现**:突然意识到某个巧妙的设计
|
|
|
+- **风险预警**:发现可能的逻辑漏洞或人设崩塌
|
|
|
+
|
|
|
+不要求固定章节标题(如【笔记分析】【方案推演】),不需要按固定顺序展开,可以有口语化、跳跃、修正。
|
|
|
+
|
|
|
+---
|
|
|
+
|
|
|
+## 输出格式
|
|
|
+
|
|
|
+### 1. 场景规划(JSON)
|
|
|
+```json
|
|
|
+{
|
|
|
+ "scene_type": "scene | sequel",
|
|
|
+ "goal": "角色目标",
|
|
|
+ "conflict_type": "冲突类型",
|
|
|
+ "conflict_description": "...",
|
|
|
+ "disaster": "场景结尾的灾难/转折(scene 类型必填)",
|
|
|
+ "sequel": {"reaction": "...", "dilemma": "...", "decision": "..."},
|
|
|
+ "pacing": "fast|medium|slow",
|
|
|
+ "dialogue_ratio": 0.4,
|
|
|
+ "shuang_point": {
|
|
|
+ "has_shuang": true,
|
|
|
+ "type": "打脸|升级|装逼|获得|碾压",
|
|
|
+ "mechanism": "实现机制"
|
|
|
+ },
|
|
|
+ "hooks": ["悬念1", "悬念2"],
|
|
|
+ "mice_threads": {
|
|
|
+ "推进": ["线索名"],
|
|
|
+ "开启": ["新线索名"],
|
|
|
+ "解决": ["已完成线索名"]
|
|
|
+ },
|
|
|
+ "estimated_words": 2000
|
|
|
+}
|
|
|
+```
|
|
|
+
|
|
|
+### 2. 笔记更新(Markdown)
|
|
|
+```markdown
|
|
|
+## 笔记更新
|
|
|
+
|
|
|
+### 剧情线索变化
|
|
|
+- [线索名]:[旧状态] → [新状态]
|
|
|
+- [新线索]:开启([简短描述])
|
|
|
+
|
|
|
+### 人物状态变化
|
|
|
+- [角色名]:[变化描述]
|
|
|
+
|
|
|
+### 待推进更新
|
|
|
+- [✓] [已完成事项]
|
|
|
+- [ ] [新增事项](紧急/重要)
|
|
|
+
|
|
|
+### 新增写作亮点(可选)
|
|
|
+- [技巧/桥段]:[描述]
|
|
|
+```
|
|
|
+"""
|
|
|
|
|
|
TASK1_USER_TMPL = """\
|
|
|
-## 故事状态
|
|
|
+## 故事笔记
|
|
|
|
|
|
- 书名:{title}
|
|
|
- 当前位置:第 {chapter} 章,约 {position_pct}% 处
|
|
|
-- 已激活的 MICE 线程:
|
|
|
-{mice_threads}
|
|
|
-- 上一个 Scene 的 Disaster:{last_disaster}
|
|
|
-- 上一个 Sequel 的 Decision:{last_decision}
|
|
|
|
|
|
-## 当前人物状态
|
|
|
+{story_notes}
|
|
|
|
|
|
-{characters}
|
|
|
+---
|
|
|
|
|
|
## 上文(最近 {context_chars} 字)
|
|
|
|
|
|
@@ -208,21 +403,21 @@ TASK1_USER_TMPL = """\
|
|
|
|
|
|
## 任务
|
|
|
|
|
|
-请规划下一个 Scene-Sequel 单元的结构。"""
|
|
|
+请基于故事笔记和上文,完成以下任务:
|
|
|
+
|
|
|
+1. 分析当前故事状态(在 `<think>` 中展示你的思考过程)
|
|
|
+2. 规划下一个场景的结构(JSON 格式)
|
|
|
+3. 输出笔记更新(Markdown 格式)"""
|
|
|
|
|
|
TASK1_COT_GEN_TMPL = """\
|
|
|
-## 故事状态
|
|
|
+## 故事笔记
|
|
|
|
|
|
- 书名:{title}
|
|
|
- 当前位置:第 {chapter} 章,约 {position_pct}% 处
|
|
|
-- 已激活的 MICE 线程:
|
|
|
-{mice_threads}
|
|
|
-- 上一个 Scene 的 Disaster:{last_disaster}
|
|
|
-- 上一个 Sequel 的 Decision:{last_decision}
|
|
|
|
|
|
-## 当前人物状态
|
|
|
+{story_notes}
|
|
|
|
|
|
-{characters}
|
|
|
+---
|
|
|
|
|
|
## 上文(最近 {context_chars} 字)
|
|
|
|
|
|
@@ -237,47 +432,54 @@ TASK1_COT_GEN_TMPL = """\
|
|
|
|
|
|
---
|
|
|
|
|
|
-请以"事前规划"的视角写出你的思考过程和最终规划。
|
|
|
-
|
|
|
-**输出格式**:
|
|
|
+请以"事前规划"的视角展示你真实的创作思维过程(分析笔记状态、推演至少 2-3 个方案并对比优缺点、规划笔记更新),然后输出规划 JSON 和笔记更新。
|
|
|
|
|
|
<think>
|
|
|
-## 叙事状态分析
|
|
|
-[分析当前处于哪个 MICE 线程、节拍、读者情绪积累]
|
|
|
-[分析上一个 Disaster/Decision 对下一步的约束]
|
|
|
-
|
|
|
-## 续写决策
|
|
|
-[决定下一个 Scene 的 Goal、Conflict 类型、Disaster 方向]
|
|
|
-[决定是否需要爽点/钩子,类型和强度]
|
|
|
-[决定节奏:快/慢,对话比例]
|
|
|
+[自由思考过程]
|
|
|
</think>
|
|
|
|
|
|
```json
|
|
|
{{
|
|
|
- "scene": {{
|
|
|
- "goal": "...",
|
|
|
- "conflict_type": "人物冲突|环境冲突|内心冲突|信息冲突",
|
|
|
- "conflict_description": "...",
|
|
|
- "disaster": "...",
|
|
|
- "pacing": "fast|medium|slow",
|
|
|
- "dialogue_ratio": 0.4
|
|
|
- }},
|
|
|
- "sequel": {{
|
|
|
- "reaction": "...",
|
|
|
- "dilemma": "...",
|
|
|
- "decision": "..."
|
|
|
+ "scene_type": "scene | sequel",
|
|
|
+ "goal": "...",
|
|
|
+ "conflict_type": "人物冲突|环境冲突|内心冲突|信息冲突",
|
|
|
+ "conflict_description": "...",
|
|
|
+ "disaster": "...",
|
|
|
+ "sequel": {{"reaction": "...", "dilemma": "...", "decision": "..."}},
|
|
|
+ "pacing": "fast|medium|slow",
|
|
|
+ "dialogue_ratio": 0.4,
|
|
|
+ "shuang_point": {{
|
|
|
+ "has_shuang": true,
|
|
|
+ "type": "打脸|升级|装逼|获得|碾压",
|
|
|
+ "mechanism": "..."
|
|
|
}},
|
|
|
"hooks": [
|
|
|
{{"type": "chapter_end|mid_chapter", "content": "..."}}
|
|
|
],
|
|
|
- "shuang_point": {{
|
|
|
- "has_shuang": true,
|
|
|
- "type": "打脸|升级|装逼|获得|碾压",
|
|
|
- "position": "scene_start|scene_mid|scene_end"
|
|
|
+ "mice_threads": {{
|
|
|
+ "推进": ["线索名"],
|
|
|
+ "开启": ["新线索名"],
|
|
|
+ "解决": ["已完成线索名"]
|
|
|
}},
|
|
|
- "mice_advancement": "M|I|C|E",
|
|
|
"estimated_words": 2000
|
|
|
}}
|
|
|
+```
|
|
|
+
|
|
|
+```markdown
|
|
|
+## 笔记更新
|
|
|
+
|
|
|
+### 剧情线索变化
|
|
|
+- [线索名]:[旧状态] → [新状态]
|
|
|
+
|
|
|
+### 人物状态变化
|
|
|
+- [角色名]:[变化描述]
|
|
|
+
|
|
|
+### 待推进更新
|
|
|
+- [✓] [已完成]
|
|
|
+- [ ] [新增](紧急/重要)
|
|
|
+
|
|
|
+### 新增写作亮点(可选)
|
|
|
+- [技巧]:[描述]
|
|
|
```"""
|
|
|
|
|
|
|
|
|
@@ -310,7 +512,7 @@ async def gen_task1_sample(
|
|
|
context_chars: int,
|
|
|
model: str,
|
|
|
sem: asyncio.Semaphore,
|
|
|
-) -> dict | None:
|
|
|
+) -> Optional[dict]:
|
|
|
async with sem:
|
|
|
meta = analysis.get("_meta", {})
|
|
|
title = meta.get("novel_title", "未知")
|
|
|
@@ -319,8 +521,6 @@ async def gen_task1_sample(
|
|
|
|
|
|
state = build_state_snapshot(analysis, i)
|
|
|
last_disaster, last_decision = get_last_disaster_decision(beats, i)
|
|
|
- mice_threads = format_mice_threads(state["plot_lines"])
|
|
|
- characters = format_characters(state["characters"])
|
|
|
|
|
|
chapter = beat.get("chapter_start", "?")
|
|
|
position_pct = calc_position_percent(beat, total_chars)
|
|
|
@@ -328,14 +528,13 @@ async def gen_task1_sample(
|
|
|
ctx_start = max(0, beat["position_start"] - context_chars)
|
|
|
context_text = novel_text[ctx_start: beat["position_start"]].strip()
|
|
|
|
|
|
+ story_notes = format_story_notes(analysis, state, last_disaster, last_decision)
|
|
|
+
|
|
|
shared_kwargs = dict(
|
|
|
title=title,
|
|
|
chapter=chapter,
|
|
|
position_pct=position_pct,
|
|
|
- mice_threads=mice_threads,
|
|
|
- last_disaster=last_disaster,
|
|
|
- last_decision=last_decision,
|
|
|
- characters=characters,
|
|
|
+ story_notes=story_notes,
|
|
|
context_chars=context_chars,
|
|
|
context_text=context_text,
|
|
|
)
|
|
|
@@ -354,6 +553,9 @@ async def gen_task1_sample(
|
|
|
]
|
|
|
try:
|
|
|
assistant_content = await llm_call(messages, model=model)
|
|
|
+ except ContentFilterError as e:
|
|
|
+ print(f" [Task1] beat {i+1} 内容审查拦截,跳过:{e}")
|
|
|
+ return None
|
|
|
except Exception as e:
|
|
|
print(f" [Task1] beat {i+1} LLM 调用失败:{e}")
|
|
|
return None
|
|
|
@@ -390,6 +592,14 @@ TASK2_SYSTEM = (
|
|
|
)
|
|
|
|
|
|
TASK2_USER_TMPL = """\
|
|
|
+## 故事笔记(概要)
|
|
|
+
|
|
|
+- 书名:{title},当前位置约 {position_pct}% 处
|
|
|
+
|
|
|
+{story_notes_brief}
|
|
|
+
|
|
|
+---
|
|
|
+
|
|
|
## 上文
|
|
|
|
|
|
{context_text}
|
|
|
@@ -403,6 +613,14 @@ TASK2_USER_TMPL = """\
|
|
|
请续写下一段(约 {target_words} 字),风格与上文保持一致。"""
|
|
|
|
|
|
TASK2_COT_GEN_TMPL = """\
|
|
|
+## 故事笔记(概要)
|
|
|
+
|
|
|
+- 书名:{title},当前位置约 {position_pct}% 处
|
|
|
+
|
|
|
+{story_notes_brief}
|
|
|
+
|
|
|
+---
|
|
|
+
|
|
|
## 上文
|
|
|
|
|
|
{context_text}
|
|
|
@@ -417,20 +635,10 @@ TASK2_COT_GEN_TMPL = """\
|
|
|
|
|
|
---
|
|
|
|
|
|
-请以"事前决策"的视角写出写作思考过程,然后直接输出实际续写内容。
|
|
|
-
|
|
|
-**输出格式**:
|
|
|
+请以"事前决策"的视角自由写出写作思考过程(上文衔接方式、爽点植入、人物动机、对话设计等,无需固定段落),然后直接输出实际续写内容。
|
|
|
|
|
|
<think>
|
|
|
-## 上文理解
|
|
|
-[识别上文的叙事状态:最后一个 Scene/Sequel 的位置,主角的情绪状态]
|
|
|
-[识别关键信息:哪些细节需要在续写中呼应]
|
|
|
-
|
|
|
-## 写法决策
|
|
|
-[开头如何衔接:直接延续/场景切换/时间跳跃]
|
|
|
-[爽点如何植入:在哪个位置,用什么方式]
|
|
|
-[钩子如何设置:章末悬念的具体内容]
|
|
|
-[对话设计:谁说什么,潜台词是什么]
|
|
|
+[自由思考过程]
|
|
|
</think>
|
|
|
|
|
|
{actual_text}"""
|
|
|
@@ -445,10 +653,16 @@ async def gen_task2_sample(
|
|
|
context_chars: int,
|
|
|
model: str,
|
|
|
sem: asyncio.Semaphore,
|
|
|
-) -> dict | None:
|
|
|
+) -> Optional[dict]:
|
|
|
async with sem:
|
|
|
meta = analysis.get("_meta", {})
|
|
|
+ title = meta.get("novel_title", "未知")
|
|
|
total_chars = meta.get("total_chars", len(novel_text))
|
|
|
+ beats = analysis.get("beats", [])
|
|
|
+
|
|
|
+ state = build_state_snapshot(analysis, i)
|
|
|
+ last_disaster, last_decision = get_last_disaster_decision(beats, i)
|
|
|
+ position_pct = calc_position_percent(beat, total_chars)
|
|
|
|
|
|
ctx_start = max(0, beat["position_start"] - context_chars)
|
|
|
context_text = novel_text[ctx_start: beat["position_start"]].strip()
|
|
|
@@ -457,6 +671,9 @@ async def gen_task2_sample(
|
|
|
if not beat_text:
|
|
|
return None
|
|
|
|
|
|
+ # Task2 使用精简版笔记:只含活跃线索和人物,不含近期节拍(上文已涵盖)
|
|
|
+ story_notes_brief = format_story_notes(analysis, state, last_disaster, last_decision)
|
|
|
+
|
|
|
# 从 Task1 样本中提取结构规划(assistant 输出部分)
|
|
|
structure_plan = ""
|
|
|
if i < len(task1_samples) and task1_samples[i]:
|
|
|
@@ -473,6 +690,9 @@ async def gen_task2_sample(
|
|
|
beat_hint = beat_text[:300] + "..." if len(beat_text) > 300 else beat_text
|
|
|
|
|
|
cot_prompt = TASK2_COT_GEN_TMPL.format(
|
|
|
+ title=title,
|
|
|
+ position_pct=position_pct,
|
|
|
+ story_notes_brief=story_notes_brief,
|
|
|
context_text=context_text,
|
|
|
structure_plan=structure_plan,
|
|
|
beat_text_hint=beat_hint,
|
|
|
@@ -484,6 +704,9 @@ async def gen_task2_sample(
|
|
|
]
|
|
|
try:
|
|
|
cot_part = await llm_call(messages, model=model)
|
|
|
+ except ContentFilterError as e:
|
|
|
+ print(f" [Task2] beat {i+1} 内容审查拦截,跳过:{e}")
|
|
|
+ return None
|
|
|
except Exception as e:
|
|
|
print(f" [Task2] beat {i+1} LLM 调用失败:{e}")
|
|
|
return None
|
|
|
@@ -501,6 +724,9 @@ async def gen_task2_sample(
|
|
|
assistant_content = cot_part
|
|
|
|
|
|
user_content = TASK2_USER_TMPL.format(
|
|
|
+ title=title,
|
|
|
+ position_pct=position_pct,
|
|
|
+ story_notes_brief=story_notes_brief,
|
|
|
context_text=context_text,
|
|
|
structure_plan=structure_plan,
|
|
|
target_words=target_words,
|
|
|
@@ -535,6 +761,12 @@ TASK3_SYSTEM = (
|
|
|
)
|
|
|
|
|
|
TASK3_GEN_TMPL = """\
|
|
|
+## 故事背景(用于理解爽点来源)
|
|
|
+
|
|
|
+{story_notes_brief}
|
|
|
+
|
|
|
+---
|
|
|
+
|
|
|
## 原文(包含爽点的增强版)
|
|
|
|
|
|
{beat_text}
|
|
|
@@ -546,6 +778,7 @@ TASK3_GEN_TMPL = """\
|
|
|
1. 判断这段文字是否包含明显爽点(打脸/升级/装逼/获得/碾压)
|
|
|
2. 如果有,生成去掉爽点后的"平淡草稿"(保留核心情节事件,但去掉爽感设计)
|
|
|
3. 以编辑视角,写出重新注入爽点的完整思考过程(CoT)和修改说明
|
|
|
+ 注意:CoT 应分析人物性格/关系如何使这个爽点成立,以及与当前剧情线索的联动
|
|
|
|
|
|
**输出格式(严格 JSON)**:
|
|
|
|
|
|
@@ -555,7 +788,7 @@ TASK3_GEN_TMPL = """\
|
|
|
"shuang_type": "打脸|升级|装逼|获得|碾压",
|
|
|
"intensity": "low|medium|high",
|
|
|
"flat_draft": "去掉爽点后的平淡版本(完整文字)",
|
|
|
- "cot": "<think>\\n## 草稿分析\\n[识别草稿问题]\\n\\n## 爽点设计\\n[注入方案]\\n</think>",
|
|
|
+ "cot": "<think>\\n[自由分析草稿问题和注入方案,结合人物特质和线索背景]\\n</think>",
|
|
|
"modification_notes": "注入位置:...\\n爽点类型:...\\n关键改动:..."
|
|
|
}}
|
|
|
```
|
|
|
@@ -563,6 +796,12 @@ TASK3_GEN_TMPL = """\
|
|
|
如果不包含明显爽点,输出:`{{"has_shuang": false}}`"""
|
|
|
|
|
|
TASK3_USER_TMPL = """\
|
|
|
+## 故事背景
|
|
|
+
|
|
|
+{story_notes_brief}
|
|
|
+
|
|
|
+---
|
|
|
+
|
|
|
## 平淡草稿
|
|
|
|
|
|
{flat_draft}
|
|
|
@@ -572,6 +811,7 @@ TASK3_USER_TMPL = """\
|
|
|
- 爽点类型:{shuang_type}
|
|
|
- 强度:{intensity}(low=轻微强化 | medium=明显提升 | high=大幅改写)
|
|
|
- 不改变核心情节,只增强情感冲击力
|
|
|
+- 结合人物性格特质和当前剧情线索设计爽感
|
|
|
|
|
|
## 任务
|
|
|
|
|
|
@@ -585,7 +825,7 @@ async def gen_task3_sample(
|
|
|
novel_text: str,
|
|
|
model: str,
|
|
|
sem: asyncio.Semaphore,
|
|
|
-) -> dict | None:
|
|
|
+) -> Optional[dict]:
|
|
|
# 只处理有爽点的 beat
|
|
|
sp = beat.get("shuang_point", {})
|
|
|
if not sp.get("has_shuang"):
|
|
|
@@ -594,19 +834,30 @@ async def gen_task3_sample(
|
|
|
async with sem:
|
|
|
meta = analysis.get("_meta", {})
|
|
|
total_chars = meta.get("total_chars", len(novel_text))
|
|
|
+ beats = analysis.get("beats", [])
|
|
|
+
|
|
|
+ state = build_state_snapshot(analysis, i)
|
|
|
+ last_disaster, last_decision = get_last_disaster_decision(beats, i)
|
|
|
+ story_notes_brief = format_story_notes(analysis, state, last_disaster, last_decision)
|
|
|
|
|
|
beat_text = novel_text[beat["position_start"]: beat["position_end"]].strip()
|
|
|
if len(beat_text) < 200:
|
|
|
return None
|
|
|
|
|
|
# 生成平淡草稿 + CoT
|
|
|
- gen_prompt = TASK3_GEN_TMPL.format(beat_text=beat_text)
|
|
|
+ gen_prompt = TASK3_GEN_TMPL.format(
|
|
|
+ story_notes_brief=story_notes_brief,
|
|
|
+ beat_text=beat_text,
|
|
|
+ )
|
|
|
messages = [
|
|
|
{"role": "system", "content": TASK3_SYSTEM},
|
|
|
{"role": "user", "content": gen_prompt},
|
|
|
]
|
|
|
try:
|
|
|
raw = await llm_call(messages, model=model)
|
|
|
+ except ContentFilterError as e:
|
|
|
+ print(f" [Task3] beat {i+1} 内容审查拦截,跳过:{e}")
|
|
|
+ return None
|
|
|
except Exception as e:
|
|
|
print(f" [Task3] beat {i+1} LLM 调用失败:{e}")
|
|
|
return None
|
|
|
@@ -614,7 +865,10 @@ async def gen_task3_sample(
|
|
|
try:
|
|
|
result = extract_json_block(raw)
|
|
|
except Exception:
|
|
|
- print(f" [Task3] beat {i+1} JSON 解析失败,跳过")
|
|
|
+ # 保存原始响应供排查
|
|
|
+ debug_path = Path(f"/tmp/task3_beat{i+1}_debug.txt")
|
|
|
+ debug_path.write_text(raw, encoding="utf-8")
|
|
|
+ print(f" [Task3] beat {i+1} JSON 解析失败,原始响应已保存至 {debug_path},跳过")
|
|
|
return None
|
|
|
|
|
|
if not result.get("has_shuang"):
|
|
|
@@ -631,6 +885,7 @@ async def gen_task3_sample(
|
|
|
|
|
|
# 训练样本
|
|
|
user_content = TASK3_USER_TMPL.format(
|
|
|
+ story_notes_brief=story_notes_brief,
|
|
|
flat_draft=flat_draft,
|
|
|
shuang_type=shuang_type,
|
|
|
intensity=intensity,
|
|
|
@@ -672,15 +927,19 @@ async def build_all(
|
|
|
novel_path: str,
|
|
|
output_dir: str,
|
|
|
context_chars: int,
|
|
|
- skip_tasks: set[int],
|
|
|
+ skip_tasks: Set[int],
|
|
|
model: str,
|
|
|
concurrency: int,
|
|
|
+ max_beats: Optional[int] = None,
|
|
|
):
|
|
|
with open(analysis_path, encoding="utf-8") as f:
|
|
|
analysis = json.load(f)
|
|
|
|
|
|
novel_text = load_text(novel_path)
|
|
|
beats = analysis.get("beats", [])
|
|
|
+ if max_beats is not None:
|
|
|
+ beats = beats[:max_beats]
|
|
|
+ analysis = dict(analysis, beats=beats) # 局部视图,不修改文件
|
|
|
out = Path(output_dir)
|
|
|
sem = asyncio.Semaphore(concurrency)
|
|
|
|
|
|
@@ -692,7 +951,7 @@ async def build_all(
|
|
|
stats = {}
|
|
|
|
|
|
# ── Task 1 ──────────────────────────────────
|
|
|
- task1_samples: list[dict | None] = [None] * len(beats)
|
|
|
+ task1_samples: List[Optional[dict]] = [None] * len(beats)
|
|
|
if 1 not in skip_tasks:
|
|
|
print("[Task 1] 结构规划(Structure Planning)...")
|
|
|
tasks = [
|
|
|
@@ -765,6 +1024,10 @@ def main():
|
|
|
help="并发 LLM 调用数(默认 5)",
|
|
|
)
|
|
|
parser.add_argument("--model", default="qwen-plus", help="使用的模型名称")
|
|
|
+ parser.add_argument(
|
|
|
+ "--max-beats", type=int, default=None,
|
|
|
+ help="只处理前 N 个 beat(用于试运行验证)",
|
|
|
+ )
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
asyncio.run(
|
|
|
@@ -776,6 +1039,7 @@ def main():
|
|
|
set(args.skip_task),
|
|
|
args.model,
|
|
|
args.concurrency,
|
|
|
+ args.max_beats,
|
|
|
)
|
|
|
)
|
|
|
|