| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169 |
- #!/usr/bin/env python3
- """
- 账号人设总结:
- 1. 从 input/{account_name}/tree 目录下读取人设树 JSON 文件并合并
- 2. 将合并后的 JSON 填充到 topic_summary_prompt.md 中的 {topic_point_tree}
- 3. 调用大模型生成账号人设总结,写入 input/{account_name}/persona_data/persona_summary.json
- """
- import argparse
- import asyncio
- import json
- import logging
- import sys
- from pathlib import Path
- from typing import Any, Dict
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Make sure the LLM call wrapper inside the `agent` package can be imported:
# put the third-level ancestor directory of this file at the front of
# sys.path (only if it is not already listed there).
_project_root = Path(__file__).resolve().parent.parent.parent
if str(_project_root) not in sys.path:
    sys.path.insert(0, str(_project_root))

try:
    from agent.llm.openrouter import openrouter_llm_call
except ImportError:  # pragma: no cover - degraded fallback, only for local runs missing the dependency
    # Leave a None sentinel; generate_topic_summary checks for it before calling.
    openrouter_llm_call = None  # type: ignore[assignment]

# Reuse the same model as search_and_eval to keep behavior consistent.
EVAL_LLM_MODEL = "google/gemini-3.1-pro-preview"

# Directory containing this file; per-account data is read from and written
# to sub-directories of BASE_DIR / "input".
BASE_DIR = Path(__file__).resolve().parent
INPUT_BASE = BASE_DIR / "input"
- def _extract_json_object(content: str) -> Dict[str, Any]:
- """
- 从 LLM 回复中解析第一个 JSON 对象(允许被 ```json ... ``` 包裹)。
- 逻辑参考 tools/search_and_eval.py 中的实现。
- """
- content = content.strip()
- # 处理 ```json ... ``` 包裹的情况
- import re
- m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", content)
- if m:
- content = m.group(1).strip()
- # 截取最外层 { ... }
- start = content.find("{")
- end = content.rfind("}")
- if start != -1 and end != -1:
- content = content[start : end + 1]
- return json.loads(content)
def _load_topic_point_tree(account_name: str) -> Dict[str, Any]:
    """Load every persona-tree JSON file of an account into one dictionary.

    All ``*.json`` files directly inside ``input/{account_name}/tree`` are
    read in sorted path order. The result maps each file name without its
    suffix to the parsed JSON content of that file.

    Args:
        account_name: Name of the account directory under ``input``.

    Returns:
        A dictionary of ``{file stem: parsed JSON}``.

    Raises:
        FileNotFoundError: If the tree directory does not exist or holds no
            JSON files.
        ValueError: If one of the files is not valid JSON.
    """
    tree_dir = INPUT_BASE / account_name / "tree"
    if not tree_dir.is_dir():
        raise FileNotFoundError(f"人设树目录不存在: {tree_dir}")

    json_paths = sorted(tree_dir.glob("*.json"))
    if not json_paths:
        raise FileNotFoundError(f"人设树目录中未找到任何 JSON 文件: {tree_dir}")

    trees: Dict[str, Any] = {}
    for json_path in json_paths:
        raw_text = json_path.read_text(encoding="utf-8")
        try:
            trees[json_path.stem] = json.loads(raw_text)
        except json.JSONDecodeError as exc:
            # Re-raise with the offending path so the bad file is easy to find.
            raise ValueError(f"解析 JSON 文件失败: {json_path}") from exc
        logger.info("已加载人设树文件: %s", json_path.name)
    return trees
def _load_prompt_template() -> str:
    """Return the text of ``topic_summary_prompt.md`` located next to this file.

    Raises:
        FileNotFoundError: If the template file does not exist.
    """
    template_file = BASE_DIR / "topic_summary_prompt.md"
    if template_file.is_file():
        return template_file.read_text(encoding="utf-8")
    raise FileNotFoundError(f"找不到 prompt 模板文件: {template_file}")
async def generate_topic_summary(account_name: str) -> Dict[str, Any]:
    """Generate the persona summary of an account and persist it as JSON.

    The merged persona tree is substituted for the ``{topic_point_tree}``
    placeholder of the prompt template, the prompt is sent to the LLM, and
    the JSON object parsed from the reply is written to
    ``input/{account_name}/persona_data/persona_summary.json``.

    Args:
        account_name: Name of the account directory under ``input``.

    Returns:
        The parsed summary produced by the LLM.

    Raises:
        RuntimeError: If the LLM wrapper could not be imported, the LLM
            returned no content, or its reply could not be parsed as JSON.
    """
    if openrouter_llm_call is None:
        raise RuntimeError("未找到 openrouter_llm_call,请检查 agent.llm 依赖是否可用。")

    # Step 1: load and merge the persona trees, then serialize them.
    merged_tree = _load_topic_point_tree(account_name)
    tree_json = json.dumps(merged_tree, ensure_ascii=False, indent=2)
    logger.info("已合并人设树,共包含 %d 个子树", len(merged_tree))

    # Step 2: fill the serialized tree into the prompt template.
    system_prompt = _load_prompt_template().replace("{topic_point_tree}", tree_json)

    # Step 3: ask the LLM for the summary.
    user_instruction = "请根据以上说明,严格按照 JSON 模板输出账号人设总结,仅输出 JSON,不要包含其他解释性文字。"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_instruction},
    ]
    logger.info("开始调用 LLM 生成账号人设总结,account_name=%s", account_name)
    llm_result = await openrouter_llm_call(messages, model=EVAL_LLM_MODEL)

    # Only a dict result is inspected; anything else counts as an empty reply.
    reply_text = ""
    if isinstance(llm_result, dict):
        reply_text = llm_result.get("content", "")
    if not reply_text:
        raise RuntimeError("LLM 未返回任何内容")

    try:
        summary = _extract_json_object(reply_text)
    except Exception as exc:  # noqa: BLE001
        logger.exception("解析 LLM 返回的 JSON 失败")
        raise RuntimeError(f"解析 LLM 返回内容失败: {exc}") from exc

    # Step 4: write the summary to persona_summary.json.
    output_file = INPUT_BASE / account_name / "persona_data" / "persona_summary.json"
    output_file.parent.mkdir(parents=True, exist_ok=True)
    with open(output_file, "w", encoding="utf-8") as out:
        json.dump(summary, out, ensure_ascii=False, indent=2)
    logger.info("已写入账号人设总结到文件: %s", output_file)

    return summary
def main(account_name) -> None:
    """Generate the persona summary for *account_name* and print it as JSON.

    Configures root logging at INFO level, runs ``generate_topic_summary``
    to completion on a fresh event loop, and prints the resulting summary
    to stdout. No command-line parsing happens here; the caller supplies
    the account name directly.

    Args:
        account_name: Name of the account directory under ``input``.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )
    logger.info("生成账号人设总结,account_name=%s", account_name)

    summary = asyncio.run(generate_topic_summary(account_name))
    print(json.dumps(summary, ensure_ascii=False, indent=2))
if __name__ == "__main__":
    # Script entry point. The account name may be given as an optional
    # positional command-line argument; when omitted, the previously
    # hard-coded default account is used, so a bare invocation behaves
    # exactly as before.
    _parser = argparse.ArgumentParser(description="根据人设树生成账号人设总结")
    _parser.add_argument(
        "account_name",
        nargs="?",
        default="家有大志",
        help="账号名称(对应 input/{account_name} 目录)",
    )
    main(account_name=_parser.parse_args().account_name)
|