batch_3forms.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. """
  2. 三形式 query 批量搜索 + 多模态评估
  3. 针对 high_priority_queries.json 的前 N 条高优 query,每条 query 用三种形式搜索 + 评估:
  4. 形式 A(原词组合):直接用 item["q"],如 "反推 提示词 教程"
  5. 形式 B(句子填充):gemini flash 把词组改写成自然搜索短句,**禁止注入具体工具/品牌/示例**
  6. 形式 C(同义替换):按 synonym_pools 对 动作/类型/知识词 各取同义词重组
  7. 输出(按 query 分文件夹):
  8. output_dir/
  9. q00/ form_A.json form_B.json form_C.json
  10. q01/ ...
  11. ...
  12. forms_preview.json # 三形式 query 预览
  13. summary.json # 三形式对比汇总
  14. 每个 form_X.json = {query 词} ↔ {帖子源信息 + 评估结果}(一对多)。
  15. 搜索 / 评估 / 多模态图片逻辑复用 script/search_and_evaluate.py。
  16. 用法:
  17. python batch_3forms.py --count 10 --platforms xhs,gzh,zhihu --max-count 10 \
  18. --output-dir runs/3forms_001
  19. """
  20. import argparse
  21. import asyncio
  22. import json
  23. import random
  24. import sys
  25. from pathlib import Path
  26. from typing import Any, Callable, Dict, List, Optional, Tuple
  27. _PROJECT_ROOT = Path(__file__).resolve().parents[4]
  28. if str(_PROJECT_ROOT) not in sys.path:
  29. sys.path.insert(0, str(_PROJECT_ROOT))
  30. from examples.process_pipeline.script.llm_helper import call_llm_with_retry
  31. from examples.process_pipeline.script.search_and_evaluate import (
  32. search_all, evaluate_posts, transcribe_video_posts, build_query_overrides,
  33. )
  34. from examples.process_pipeline.script.llm_evaluate_sources import build_eval_llm_call
# Evaluation directory under the project root; synonym_pools.json is resolved from here.
_EVAL_DIR = _PROJECT_ROOT / "examples" / "process_pipeline" / "test_script" / "evaluation"
# Default query source: high_priority_queries.json located next to this script.
_HIGH_PRIORITY = Path(__file__).resolve().parent / "high_priority_queries.json"
# Synonym pools file read by form_c to build the form-C queries.
_SYNONYM_POOLS = _EVAL_DIR / "synonym_pools.json"
  38. # ── 形式 A:原词组合 ─────────────────────────────────────────────────────────────
  39. def form_a(items: List[Dict[str, Any]]) -> List[str]:
  40. return [it["q"] for it in items]
  41. # ── 形式 B:gemini 句子化(禁止注入示例)─────────────────────────────────────────
  42. def _validate_sentences(data: Dict[str, Any], n: int) -> Optional[str]:
  43. qs = data.get("sentences")
  44. if not isinstance(qs, list):
  45. return "sentences 必须是数组"
  46. if len(qs) != n:
  47. return f"sentences 长度应为 {n},得到 {len(qs)}"
  48. if not all(isinstance(x, str) and x.strip() for x in qs):
  49. return "sentences 每项必须是非空字符串"
  50. return None
  51. async def form_b(items: List[Dict[str, Any]], llm_call: Callable, model: str) -> Tuple[List[str], float]:
  52. """把每条词组改写成自然搜索短句(一次批量调用,按序对齐)。"""
  53. words = [it["q"] for it in items]
  54. system = (
  55. "你是中文搜索词改写器。把每个『关键词组』改写成一句自然、口语、适合在内容平台"
  56. "搜索框输入的短句。严格要求:只表达词组本身的意图,"
  57. "**绝不添加任何具体工具名 / 品牌 / 产品 / 模型名 / 风格名 / 数字示例**"
  58. "(如 Midjourney、赛博朋克、SD 等都禁止出现)。只输出 JSON。"
  59. )
  60. user = (
  61. "把下面每个词组改写成一句自然搜索短句,顺序一一对应,输出:\n"
  62. '{"sentences": ["短句1", "短句2", ...]}\n\n'
  63. f"词组列表(共 {len(words)} 个):\n{json.dumps(words, ensure_ascii=False, indent=2)}"
  64. )
  65. data, cost = await call_llm_with_retry(
  66. llm_call=llm_call, messages=[{"role": "system", "content": system},
  67. {"role": "user", "content": user}],
  68. model=model, temperature=0.4, max_tokens=2000,
  69. validate_fn=lambda d: _validate_sentences(d, len(words)), task_name="FormB",
  70. )
  71. if not data:
  72. print(" ⚠️ form B 生成失败,回退用原词组")
  73. return list(words), cost
  74. return [s.strip() for s in data["sentences"]], cost
  75. # ── 形式 C:同义替换重组 ─────────────────────────────────────────────────────────
  76. class SynonymComposer:
  77. def __init__(self, pools: Dict[str, Any], rng: random.Random):
  78. self.action = pools.get("action_leaves", {})
  79. self.types = pools.get("types", {})
  80. self.knowledge = pools.get("knowledge", {})
  81. self.tool_type = pools.get("tool_type", {})
  82. self.rng = rng
  83. def _pick(self, pool: Any, fallback: str) -> str:
  84. pool = [x for x in pool if isinstance(x, str)] if isinstance(pool, list) else []
  85. return self.rng.choice(pool) if pool else fallback
  86. def compose(self, item: Dict[str, Any]) -> str:
  87. """按 synonym_pools._usage:[模态/工具前缀] 动作 类型 知识词。"""
  88. parts: List[str] = []
  89. c = item.get("constraint")
  90. if isinstance(c, dict):
  91. if c.get("kind") == "模态" and c.get("value"):
  92. parts.append(str(c["value"]))
  93. elif c.get("kind") == "工具类型":
  94. parts.append(self._pick(self.tool_type.get(c.get("value")), str(c.get("限定词") or "")))
  95. parts.append(self._pick(self.action.get(item.get("action", "")), item.get("action", "")))
  96. parts.append(self._pick(self.types.get(item.get("type", "")), item.get("type", "")))
  97. gx = self.knowledge.get("工序", {})
  98. parts.append(self._pick(gx.get("单步") if isinstance(gx, dict) else None, "教程"))
  99. return " ".join(p for p in parts if p)
  100. def form_c(items: List[Dict[str, Any]], seed: int) -> List[str]:
  101. pools = json.loads(_SYNONYM_POOLS.read_text(encoding="utf-8"))
  102. composer = SynonymComposer(pools, random.Random(seed))
  103. return [composer.compose(it) for it in items]
  104. # ── 单个 (query, form) 的搜索 + 评估 + 落盘 ──────────────────────────────────────
  105. async def run_one(
  106. qtext: str, form_key: str, original_q: str,
  107. args, eval_llm, eval_model_id, out_file: Path,
  108. query_overrides=None,
  109. ) -> Dict[str, Any]:
  110. platforms = [p.strip() for p in args.platforms.split(",") if p.strip()]
  111. sources = await search_all(platforms, [qtext], args.max_count, args.max_concurrent,
  112. query_overrides=query_overrides)
  113. try:
  114. from examples.process_pipeline.script.extract_sources import _convert_timestamps
  115. _convert_timestamps(sources)
  116. except Exception:
  117. pass
  118. # 视频帖转写:把字幕并入正文再评估(默认开)
  119. if not args.no_transcribe and sources:
  120. n = await transcribe_video_posts(sources, concurrency=args.max_concurrent)
  121. if n:
  122. print(f" 🎙️ 视频转写 {n} 条")
  123. cost = 0.0
  124. if not args.no_eval and sources:
  125. # 评估只看 query 词 + 帖子:把该形式的搜索词 qtext 作为检索锚点
  126. sources, cost = await evaluate_posts(
  127. sources, "", eval_llm, eval_model_id, args.max_concurrent,
  128. include_images=not args.no_images, max_images=args.max_images,
  129. image_mode=args.image_mode, query=qtext,
  130. )
  131. for s in sources:
  132. imgs = s.pop("_image_data_urls", None)
  133. if imgs is not None:
  134. s["images_sent"] = len(imgs)
  135. rep = sum(1 for s in sources
  136. if ((s.get("llm_evaluation") or {}).get("制作相关性") or {}).get("得分") in (2, 3, 2.0, 3.0, "2", "3"))
  137. dis = sum(1 for s in sources
  138. if ((s.get("llm_evaluation") or {}).get("制作相关性") or {}).get("得分") in (1, 1.0, "1"))
  139. failed = sum(1 for s in sources if (s.get("llm_evaluation") or {}).get("_error"))
  140. out_file.parent.mkdir(parents=True, exist_ok=True)
  141. out_file.write_text(json.dumps({
  142. "form": form_key,
  143. "query": qtext, # 该形式实际搜索用的词(也是评估的检索锚点)
  144. "original_q": original_q, # 原词组(形式 A 的基准)
  145. "platforms": platforms,
  146. "total": len(sources), "report": rep, "discard": dis, "failed": failed,
  147. "results": sources, # 帖子源信息 + llm_evaluation,一对多
  148. }, ensure_ascii=False, indent=2), encoding="utf-8")
  149. print(f" [{form_key}] {qtext!r} → total={len(sources)} report={rep} discard={dis} "
  150. f"failed={failed} cost=${cost:.4f}")
  151. return {"form": form_key, "total": len(sources), "report": rep,
  152. "discard": dis, "failed": failed, "cost": round(cost, 4)}
  153. async def reeval_existing(args, eval_llm, eval_model_id) -> None:
  154. """只重跑评估、覆盖旧评估,不重新搜索。
  155. 读 output_dir 下已有的 q*/form_*.json,对里面已抓到的 post 重新评估(评估锚点 = 文件里
  156. 记录的该形式 query 词),原地覆盖 llm_evaluation 后写回。适合改了评估 prompt / 模型后复评。
  157. 用 --start / --count 在 q 编号层(自然数序)切片限制范围,与主流程同语义;每个 q 文件夹下
  158. 的所有 form_A/B/C.json 一起复评(三种形式可比性)。
  159. """
  160. import re
  161. output_dir = Path(args.output_dir)
  162. # 按 q 编号自然数排序:避免 "q10" < "q2" 这种字符串误排(与 server.py _qnum 同口径)
  163. def _qnum(p):
  164. m = re.search(r"\d+", p.name)
  165. return (int(m.group()) if m else 0, p.name)
  166. q_dirs = sorted([d for d in output_dir.glob("q*") if d.is_dir()], key=_qnum)
  167. if not q_dirs:
  168. print(f"❌ {output_dir} 下没有 q*/ 子目录,无可复评内容"); return
  169. # --reeval-q 优先于 --start/--count:直接按 q 名过滤(接 "q01" 或 "q01,q05,q12" 多选)
  170. reeval_q = getattr(args, 'reeval_q', None)
  171. if reeval_q:
  172. wanted = {x.strip() for x in reeval_q.split(',') if x.strip()}
  173. sliced = [d for d in q_dirs if d.name in wanted]
  174. if not sliced:
  175. print(f"[X] 指定 q ({reeval_q}) 在 {output_dir} 下不存在"); return
  176. range_label = f"q={','.join(d.name for d in sliced)}"
  177. else:
  178. sliced = q_dirs[args.start : args.start + args.count]
  179. range_label = f"q[{args.start}:{args.start + args.count}]"
  180. files = [f for qd in sliced for f in sorted(qd.glob("form_*.json"))]
  181. if not files:
  182. print(f"❌ {output_dir} 切片 {range_label} 下没有 form_*.json"); return
  183. print(f"♻️ 复评模式:{range_label} → {len(sliced)} 个 query / "
  184. f"{len(files)} 个文件,模型 {eval_model_id}(不重新搜索)")
  185. for f in files:
  186. d = json.loads(f.read_text(encoding="utf-8"))
  187. results = d.get("results", [])
  188. if not results:
  189. print(f" - {f.relative_to(output_dir)}: 空,跳过"); continue
  190. # 清掉旧评估痕迹,重新评
  191. for s in results:
  192. s.pop("llm_evaluation", None)
  193. s.pop("images_sent", None)
  194. s.pop("_image_data_urls", None)
  195. qtext = d.get("query", "") # 该形式实际搜索词 = 评估检索锚点
  196. if not args.no_transcribe and results:
  197. await transcribe_video_posts(results, concurrency=args.max_concurrent)
  198. results, cost = await evaluate_posts(
  199. results, "", eval_llm, eval_model_id, args.max_concurrent,
  200. include_images=not args.no_images, max_images=args.max_images,
  201. image_mode=args.image_mode, query=qtext,
  202. )
  203. for s in results:
  204. imgs = s.pop("_image_data_urls", None)
  205. if imgs is not None:
  206. s["images_sent"] = len(imgs)
  207. rep = sum(1 for s in results
  208. if ((s.get("llm_evaluation") or {}).get("制作相关性") or {}).get("得分") in (2, 3, 2.0, 3.0, "2", "3"))
  209. dis = sum(1 for s in results
  210. if ((s.get("llm_evaluation") or {}).get("制作相关性") or {}).get("得分") in (1, 1.0, "1"))
  211. failed = sum(1 for s in results if (s.get("llm_evaluation") or {}).get("_error"))
  212. d.update({"results": results,
  213. "total": len(results), "report": rep, "discard": dis, "failed": failed})
  214. d.pop("requirement", None) # 不再用 requirement
  215. f.write_text(json.dumps(d, ensure_ascii=False, indent=2), encoding="utf-8")
  216. print(f" ✓ {f.relative_to(output_dir)}: total={len(results)} report={rep} "
  217. f"discard={dis} failed={failed} cost=${cost:.4f}")
  218. print("♻️ 复评完成(已覆盖原文件)")
  219. async def append_existing(args, eval_llm, eval_model_id, gen_llm, gen_model_id) -> None:
  220. """往已有 q*/form_*.json 追加新渠道结果,不重搜旧渠道。
  221. 用文件里存的 query 词、只搜 --platforms 指定的新渠道,评估后按 (平台, id) 去重合并进
  222. 原 results,旧渠道结果原样保留。适合先跑了中文渠道、再补 youtube/x 等。
  223. """
  224. from examples.process_pipeline.script.extract_sources import _convert_timestamps
  225. output_dir = Path(args.output_dir)
  226. files = sorted(output_dir.glob("q*/form_*.json"))
  227. if not files:
  228. print(f"❌ {output_dir} 下没有 q*/form_*.json,无可追加目标"); return
  229. new_plats = [p.strip() for p in args.platforms.split(",") if p.strip()]
  230. print(f"➕ 追加模式:{len(files)} 个文件追加渠道 {new_plats}(不重搜旧渠道)")
  231. # 英文平台一次性翻译所有 query
  232. queries = list(dict.fromkeys(json.loads(f.read_text(encoding="utf-8")).get("query", "") for f in files))
  233. overrides = await build_query_overrides(new_plats, queries, gen_llm, gen_model_id)
  234. for f in files:
  235. d = json.loads(f.read_text(encoding="utf-8"))
  236. qtext = d.get("query", "")
  237. existing = d.get("results", [])
  238. existing_keys = {(r.get("platform"), r.get("channel_content_id")) for r in existing}
  239. new_sources = await search_all(new_plats, [qtext], args.max_count, args.max_concurrent,
  240. query_overrides=overrides)
  241. new_sources = [s for s in new_sources
  242. if (s.get("platform"), s.get("channel_content_id")) not in existing_keys]
  243. try:
  244. _convert_timestamps(new_sources)
  245. except Exception:
  246. pass
  247. if not args.no_transcribe and new_sources:
  248. await transcribe_video_posts(new_sources, concurrency=args.max_concurrent)
  249. cost = 0.0
  250. if not args.no_eval and new_sources:
  251. new_sources, cost = await evaluate_posts(
  252. new_sources, "", eval_llm, eval_model_id, args.max_concurrent,
  253. include_images=not args.no_images, max_images=args.max_images,
  254. image_mode=args.image_mode, query=qtext,
  255. )
  256. for s in new_sources:
  257. imgs = s.pop("_image_data_urls", None)
  258. if imgs is not None:
  259. s["images_sent"] = len(imgs)
  260. merged = existing + new_sources
  261. plats_union = list(dict.fromkeys((d.get("platforms") or []) + new_plats))
  262. rep = sum(1 for s in merged
  263. if ((s.get("llm_evaluation") or {}).get("制作相关性") or {}).get("得分") in (2, 3, 2.0, 3.0, "2", "3"))
  264. dis = sum(1 for s in merged
  265. if ((s.get("llm_evaluation") or {}).get("制作相关性") or {}).get("得分") in (1, 1.0, "1"))
  266. failed = sum(1 for s in merged if (s.get("llm_evaluation") or {}).get("_error"))
  267. d.update({"platforms": plats_union, "results": merged,
  268. "total": len(merged), "report": rep, "discard": dis, "failed": failed})
  269. f.write_text(json.dumps(d, ensure_ascii=False, indent=2), encoding="utf-8")
  270. print(f" ✓ {f.relative_to(output_dir)}: +{len(new_sources)} 新帖 → total={len(merged)} "
  271. f"report={rep} discard={dis} failed={failed} cost=${cost:.4f}")
  272. print("➕ 追加完成(已并入原文件)")
  273. async def run(args) -> None:
  274. eval_llm0, eval_model0 = build_eval_llm_call(args.eval_model)
  275. if args.reeval:
  276. await reeval_existing(args, eval_llm0, eval_model0)
  277. return
  278. if args.append:
  279. gen_llm0, gen_model0 = build_eval_llm_call(args.gen_model)
  280. await append_existing(args, eval_llm0, eval_model0, gen_llm0, gen_model0)
  281. return
  282. queries_file = Path(args.queries_file) if getattr(args, "queries_file", None) else _HIGH_PRIORITY
  283. all_items = json.loads(queries_file.read_text(encoding="utf-8"))["queries"]
  284. print(f"📂 query 源: {queries_file.name} ({len(all_items)} 条)")
  285. only_q = getattr(args, "only_q", None)
  286. if only_q:
  287. # 支持 "1,5,51" 或 "q01,q05,q51";优先级高于 --start/--count
  288. import re as _re
  289. raw = [t.strip() for t in only_q.split(",") if t.strip()]
  290. idxs = []
  291. for t in raw:
  292. m = _re.match(r"q?(\d+)$", t)
  293. if not m:
  294. print(f"⚠️ 忽略无法解析的 q: {t!r}"); continue
  295. i = int(m.group(1))
  296. if 0 <= i < len(all_items):
  297. idxs.append(i)
  298. else:
  299. print(f"⚠️ idx {i} 超出范围 [0,{len(all_items)}),忽略")
  300. idxs = sorted(dict.fromkeys(idxs)) # 去重保序
  301. if not idxs:
  302. print("❌ --only-q 没有合法索引可用"); return
  303. items = [all_items[i] for i in idxs]
  304. print(f"📋 取 high_priority 指定 {len(idxs)} 条 query (idx={','.join(map(str, idxs))})"
  305. f" | 渠道 {args.platforms} | 每渠道≤{args.max_count} 帖")
  306. else:
  307. start = args.start
  308. items = all_items[start:start + args.count]
  309. idxs = list(range(start, start + len(items))) # 绝对下标,用于文件夹命名
  310. print(f"📋 取 high_priority 第 {start}~{start+len(items)-1} 条 query(共 {len(items)} 条)"
  311. f" | 渠道 {args.platforms} | 每渠道≤{args.max_count} 帖")
  312. eval_llm, eval_model_id = build_eval_llm_call(args.eval_model)
  313. gen_llm, gen_model_id = build_eval_llm_call(args.gen_model)
  314. print(f"🧠 评估模型 {args.eval_model}->{eval_model_id} | form B 生成 {args.gen_model}->{gen_model_id}")
  315. output_dir = Path(args.output_dir)
  316. output_dir.mkdir(parents=True, exist_ok=True)
  317. qa = form_a(items)
  318. qb, b_cost = await form_b(items, gen_llm, gen_model_id)
  319. qc = form_c(items, args.seed)
  320. # forms_preview 用绝对下标做 key,多次区间跑不会互相覆盖
  321. preview_path = output_dir / "forms_preview.json"
  322. preview = {}
  323. if preview_path.exists():
  324. try:
  325. loaded = json.loads(preview_path.read_text(encoding="utf-8"))
  326. if isinstance(loaded, dict):
  327. preview = loaded # 旧版本写成 list,非 dict 一律重置
  328. except Exception:
  329. preview = {}
  330. for j, absi in enumerate(idxs):
  331. preview[str(absi)] = {"idx": absi, "A": qa[j], "B": qb[j], "C": qc[j]}
  332. preview_path.write_text(json.dumps(preview, ensure_ascii=False, indent=2), encoding="utf-8")
  333. print("📝 三形式预览 → forms_preview.json")
  334. for j, absi in enumerate(idxs):
  335. print(f" [{absi}] A={qa[j]!r} B={qb[j]!r} C={qc[j]!r}")
  336. # 英文平台(youtube/x):对全部形式的 query 一次性翻成英文
  337. platforms = [p.strip() for p in args.platforms.split(",") if p.strip()]
  338. all_q = list(dict.fromkeys(qa + qb + qc))
  339. overrides = await build_query_overrides(platforms, all_q, gen_llm, gen_model_id)
  340. summary = []
  341. for j, absi in enumerate(idxs):
  342. qdir = output_dir / f"q{absi:02d}"
  343. print(f"\n▶ q{absi:02d} 原词={qa[j]!r}")
  344. per_form = {}
  345. for fk, qtext in (("A", qa[j]), ("B", qb[j]), ("C", qc[j])):
  346. stat = await run_one(qtext, fk, qa[j], args, eval_llm, eval_model_id,
  347. qdir / f"form_{fk}.json", query_overrides=overrides)
  348. per_form[fk] = stat
  349. summary.append({"idx": absi, "q": qa[j], "forms": per_form})
  350. (output_dir / "summary.json").write_text(json.dumps({
  351. "count": len(items), "platforms": args.platforms, "eval_model": eval_model_id,
  352. "form_b_gen_cost": round(b_cost, 4), "per_query": summary,
  353. }, ensure_ascii=False, indent=2), encoding="utf-8")
  354. # 形式聚合对比
  355. print(f"\n{'='*60}\n📊 三形式对比 (各形式 report/total 合计)")
  356. for fk in ("A", "B", "C"):
  357. tot = sum(s["forms"][fk]["total"] for s in summary)
  358. rep = sum(s["forms"][fk]["report"] for s in summary)
  359. dis = sum(s["forms"][fk]["discard"] for s in summary)
  360. print(f" 形式 {fk}: report={rep}/{tot} discard={dis}")
  361. print(f"→ {output_dir/'summary.json'}")
  362. def main() -> None:
  363. from dotenv import load_dotenv
  364. load_dotenv()
  365. from examples.process_pipeline.script.llm_evaluate_sources import EVAL_MODELS
  366. p = argparse.ArgumentParser(description="三形式 query 批量搜索 + 多模态评估")
  367. p.add_argument("--start", type=int, default=0, help="起始 query 下标(0-based,默认 0)")
  368. p.add_argument("--count", type=int, default=10, help="从 --start 起取几条 query(默认 10)")
  369. p.add_argument("--only-q", default=None,
  370. help="离散指定要跑的 q(如 '51,55,331' 或 'q51,q55,q331'),优先于 --start/--count")
  371. p.add_argument("--queries-file", default=None,
  372. help="自定义 query 源 JSON 路径(结构需含 queries[...]),默认读 high_priority_queries.json")
  373. p.add_argument("--platforms", default="xhs,gzh,zhihu", help="逗号分隔渠道(默认 xhs,gzh,zhihu)")
  374. p.add_argument("--max-count", type=int, default=10, help="每个 (渠道,query) 取几条帖子(默认 10)")
  375. p.add_argument("--output-dir", required=True, help="输出目录")
  376. p.add_argument("--eval-model", default="gemini-flash-lite", choices=list(EVAL_MODELS),
  377. help="评估模型(默认 gemini-flash-lite,多模态)")
  378. p.add_argument("--gen-model", default="gemini-flash-lite", choices=list(EVAL_MODELS),
  379. help="form B 句子生成模型(默认 gemini-flash-lite)")
  380. p.add_argument("--max-concurrent", type=int, default=3, help="搜索 / 评估并发上限")
  381. p.add_argument("--max-images", type=int, default=4, help="每帖最多发给模型几张配图")
  382. p.add_argument("--image-mode", choices=["url", "base64"], default="url",
  383. help="图片传输:url 直传(快,默认) / base64 下载内嵌(稳)")
  384. p.add_argument("--no-images", action="store_true", help="不发图(纯文本评估)")
  385. p.add_argument("--no-transcribe", action="store_true",
  386. help="不对视频帖转写(默认会转写并把字幕并入正文再评估)")
  387. p.add_argument("--no-eval", action="store_true", help="只搜不评估")
  388. p.add_argument("--reeval", action="store_true",
  389. help="只重跑评估、覆盖 output-dir 下已有 q*/form_*.json(不重新搜索);"
  390. "用 --start / --count 在 q 编号层限范围,或 --reeval-q 直接指定")
  391. p.add_argument("--reeval-q", default=None,
  392. help="仅复评指定的 q(如 'q01' 或 'q01,q05,q12'),优先于 --start/--count")
  393. p.add_argument("--append", action="store_true",
  394. help="往已有 q*/form_*.json 追加 --platforms 指定的新渠道结果(不重搜旧渠道)")
  395. p.add_argument("--seed", type=int, default=42, help="form C 同义替换随机种子")
  396. args = p.parse_args()
  397. asyncio.run(run(args))
  398. if __name__ == "__main__":
  399. main()