# hotspot_profile.py
"""
Hotspot Treasure (热点宝) portrait data tools (example).

Calls the internal crawler service to fetch fan portraits for accounts and content.
"""
import asyncio
import json
import logging
import os
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, TypedDict

import requests

from agent.tools import tool, ToolResult
from utils.tool_logging import format_tool_result_for_log, log_tool_call
  14. logger = logging.getLogger(__name__)
  15. _LABEL_ACCOUNT = "工具调用:get_account_fans_portrait -> 抖音账号粉丝画像(热点宝)"
  16. _LABEL_CONTENT = "工具调用:get_content_fans_portrait -> 内容点赞用户画像(热点宝)"
  17. _LABEL_BATCH = "工具调用:batch_fetch_portraits -> 批量获取内容/账号画像(热点宝)"
  18. BATCH_MAX_ITEMS = 30
  19. _BATCH_SNAPSHOT_NAME = "batch_portraits.json"
  20. def _repo_root_from_this_file() -> Path:
  21. # examples/content_finder/tools/hotspot_profile.py -> Agent 仓库根
  22. return Path(__file__).resolve().parents[3]
  23. def _resolve_output_dir_path() -> Path:
  24. raw = (os.getenv("OUTPUT_DIR") or ".cache/output").strip()
  25. p = Path(raw).expanduser()
  26. return p.resolve() if p.is_absolute() else (_repo_root_from_this_file() / p).resolve()
  27. def _persist_batch_portraits_json(
  28. trace_id: Optional[str],
  29. results: List[Dict[str, Any]],
  30. count: int,
  31. ) -> Optional[str]:
  32. """将批量画像结果写入 OUTPUT_DIR/<trace_id>/batch_portraits.json,便于 read_file 与排障。"""
  33. if not trace_id:
  34. return None
  35. try:
  36. out_dir = _resolve_output_dir_path() / trace_id
  37. out_dir.mkdir(parents=True, exist_ok=True)
  38. path = out_dir / _BATCH_SNAPSHOT_NAME
  39. path.write_text(
  40. json.dumps(
  41. {"trace_id": trace_id, "count": count, "results": results},
  42. ensure_ascii=False,
  43. indent=2,
  44. ),
  45. encoding="utf-8",
  46. )
  47. return str(path)
  48. except OSError as e:
  49. logger.warning("batch portrait snapshot write failed: %s", e)
  50. return None
  51. ACCOUNT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/account_fans_portrait"
  52. CONTENT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/video_like_portrait"
  53. DEFAULT_TIMEOUT = 60.0
  54. class _PortraitOk(TypedDict):
  55. output: str
  56. has_portrait: bool
  57. portrait_data: Dict[str, Any]
  58. raw_data: Any
  59. def _log_return(label: str, params: Dict[str, Any], r: ToolResult) -> ToolResult:
  60. log_tool_call(label, params, format_tool_result_for_log(r))
  61. return r
  62. def _top_k(items: Dict[str, Any], k: int) -> List[Tuple[str, Any]]:
  63. def percent_value(entry: Tuple[str, Any]) -> float:
  64. metrics = entry[1] if isinstance(entry[1], dict) else {}
  65. return metrics.get("percentage")
  66. return sorted(items.items(), key=percent_value, reverse=True)[:k]
  67. def _format_portrait_summary(
  68. header_line: str,
  69. link_line: str,
  70. portrait: Dict[str, Any],
  71. ) -> str:
  72. summary_lines = [header_line, link_line, ""]
  73. for k, v in portrait.items():
  74. if not isinstance(v, dict):
  75. continue
  76. if k in ("省份", "城市"):
  77. summary_lines.append(f"【{k} TOP5】分布")
  78. items = _top_k(v, 5)
  79. else:
  80. summary_lines.append(f"【{k}】分布")
  81. items = v.items()
  82. for name, metrics in items:
  83. ratio = metrics.get("percentage")
  84. tgi = metrics.get("preference")
  85. summary_lines.append(f" {name}: {ratio} (偏好度: {tgi})")
  86. summary_lines.append("")
  87. return "\n".join(summary_lines)
  88. def _validate_account_id(account_id: str) -> Optional[str]:
  89. if not account_id or not isinstance(account_id, str):
  90. return "account_id 参数无效:必须是非空字符串"
  91. if not account_id.startswith("MS4wLjABAAAA"):
  92. return (
  93. f"account_id 格式错误:必须以 MS4wLjABAAAA 开头,"
  94. f"当前值: {account_id[:min(20, len(account_id))]}..."
  95. )
  96. return None
  97. def _validate_content_id(content_id: str) -> Optional[str]:
  98. if not content_id or not isinstance(content_id, str):
  99. return "content_id 参数无效:必须是非空字符串"
  100. if not content_id.isdigit():
  101. return f"content_id 格式错误:aweme_id 应该是纯数字,当前值: {content_id[:20]}..."
  102. if len(content_id) < 15 or len(content_id) > 25:
  103. return f"content_id 长度异常:期望 15-25 位数字,实际 {len(content_id)} 位"
  104. return None
  105. def _dimension_flags(
  106. need_province: bool,
  107. need_city: bool,
  108. need_city_level: bool,
  109. need_gender: bool,
  110. need_age: bool,
  111. need_phone_brand: bool,
  112. need_phone_price: bool,
  113. ) -> Dict[str, bool]:
  114. return {
  115. "need_province": need_province,
  116. "need_city": need_city,
  117. "need_city_level": need_city_level,
  118. "need_gender": need_gender,
  119. "need_age": need_age,
  120. "need_phone_brand": need_phone_brand,
  121. "need_phone_price": need_phone_price,
  122. }
  123. def _sync_fetch_account_portrait(
  124. account_id: str,
  125. flags: Dict[str, bool],
  126. request_timeout: float,
  127. ) -> Tuple[Optional[str], Optional[_PortraitOk]]:
  128. err = _validate_account_id(account_id)
  129. if err:
  130. return err, None
  131. payload = {"account_id": account_id, **flags}
  132. try:
  133. response = requests.post(
  134. ACCOUNT_FANS_PORTRAIT_API,
  135. json=payload,
  136. headers={"Content-Type": "application/json"},
  137. timeout=request_timeout,
  138. )
  139. response.raise_for_status()
  140. data = response.json()
  141. except requests.exceptions.HTTPError as e:
  142. return f"HTTP {e.response.status_code}: {e.response.text}", None
  143. except requests.exceptions.Timeout:
  144. return f"请求超时({request_timeout}秒)", None
  145. except requests.exceptions.RequestException as e:
  146. return f"网络错误: {str(e)}", None
  147. except Exception as e:
  148. logger.error(
  149. "account portrait request failed",
  150. extra={"account_id": account_id, "error": str(e)},
  151. exc_info=True,
  152. )
  153. return f"未知错误: {str(e)}", None
  154. data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
  155. portrait = data_block.get("data", {}) if isinstance(data_block.get("data"), dict) else {}
  156. header = f"账号 {account_id} 的粉丝画像"
  157. link = (
  158. f"画像链接:https://douhot.douyin.com/creator/detail?"
  159. f"active_tab=creator_fans_portrait&creator_id={account_id}"
  160. )
  161. output = _format_portrait_summary(header, link, portrait)
  162. has_valid = bool(portrait and any(isinstance(v, dict) and v for v in portrait.values()))
  163. return None, _PortraitOk(
  164. output=output,
  165. has_portrait=has_valid,
  166. portrait_data=portrait,
  167. raw_data=data,
  168. )
  169. def _sync_fetch_content_portrait(
  170. content_id: str,
  171. flags: Dict[str, bool],
  172. request_timeout: float,
  173. ) -> Tuple[Optional[str], Optional[_PortraitOk]]:
  174. err = _validate_content_id(content_id)
  175. if err:
  176. return err, None
  177. payload = {"content_id": content_id, **flags}
  178. try:
  179. response = requests.post(
  180. CONTENT_FANS_PORTRAIT_API,
  181. json=payload,
  182. headers={"Content-Type": "application/json"},
  183. timeout=request_timeout,
  184. )
  185. response.raise_for_status()
  186. data = response.json()
  187. except requests.exceptions.HTTPError as e:
  188. return f"HTTP {e.response.status_code}: {e.response.text}", None
  189. except requests.exceptions.Timeout:
  190. return f"请求超时({request_timeout}秒)", None
  191. except requests.exceptions.RequestException as e:
  192. return f"网络错误: {str(e)}", None
  193. except Exception as e:
  194. logger.error(
  195. "content portrait request failed",
  196. extra={"content_id": content_id, "error": str(e)},
  197. exc_info=True,
  198. )
  199. return f"未知错误: {str(e)}", None
  200. data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
  201. portrait = data_block.get("data", {}) if isinstance(data_block.get("data"), dict) else {}
  202. header = f"内容 {content_id} 的点赞用户画像"
  203. link = (
  204. f"画像链接:https://douhot.douyin.com/video/detail?"
  205. f"active_tab=video_fans&video_id={content_id}"
  206. )
  207. output = _format_portrait_summary(header, link, portrait)
  208. has_valid = bool(portrait and any(isinstance(v, dict) and v for v in portrait.values()))
  209. return None, _PortraitOk(
  210. output=output,
  211. has_portrait=has_valid,
  212. portrait_data=portrait,
  213. raw_data=data,
  214. )
  215. @tool(description="获取抖音账号粉丝画像(热点宝),支持选择画像维度")
  216. async def get_account_fans_portrait(
  217. account_id: str,
  218. need_province: bool = False,
  219. need_city: bool = False,
  220. need_city_level: bool = False,
  221. need_gender: bool = False,
  222. need_age: bool = True,
  223. need_phone_brand: bool = False,
  224. need_phone_price: bool = False,
  225. timeout: Optional[float] = None,
  226. ) -> ToolResult:
  227. """
  228. 获取抖音账号粉丝画像(热点宝数据)
  229. 获取指定账号的粉丝画像数据,包括年龄、性别、地域等多个维度。
  230. Args:
  231. account_id: 抖音账号ID(使用 author.sec_uid)
  232. need_province: 是否获取省份分布,默认 False
  233. need_city: 是否获取城市分布,默认 False
  234. need_city_level: 是否获取城市等级分布(一线/新一线/二线等),默认 False
  235. need_gender: 是否获取性别分布,默认 False
  236. need_age: 是否获取年龄分布,默认 True
  237. need_phone_brand: 是否获取手机品牌分布,默认 False
  238. need_phone_price: 是否获取手机价格分布,默认 False
  239. timeout: 超时时间(秒),默认 60
  240. Returns:
  241. ToolResult: 包含以下内容:
  242. - output: 文本格式的画像摘要
  243. - metadata.has_portrait: 布尔值,表示是否有有效画像数据
  244. - True: 有有效画像数据
  245. - False: 无画像数据
  246. - metadata.portrait_data: 结构化的画像数据(字典格式)
  247. - 键: 维度名称(如 "年龄"、"性别")
  248. - 值: 该维度的分布数据(字典)
  249. - percentage: 占比(如 "48.35%")
  250. - preference: 偏好度/TGI(如 "210.05")
  251. - metadata.raw_data: 原始 API 返回数据
  252. Note:
  253. - account_id 参数使用 author.sec_uid(约80字符)
  254. - 默认只返回年龄分布,需要其他维度时设置对应参数为 True
  255. - 省份数据只显示 TOP5
  256. - 偏好度(TGI)说明:
  257. - > 100: 该人群偏好高于平均水平
  258. - = 100: 平均水平
  259. - < 100: 低于平均水平
  260. - 使用 metadata.has_portrait 判断画像是否有效,不要解析 output 文本
  261. - 从 metadata.portrait_data 获取结构化画像数据
  262. """
  263. start_time = time.time()
  264. call_params = {
  265. "account_id": account_id,
  266. "need_province": need_province,
  267. "need_city": need_city,
  268. "need_city_level": need_city_level,
  269. "need_gender": need_gender,
  270. "need_age": need_age,
  271. "need_phone_brand": need_phone_brand,
  272. "need_phone_price": need_phone_price,
  273. "timeout": timeout,
  274. }
  275. flags = _dimension_flags(
  276. need_province,
  277. need_city,
  278. need_city_level,
  279. need_gender,
  280. need_age,
  281. need_phone_brand,
  282. need_phone_price,
  283. )
  284. request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
  285. err, ok = _sync_fetch_account_portrait(account_id, flags, request_timeout)
  286. duration_ms = int((time.time() - start_time) * 1000)
  287. if err:
  288. logger.error("get_account_fans_portrait failed", extra={"account_id": account_id, "error": err})
  289. return _log_return(
  290. _LABEL_ACCOUNT,
  291. call_params,
  292. ToolResult(
  293. title="账号粉丝画像获取失败",
  294. output="",
  295. error=err,
  296. ),
  297. )
  298. assert ok is not None
  299. logger.info(
  300. "get_account_fans_portrait completed",
  301. extra={
  302. "account_id": account_id,
  303. "has_portrait": ok["has_portrait"],
  304. "portrait_dimensions": list(ok["portrait_data"].keys()) if ok["portrait_data"] else [],
  305. "duration_ms": duration_ms,
  306. },
  307. )
  308. return _log_return(
  309. _LABEL_ACCOUNT,
  310. call_params,
  311. ToolResult(
  312. title=f"账号粉丝画像: {account_id}",
  313. output=ok["output"],
  314. long_term_memory=f"Fetched fans portrait for account '{account_id}'",
  315. metadata={
  316. "raw_data": ok["raw_data"],
  317. "has_portrait": ok["has_portrait"],
  318. "portrait_data": ok["portrait_data"],
  319. },
  320. ),
  321. )
  322. @tool(description="获取抖音内容点赞用户画像(热点宝),支持选择画像维度")
  323. async def get_content_fans_portrait(
  324. content_id: str,
  325. need_province: bool = False,
  326. need_city: bool = False,
  327. need_city_level: bool = False,
  328. need_gender: bool = False,
  329. need_age: bool = True,
  330. need_phone_brand: bool = False,
  331. need_phone_price: bool = False,
  332. timeout: Optional[float] = None,
  333. ) -> ToolResult:
  334. """
  335. 获取抖音内容点赞用户画像(热点宝数据)
  336. 获取指定视频内容的点赞用户画像数据,包括年龄、性别、地域等多个维度。
  337. Args:
  338. content_id: 抖音内容ID(使用 aweme_id)
  339. need_province: 是否获取省份分布,默认 False
  340. need_city: 是否获取城市分布,默认 False
  341. need_city_level: 是否获取城市等级分布(一线/新一线/二线等),默认 False
  342. need_gender: 是否获取性别分布,默认 False
  343. need_age: 是否获取年龄分布,默认 True
  344. need_phone_brand: 是否获取手机品牌分布,默认 False
  345. need_phone_price: 是否获取手机价格分布,默认 False
  346. timeout: 超时时间(秒),默认 60
  347. Returns:
  348. ToolResult: 包含以下内容:
  349. - output: 文本格式的画像摘要
  350. - metadata.has_portrait: 布尔值,表示是否有有效画像数据
  351. - True: 有有效画像数据
  352. - False: 无画像数据(需要使用账号画像兜底)
  353. - metadata.portrait_data: 结构化的画像数据(字典格式)
  354. - 键: 维度名称(如 "年龄"、"性别")
  355. - 值: 该维度的分布数据(字典)
  356. - percentage: 占比(如 "48.35%")
  357. - preference: 偏好度/TGI(如 "210.05")
  358. - metadata.raw_data: 原始 API 返回数据
  359. Note:
  360. - content_id 参数使用 aweme_id
  361. - 默认只返回年龄分布,需要其他维度时设置对应参数为 True
  362. - 省份数据只显示 TOP5
  363. - 偏好度(TGI)说明:
  364. - > 100: 该人群偏好高于平均水平
  365. - = 100: 平均水平
  366. - < 100: 低于平均水平
  367. - 使用 metadata.has_portrait 判断画像是否有效,不要解析 output 文本
  368. - 如果 has_portrait 为 False,应使用 get_account_fans_portrait 作为兜底
  369. - 从 metadata.portrait_data 获取结构化画像数据
  370. """
  371. start_time = time.time()
  372. call_params = {
  373. "content_id": content_id,
  374. "need_province": need_province,
  375. "need_city": need_city,
  376. "need_city_level": need_city_level,
  377. "need_gender": need_gender,
  378. "need_age": need_age,
  379. "need_phone_brand": need_phone_brand,
  380. "need_phone_price": need_phone_price,
  381. "timeout": timeout,
  382. }
  383. flags = _dimension_flags(
  384. need_province,
  385. need_city,
  386. need_city_level,
  387. need_gender,
  388. need_age,
  389. need_phone_brand,
  390. need_phone_price,
  391. )
  392. request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
  393. err, ok = _sync_fetch_content_portrait(content_id, flags, request_timeout)
  394. duration_ms = int((time.time() - start_time) * 1000)
  395. if err:
  396. logger.error("get_content_fans_portrait failed", extra={"content_id": content_id, "error": err})
  397. return _log_return(
  398. _LABEL_CONTENT,
  399. call_params,
  400. ToolResult(
  401. title="内容点赞用户画像获取失败",
  402. output="",
  403. error=err,
  404. ),
  405. )
  406. assert ok is not None
  407. logger.info(
  408. "get_content_fans_portrait completed",
  409. extra={
  410. "content_id": content_id,
  411. "has_portrait": ok["has_portrait"],
  412. "portrait_dimensions": list(ok["portrait_data"].keys()) if ok["portrait_data"] else [],
  413. "duration_ms": duration_ms,
  414. },
  415. )
  416. return _log_return(
  417. _LABEL_CONTENT,
  418. call_params,
  419. ToolResult(
  420. title=f"内容点赞用户画像: {content_id}",
  421. output=ok["output"],
  422. long_term_memory=f"Fetched fans portrait for content '{content_id}'",
  423. metadata={
  424. "raw_data": ok["raw_data"],
  425. "has_portrait": ok["has_portrait"],
  426. "portrait_data": ok["portrait_data"],
  427. },
  428. ),
  429. )
  430. @tool(
  431. description=(
  432. "批量获取多条候选视频的画像:工具内依次请求内容点赞画像;"
  433. "若无画像且允许兜底则再请求作者粉丝画像。一次调用返回所有条目,减少对话轮次。"
  434. "完整结构化结果在同一条 tool 消息的 metadata JSON 中,并写入 OUTPUT_DIR/<trace_id>/batch_portraits.json。"
  435. ),
  436. hidden_params=["context"],
  437. )
  438. async def batch_fetch_portraits(
  439. candidates_json: str,
  440. need_province: bool = False,
  441. need_city: bool = False,
  442. need_city_level: bool = False,
  443. need_gender: bool = False,
  444. need_age: bool = True,
  445. need_phone_brand: bool = False,
  446. need_phone_price: bool = False,
  447. timeout: Optional[float] = None,
  448. context: Optional[Dict[str, Any]] = None,
  449. ) -> ToolResult:
  450. """
  451. 批量拉取内容画像并在规则允许时用账号画像兜底(单工具、多 HTTP 顺序请求)。
  452. Args:
  453. candidates_json: JSON 数组字符串。每项为对象,字段:
  454. - aweme_id (必填): 视频 id
  455. - author_sec_uid (可选): 作者 sec_uid,兜底时需要
  456. - try_account_fallback (可选,默认 true): 为 false 时不请求账号画像
  457. (对应来自 douyin_user_videos 的条目,与单条工具规则一致)
  458. need_* / timeout: 与各单条画像工具一致
  459. Returns:
  460. ToolResult.output: 人类可读的分条摘要
  461. metadata.results: 与 candidates 顺序一致的列表,每项含 content / account 子对象;
  462. 通过 ToolResult.include_metadata_in_llm 会进入本轮 tool 消息正文(JSON),无需从 log 猜测。
  463. metadata.snapshot_path: 落盘文件绝对路径(若写入成功)
  464. Note:
  465. context 由 Runner 注入,含 trace_id,用于写入 batch_portraits.json。
  466. """
  467. call_params: Dict[str, Any] = {
  468. "candidates_json": candidates_json[:2000] + ("..." if len(candidates_json) > 2000 else ""),
  469. "need_age": need_age,
  470. "timeout": timeout,
  471. }
  472. raw = (candidates_json or "").strip()
  473. if not raw:
  474. return _log_return(
  475. _LABEL_BATCH,
  476. call_params,
  477. ToolResult(title="批量画像失败", output="", error="candidates_json 为空"),
  478. )
  479. try:
  480. parsed = json.loads(raw)
  481. except json.JSONDecodeError as e:
  482. return _log_return(
  483. _LABEL_BATCH,
  484. call_params,
  485. ToolResult(
  486. title="批量画像失败",
  487. output="",
  488. error=f"candidates_json 不是合法 JSON: {e}",
  489. ),
  490. )
  491. if not isinstance(parsed, list):
  492. return _log_return(
  493. _LABEL_BATCH,
  494. call_params,
  495. ToolResult(title="批量画像失败", output="", error="candidates_json 必须是 JSON 数组"),
  496. )
  497. if len(parsed) > BATCH_MAX_ITEMS:
  498. return _log_return(
  499. _LABEL_BATCH,
  500. call_params,
  501. ToolResult(
  502. title="批量画像失败",
  503. output="",
  504. error=f"条目数超过上限 {BATCH_MAX_ITEMS},请分批调用",
  505. ),
  506. )
  507. flags = _dimension_flags(
  508. need_province,
  509. need_city,
  510. need_city_level,
  511. need_gender,
  512. need_age,
  513. need_phone_brand,
  514. need_phone_price,
  515. )
  516. request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
  517. results: List[Dict[str, Any]] = []
  518. output_chunks: List[str] = []
  519. for idx, entry in enumerate(parsed):
  520. if not isinstance(entry, dict):
  521. results.append(
  522. {
  523. "aweme_id": None,
  524. "error": "条目不是对象",
  525. "content": None,
  526. "account": None,
  527. }
  528. )
  529. output_chunks.append(f"[{idx}] 跳过:条目不是 JSON 对象")
  530. continue
  531. aweme_id = entry.get("aweme_id") or entry.get("content_id")
  532. author_sec = entry.get("author_sec_uid") or entry.get("account_id")
  533. try_fallback = entry.get("try_account_fallback", True)
  534. if isinstance(try_fallback, str):
  535. try_fallback = try_fallback.strip().lower() in ("1", "true", "yes")
  536. if not aweme_id or not isinstance(aweme_id, str):
  537. results.append(
  538. {
  539. "aweme_id": aweme_id,
  540. "error": "缺少 aweme_id",
  541. "content": None,
  542. "account": None,
  543. }
  544. )
  545. output_chunks.append(f"[{idx}] 跳过:缺少 aweme_id")
  546. continue
  547. item_result: Dict[str, Any] = {
  548. "aweme_id": aweme_id,
  549. "author_sec_uid": author_sec if isinstance(author_sec, str) else None,
  550. "try_account_fallback": bool(try_fallback),
  551. "content": None,
  552. "account": None,
  553. "error": None,
  554. }
  555. cerr, cok = _sync_fetch_content_portrait(aweme_id, flags, request_timeout)
  556. if cerr:
  557. item_result["content"] = {
  558. "ok": False,
  559. "error": cerr,
  560. "has_portrait": False,
  561. "portrait_data": {},
  562. }
  563. else:
  564. assert cok is not None
  565. item_result["content"] = {
  566. "ok": True,
  567. "error": None,
  568. "has_portrait": cok["has_portrait"],
  569. "portrait_data": cok["portrait_data"],
  570. "output": cok["output"],
  571. }
  572. c_block = item_result["content"]
  573. content_has = bool(c_block and c_block.get("has_portrait"))
  574. need_account = bool(try_fallback) and not content_has
  575. if need_account:
  576. if not author_sec or not isinstance(author_sec, str):
  577. item_result["account"] = {
  578. "attempted": False,
  579. "skipped_reason": "缺少 author_sec_uid,无法账号兜底",
  580. "has_portrait": False,
  581. "portrait_data": {},
  582. }
  583. else:
  584. aerr, aok = _sync_fetch_account_portrait(author_sec, flags, request_timeout)
  585. if aerr:
  586. item_result["account"] = {
  587. "attempted": True,
  588. "error": aerr,
  589. "has_portrait": False,
  590. "portrait_data": {},
  591. }
  592. else:
  593. assert aok is not None
  594. item_result["account"] = {
  595. "attempted": True,
  596. "error": None,
  597. "has_portrait": aok["has_portrait"],
  598. "portrait_data": aok["portrait_data"],
  599. "output": aok["output"],
  600. }
  601. else:
  602. skip_reason = (
  603. "try_account_fallback 为 false(如 douyin_user_videos 来源)"
  604. if not try_fallback
  605. else "内容侧已有有效画像,无需账号兜底"
  606. )
  607. item_result["account"] = {
  608. "attempted": False,
  609. "skipped_reason": skip_reason,
  610. "has_portrait": False,
  611. "portrait_data": {},
  612. }
  613. results.append(item_result)
  614. # 压缩每条在 output 中的篇幅
  615. c_part = item_result["content"] or {}
  616. a_part = item_result["account"] or {}
  617. line = (
  618. f"[{idx}] aweme_id={aweme_id} "
  619. f"content_has_portrait={c_part.get('has_portrait')} "
  620. f"account_attempted={a_part.get('attempted')} "
  621. f"account_has_portrait={a_part.get('has_portrait')}"
  622. )
  623. output_chunks.append(line)
  624. full_text = "\n".join(output_chunks)
  625. trace_id = None
  626. if isinstance(context, dict):
  627. tid = context.get("trace_id")
  628. if isinstance(tid, str) and tid.strip():
  629. trace_id = tid.strip()
  630. snapshot_path = _persist_batch_portraits_json(trace_id, results, len(results))
  631. out_display = (os.getenv("OUTPUT_DIR") or ".cache/output").strip()
  632. rel_hint = (
  633. f"{out_display}/{trace_id}/{_BATCH_SNAPSHOT_NAME}"
  634. if trace_id
  635. else f"{out_display}/<trace_id>/{_BATCH_SNAPSHOT_NAME}"
  636. )
  637. meta_hint = (
  638. "\n\n本条 tool 消息在标题与摘要后附有 ## metadata (JSON),其中 results 含每条 "
  639. "content/account 的 has_portrait 与 portrait_data;若上下文被压缩,可用 read_file 读取:"
  640. f" {rel_hint}"
  641. + (f"(本机路径: {snapshot_path})" if snapshot_path else "")
  642. )
  643. output_body = full_text + meta_hint
  644. logger.info(
  645. "batch_fetch_portraits completed",
  646. extra={
  647. "count": len(results),
  648. "candidates": len(parsed),
  649. "trace_id": trace_id,
  650. "snapshot_path": snapshot_path,
  651. },
  652. )
  653. meta: Dict[str, Any] = {
  654. "results": results,
  655. "count": len(results),
  656. }
  657. if snapshot_path:
  658. meta["snapshot_path"] = snapshot_path
  659. return _log_return(
  660. _LABEL_BATCH,
  661. call_params,
  662. ToolResult(
  663. title=f"批量画像完成 ({len(results)} 条)",
  664. output=output_body,
  665. long_term_memory=f"Batch portrait fetch for {len(results)} items",
  666. metadata=meta,
  667. include_metadata_in_llm=True,
  668. ),
  669. )