hotspot_profile.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692
  1. """
  2. 热点宝画像数据工具(示例)
  3. 调用内部爬虫服务获取账号/内容的粉丝画像。
  4. """
  5. import json
  6. import logging
  7. import os
  8. import time
  9. from pathlib import Path
  10. from typing import Any, Dict, List, Optional, Tuple, TypedDict
  11. import requests
  12. from agent.tools import tool, ToolResult
  13. from utils.tool_logging import format_tool_result_for_log, log_tool_call
  14. logger = logging.getLogger(__name__)
# Labels handed to log_tool_call so each log entry names the tool that produced it.
_LABEL_ACCOUNT = "工具调用:get_account_fans_portrait -> 抖音账号粉丝画像(热点宝)"
_LABEL_CONTENT = "工具调用:get_content_fans_portrait -> 内容点赞用户画像(热点宝)"
_LABEL_BATCH = "工具调用:batch_fetch_portraits -> 批量获取内容/账号画像(热点宝)"
# Upper bound on the number of candidates accepted by one batch_fetch_portraits call.
BATCH_MAX_ITEMS = 30
# Crawler-service endpoints; the fetch helpers POST a JSON payload to them.
# NOTE(review): both URLs are plain http - confirm the service is only reachable internally.
ACCOUNT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/account_fans_portrait"
CONTENT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/video_like_portrait"
# Request timeout in seconds used when a tool caller does not pass `timeout`.
DEFAULT_TIMEOUT = 60.0
  22. class _PortraitOk(TypedDict):
  23. output: str
  24. has_portrait: bool
  25. portrait_data: Dict[str, Any]
  26. raw_data: Any
  27. def _log_return(label: str, params: Dict[str, Any], r: ToolResult) -> ToolResult:
  28. log_tool_call(label, params, format_tool_result_for_log(r))
  29. return r
  30. def _top_k(items: Dict[str, Any], k: int) -> List[Tuple[str, Any]]:
  31. def percent_value(entry: Tuple[str, Any]) -> float:
  32. metrics = entry[1] if isinstance(entry[1], dict) else {}
  33. return metrics.get("percentage")
  34. return sorted(items.items(), key=percent_value, reverse=True)[:k]
  35. def _format_portrait_summary(
  36. header_line: str,
  37. link_line: str,
  38. portrait: Dict[str, Any],
  39. ) -> str:
  40. summary_lines = [header_line, link_line, ""]
  41. for k, v in portrait.items():
  42. if not isinstance(v, dict):
  43. continue
  44. if k in ("省份", "城市"):
  45. summary_lines.append(f"【{k} TOP5】分布")
  46. items = _top_k(v, 5)
  47. else:
  48. summary_lines.append(f"【{k}】分布")
  49. items = v.items()
  50. for name, metrics in items:
  51. ratio = metrics.get("percentage")
  52. tgi = metrics.get("preference")
  53. summary_lines.append(f" {name}: {ratio} (偏好度: {tgi})")
  54. summary_lines.append("")
  55. return "\n".join(summary_lines)
  56. def _validate_account_id(account_id: str) -> Optional[str]:
  57. if not account_id or not isinstance(account_id, str):
  58. return "account_id 参数无效:必须是非空字符串"
  59. if not account_id.startswith("MS4wLjABAAAA"):
  60. return (
  61. f"account_id 格式错误:必须以 MS4wLjABAAAA 开头,"
  62. f"当前值: {account_id[:min(20, len(account_id))]}..."
  63. )
  64. return None
  65. def _validate_content_id(content_id: str) -> Optional[str]:
  66. if not content_id or not isinstance(content_id, str):
  67. return "content_id 参数无效:必须是非空字符串"
  68. if not content_id.isdigit():
  69. return f"content_id 格式错误:aweme_id 应该是纯数字,当前值: {content_id[:20]}..."
  70. if len(content_id) < 15 or len(content_id) > 25:
  71. return f"content_id 长度异常:期望 15-25 位数字,实际 {len(content_id)} 位"
  72. return None
  73. def _dimension_flags(
  74. need_province: bool,
  75. need_city: bool,
  76. need_city_level: bool,
  77. need_gender: bool,
  78. need_age: bool,
  79. need_phone_brand: bool,
  80. need_phone_price: bool,
  81. ) -> Dict[str, bool]:
  82. return {
  83. "need_province": need_province,
  84. "need_city": need_city,
  85. "need_city_level": need_city_level,
  86. "need_gender": need_gender,
  87. "need_age": need_age,
  88. "need_phone_brand": need_phone_brand,
  89. "need_phone_price": need_phone_price,
  90. }
  91. def _sync_fetch_account_portrait(
  92. account_id: str,
  93. flags: Dict[str, bool],
  94. request_timeout: float,
  95. ) -> Tuple[Optional[str], Optional[_PortraitOk]]:
  96. err = _validate_account_id(account_id)
  97. if err:
  98. return err, None
  99. payload = {"account_id": account_id, **flags}
  100. try:
  101. response = requests.post(
  102. ACCOUNT_FANS_PORTRAIT_API,
  103. json=payload,
  104. headers={"Content-Type": "application/json"},
  105. timeout=request_timeout,
  106. )
  107. response.raise_for_status()
  108. data = response.json()
  109. except requests.exceptions.HTTPError as e:
  110. return f"HTTP {e.response.status_code}: {e.response.text}", None
  111. except requests.exceptions.Timeout:
  112. return f"请求超时({request_timeout}秒)", None
  113. except requests.exceptions.RequestException as e:
  114. return f"网络错误: {str(e)}", None
  115. except Exception as e:
  116. logger.error(
  117. "account portrait request failed",
  118. extra={"account_id": account_id, "error": str(e)},
  119. exc_info=True,
  120. )
  121. return f"未知错误: {str(e)}", None
  122. data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
  123. portrait = data_block.get("data", {}) if isinstance(data_block.get("data"), dict) else {}
  124. header = f"账号 {account_id} 的粉丝画像"
  125. link = (
  126. f"画像链接:https://douhot.douyin.com/creator/detail?"
  127. f"active_tab=creator_fans_portrait&creator_id={account_id}"
  128. )
  129. output = _format_portrait_summary(header, link, portrait)
  130. has_valid = bool(portrait and any(isinstance(v, dict) and v for v in portrait.values()))
  131. return None, _PortraitOk(
  132. output=output,
  133. has_portrait=has_valid,
  134. portrait_data=portrait,
  135. raw_data=data,
  136. )
  137. def _sync_fetch_content_portrait(
  138. content_id: str,
  139. flags: Dict[str, bool],
  140. request_timeout: float,
  141. ) -> Tuple[Optional[str], Optional[_PortraitOk]]:
  142. err = _validate_content_id(content_id)
  143. if err:
  144. return err, None
  145. payload = {"content_id": content_id, **flags}
  146. try:
  147. response = requests.post(
  148. CONTENT_FANS_PORTRAIT_API,
  149. json=payload,
  150. headers={"Content-Type": "application/json"},
  151. timeout=request_timeout,
  152. )
  153. response.raise_for_status()
  154. data = response.json()
  155. except requests.exceptions.HTTPError as e:
  156. return f"HTTP {e.response.status_code}: {e.response.text}", None
  157. except requests.exceptions.Timeout:
  158. return f"请求超时({request_timeout}秒)", None
  159. except requests.exceptions.RequestException as e:
  160. return f"网络错误: {str(e)}", None
  161. except Exception as e:
  162. logger.error(
  163. "content portrait request failed",
  164. extra={"content_id": content_id, "error": str(e)},
  165. exc_info=True,
  166. )
  167. return f"未知错误: {str(e)}", None
  168. data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
  169. portrait = data_block.get("data", {}) if isinstance(data_block.get("data"), dict) else {}
  170. header = f"内容 {content_id} 的点赞用户画像"
  171. link = (
  172. f"画像链接:https://douhot.douyin.com/video/detail?"
  173. f"active_tab=video_fans&video_id={content_id}"
  174. )
  175. output = _format_portrait_summary(header, link, portrait)
  176. has_valid = bool(portrait and any(isinstance(v, dict) and v for v in portrait.values()))
  177. return None, _PortraitOk(
  178. output=output,
  179. has_portrait=has_valid,
  180. portrait_data=portrait,
  181. raw_data=data,
  182. )
  183. @tool(description="获取抖音账号粉丝画像(热点宝),支持选择画像维度")
  184. async def get_account_fans_portrait(
  185. account_id: str,
  186. need_province: bool = False,
  187. need_city: bool = False,
  188. need_city_level: bool = False,
  189. need_gender: bool = False,
  190. need_age: bool = True,
  191. need_phone_brand: bool = False,
  192. need_phone_price: bool = False,
  193. timeout: Optional[float] = None,
  194. ) -> ToolResult:
  195. """
  196. 获取抖音账号粉丝画像(热点宝数据)
  197. 获取指定账号的粉丝画像数据,包括年龄、性别、地域等多个维度。
  198. Args:
  199. account_id: 抖音账号ID(使用 author.sec_uid)
  200. need_province: 是否获取省份分布,默认 False
  201. need_city: 是否获取城市分布,默认 False
  202. need_city_level: 是否获取城市等级分布(一线/新一线/二线等),默认 False
  203. need_gender: 是否获取性别分布,默认 False
  204. need_age: 是否获取年龄分布,默认 True
  205. need_phone_brand: 是否获取手机品牌分布,默认 False
  206. need_phone_price: 是否获取手机价格分布,默认 False
  207. timeout: 超时时间(秒),默认 60
  208. Returns:
  209. ToolResult: 包含以下内容:
  210. - output: 文本格式的画像摘要
  211. - metadata.has_portrait: 布尔值,表示是否有有效画像数据
  212. - True: 有有效画像数据
  213. - False: 无画像数据
  214. - metadata.portrait_data: 结构化的画像数据(字典格式)
  215. - 键: 维度名称(如 "年龄"、"性别")
  216. - 值: 该维度的分布数据(字典)
  217. - percentage: 占比(如 "48.35%")
  218. - preference: 偏好度/TGI(如 "210.05")
  219. - metadata.raw_data: 原始 API 返回数据
  220. Note:
  221. - account_id 参数使用 author.sec_uid(约80字符)
  222. - 默认只返回年龄分布,需要其他维度时设置对应参数为 True
  223. - 省份数据只显示 TOP5
  224. - 偏好度(TGI)说明:
  225. - > 100: 该人群偏好高于平均水平
  226. - = 100: 平均水平
  227. - < 100: 低于平均水平
  228. - 使用 metadata.has_portrait 判断画像是否有效,不要解析 output 文本
  229. - 从 metadata.portrait_data 获取结构化画像数据
  230. """
  231. start_time = time.time()
  232. call_params = {
  233. "account_id": account_id,
  234. "need_province": need_province,
  235. "need_city": need_city,
  236. "need_city_level": need_city_level,
  237. "need_gender": need_gender,
  238. "need_age": need_age,
  239. "need_phone_brand": need_phone_brand,
  240. "need_phone_price": need_phone_price,
  241. "timeout": timeout,
  242. }
  243. flags = _dimension_flags(
  244. need_province,
  245. need_city,
  246. need_city_level,
  247. need_gender,
  248. need_age,
  249. need_phone_brand,
  250. need_phone_price,
  251. )
  252. request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
  253. err, ok = _sync_fetch_account_portrait(account_id, flags, request_timeout)
  254. duration_ms = int((time.time() - start_time) * 1000)
  255. if err:
  256. logger.error("get_account_fans_portrait failed", extra={"account_id": account_id, "error": err})
  257. return _log_return(
  258. _LABEL_ACCOUNT,
  259. call_params,
  260. ToolResult(
  261. title="账号粉丝画像获取失败",
  262. output="",
  263. error=err,
  264. ),
  265. )
  266. assert ok is not None
  267. logger.info(
  268. "get_account_fans_portrait completed",
  269. extra={
  270. "account_id": account_id,
  271. "has_portrait": ok["has_portrait"],
  272. "portrait_dimensions": list(ok["portrait_data"].keys()) if ok["portrait_data"] else [],
  273. "duration_ms": duration_ms,
  274. },
  275. )
  276. return _log_return(
  277. _LABEL_ACCOUNT,
  278. call_params,
  279. ToolResult(
  280. title=f"账号粉丝画像: {account_id}",
  281. output=ok["output"],
  282. long_term_memory=f"Fetched fans portrait for account '{account_id}'",
  283. metadata={
  284. "raw_data": ok["raw_data"],
  285. "has_portrait": ok["has_portrait"],
  286. "portrait_data": ok["portrait_data"],
  287. },
  288. ),
  289. )
  290. @tool(description="获取抖音内容点赞用户画像(热点宝),支持选择画像维度")
  291. async def get_content_fans_portrait(
  292. content_id: str,
  293. need_province: bool = False,
  294. need_city: bool = False,
  295. need_city_level: bool = False,
  296. need_gender: bool = False,
  297. need_age: bool = True,
  298. need_phone_brand: bool = False,
  299. need_phone_price: bool = False,
  300. timeout: Optional[float] = None,
  301. ) -> ToolResult:
  302. """
  303. 获取抖音内容点赞用户画像(热点宝数据)
  304. 获取指定视频内容的点赞用户画像数据,包括年龄、性别、地域等多个维度。
  305. Args:
  306. content_id: 抖音内容ID(使用 aweme_id)
  307. need_province: 是否获取省份分布,默认 False
  308. need_city: 是否获取城市分布,默认 False
  309. need_city_level: 是否获取城市等级分布(一线/新一线/二线等),默认 False
  310. need_gender: 是否获取性别分布,默认 False
  311. need_age: 是否获取年龄分布,默认 True
  312. need_phone_brand: 是否获取手机品牌分布,默认 False
  313. need_phone_price: 是否获取手机价格分布,默认 False
  314. timeout: 超时时间(秒),默认 60
  315. Returns:
  316. ToolResult: 包含以下内容:
  317. - output: 文本格式的画像摘要
  318. - metadata.has_portrait: 布尔值,表示是否有有效画像数据
  319. - True: 有有效画像数据
  320. - False: 无画像数据(需要使用账号画像兜底)
  321. - metadata.portrait_data: 结构化的画像数据(字典格式)
  322. - 键: 维度名称(如 "年龄"、"性别")
  323. - 值: 该维度的分布数据(字典)
  324. - percentage: 占比(如 "48.35%")
  325. - preference: 偏好度/TGI(如 "210.05")
  326. - metadata.raw_data: 原始 API 返回数据
  327. Note:
  328. - content_id 参数使用 aweme_id
  329. - 默认只返回年龄分布,需要其他维度时设置对应参数为 True
  330. - 省份数据只显示 TOP5
  331. - 偏好度(TGI)说明:
  332. - > 100: 该人群偏好高于平均水平
  333. - = 100: 平均水平
  334. - < 100: 低于平均水平
  335. - 使用 metadata.has_portrait 判断画像是否有效,不要解析 output 文本
  336. - 如果 has_portrait 为 False,应使用 get_account_fans_portrait 作为兜底
  337. - 从 metadata.portrait_data 获取结构化画像数据
  338. """
  339. start_time = time.time()
  340. call_params = {
  341. "content_id": content_id,
  342. "need_province": need_province,
  343. "need_city": need_city,
  344. "need_city_level": need_city_level,
  345. "need_gender": need_gender,
  346. "need_age": need_age,
  347. "need_phone_brand": need_phone_brand,
  348. "need_phone_price": need_phone_price,
  349. "timeout": timeout,
  350. }
  351. flags = _dimension_flags(
  352. need_province,
  353. need_city,
  354. need_city_level,
  355. need_gender,
  356. need_age,
  357. need_phone_brand,
  358. need_phone_price,
  359. )
  360. request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
  361. err, ok = _sync_fetch_content_portrait(content_id, flags, request_timeout)
  362. duration_ms = int((time.time() - start_time) * 1000)
  363. if err:
  364. logger.error("get_content_fans_portrait failed", extra={"content_id": content_id, "error": err})
  365. return _log_return(
  366. _LABEL_CONTENT,
  367. call_params,
  368. ToolResult(
  369. title="内容点赞用户画像获取失败",
  370. output="",
  371. error=err,
  372. ),
  373. )
  374. assert ok is not None
  375. logger.info(
  376. "get_content_fans_portrait completed",
  377. extra={
  378. "content_id": content_id,
  379. "has_portrait": ok["has_portrait"],
  380. "portrait_dimensions": list(ok["portrait_data"].keys()) if ok["portrait_data"] else [],
  381. "duration_ms": duration_ms,
  382. },
  383. )
  384. return _log_return(
  385. _LABEL_CONTENT,
  386. call_params,
  387. ToolResult(
  388. title=f"内容点赞用户画像: {content_id}",
  389. output=ok["output"],
  390. long_term_memory=f"Fetched fans portrait for content '{content_id}'",
  391. metadata={
  392. "raw_data": ok["raw_data"],
  393. "has_portrait": ok["has_portrait"],
  394. "portrait_data": ok["portrait_data"],
  395. },
  396. ),
  397. )
  398. @tool(
  399. description=(
  400. "批量获取多条候选视频的画像:工具内依次请求内容点赞画像;"
  401. "若无画像且允许兜底则再请求作者粉丝画像。一次调用返回所有条目,减少对话轮次。"
  402. "完整结构化结果在同一条 tool 消息的 metadata JSON 中"
  403. ),
  404. hidden_params=["context"],
  405. )
  406. async def batch_fetch_portraits(
  407. candidates_json: str,
  408. need_province: bool = False,
  409. need_city: bool = False,
  410. need_city_level: bool = False,
  411. need_gender: bool = False,
  412. need_age: bool = True,
  413. need_phone_brand: bool = False,
  414. need_phone_price: bool = False,
  415. timeout: Optional[float] = None,
  416. context: Optional[Dict[str, Any]] = None,
  417. ) -> ToolResult:
  418. """
  419. 批量拉取内容画像并在规则允许时用账号画像兜底(单工具、多 HTTP 顺序请求)。
  420. Args:
  421. candidates_json: JSON 数组字符串。每项为对象,字段:
  422. - aweme_id (必填): 视频 id
  423. - author_sec_uid (可选): 作者 sec_uid,兜底时需要
  424. - try_account_fallback (可选,默认 true): 为 false 时不请求账号画像
  425. (对应来自 douyin_user_videos 的条目,与单条工具规则一致)
  426. need_* / timeout: 与各单条画像工具一致
  427. Returns:
  428. ToolResult.output: 人类可读的分条摘要
  429. metadata.results: 与 candidates 顺序一致的列表,每项含 content / account 子对象;
  430. 通过 ToolResult.include_metadata_in_llm 会进入本轮 tool 消息正文(JSON),无需从 log 猜测。
  431. metadata.snapshot_path: 落盘文件绝对路径(若写入成功)
  432. Note:
  433. context 由 Runner 注入,含 trace_id,用于写入 batch_portraits.json。
  434. """
  435. call_params: Dict[str, Any] = {
  436. "candidates_json": candidates_json[:2000] + ("..." if len(candidates_json) > 2000 else ""),
  437. "need_age": need_age,
  438. "timeout": timeout,
  439. }
  440. raw = (candidates_json or "").strip()
  441. if not raw:
  442. return _log_return(
  443. _LABEL_BATCH,
  444. call_params,
  445. ToolResult(title="批量画像失败", output="", error="candidates_json 为空"),
  446. )
  447. try:
  448. parsed = json.loads(raw)
  449. except json.JSONDecodeError as e:
  450. return _log_return(
  451. _LABEL_BATCH,
  452. call_params,
  453. ToolResult(
  454. title="批量画像失败",
  455. output="",
  456. error=f"candidates_json 不是合法 JSON: {e}",
  457. ),
  458. )
  459. if not isinstance(parsed, list):
  460. return _log_return(
  461. _LABEL_BATCH,
  462. call_params,
  463. ToolResult(title="批量画像失败", output="", error="candidates_json 必须是 JSON 数组"),
  464. )
  465. if len(parsed) > BATCH_MAX_ITEMS:
  466. return _log_return(
  467. _LABEL_BATCH,
  468. call_params,
  469. ToolResult(
  470. title="批量画像失败",
  471. output="",
  472. error=f"条目数超过上限 {BATCH_MAX_ITEMS},请分批调用",
  473. ),
  474. )
  475. flags = _dimension_flags(
  476. need_province,
  477. need_city,
  478. need_city_level,
  479. need_gender,
  480. need_age,
  481. need_phone_brand,
  482. need_phone_price,
  483. )
  484. request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
  485. results: List[Dict[str, Any]] = []
  486. output_chunks: List[str] = []
  487. for idx, entry in enumerate(parsed):
  488. if not isinstance(entry, dict):
  489. results.append(
  490. {
  491. "aweme_id": None,
  492. "error": "条目不是对象",
  493. "content": None,
  494. "account": None,
  495. }
  496. )
  497. output_chunks.append(f"[{idx}] 跳过:条目不是 JSON 对象")
  498. continue
  499. aweme_id = entry.get("aweme_id") or entry.get("content_id")
  500. author_sec = entry.get("author_sec_uid") or entry.get("account_id")
  501. try_fallback = entry.get("try_account_fallback", True)
  502. if isinstance(try_fallback, str):
  503. try_fallback = try_fallback.strip().lower() in ("1", "true", "yes")
  504. if not aweme_id or not isinstance(aweme_id, str):
  505. results.append(
  506. {
  507. "aweme_id": aweme_id,
  508. "error": "缺少 aweme_id",
  509. "content": None,
  510. "account": None,
  511. }
  512. )
  513. output_chunks.append(f"[{idx}] 跳过:缺少 aweme_id")
  514. continue
  515. item_result: Dict[str, Any] = {
  516. "aweme_id": aweme_id,
  517. "author_sec_uid": author_sec if isinstance(author_sec, str) else None,
  518. "try_account_fallback": bool(try_fallback),
  519. "content": None,
  520. "account": None,
  521. "error": None,
  522. }
  523. cerr, cok = _sync_fetch_content_portrait(aweme_id, flags, request_timeout)
  524. if cerr:
  525. item_result["content"] = {
  526. "ok": False,
  527. "error": cerr,
  528. "has_portrait": False,
  529. "portrait_data": {},
  530. }
  531. else:
  532. assert cok is not None
  533. item_result["content"] = {
  534. "ok": True,
  535. "error": None,
  536. "has_portrait": cok["has_portrait"],
  537. "portrait_data": cok["portrait_data"],
  538. "output": cok["output"],
  539. }
  540. c_block = item_result["content"]
  541. content_has = bool(c_block and c_block.get("has_portrait"))
  542. need_account = bool(try_fallback) and not content_has
  543. if need_account:
  544. if not author_sec or not isinstance(author_sec, str):
  545. item_result["account"] = {
  546. "attempted": False,
  547. "skipped_reason": "缺少 author_sec_uid,无法账号兜底",
  548. "has_portrait": False,
  549. "portrait_data": {},
  550. }
  551. else:
  552. aerr, aok = _sync_fetch_account_portrait(author_sec, flags, request_timeout)
  553. if aerr:
  554. item_result["account"] = {
  555. "attempted": True,
  556. "error": aerr,
  557. "has_portrait": False,
  558. "portrait_data": {},
  559. }
  560. else:
  561. assert aok is not None
  562. item_result["account"] = {
  563. "attempted": True,
  564. "error": None,
  565. "has_portrait": aok["has_portrait"],
  566. "portrait_data": aok["portrait_data"],
  567. "output": aok["output"],
  568. }
  569. else:
  570. skip_reason = (
  571. "try_account_fallback 为 false(如 douyin_user_videos 来源)"
  572. if not try_fallback
  573. else "内容侧已有有效画像,无需账号兜底"
  574. )
  575. item_result["account"] = {
  576. "attempted": False,
  577. "skipped_reason": skip_reason,
  578. "has_portrait": False,
  579. "portrait_data": {},
  580. }
  581. results.append(item_result)
  582. # 压缩每条在 output 中的篇幅
  583. c_part = item_result["content"] or {}
  584. a_part = item_result["account"] or {}
  585. line = (
  586. f"[{idx}] aweme_id={aweme_id} "
  587. f"content_has_portrait={c_part.get('has_portrait')} "
  588. f"account_attempted={a_part.get('attempted')} "
  589. f"account_has_portrait={a_part.get('has_portrait')}"
  590. )
  591. output_chunks.append(line)
  592. full_text = "\n".join(output_chunks)
  593. trace_id = None
  594. if isinstance(context, dict):
  595. tid = context.get("trace_id")
  596. if isinstance(tid, str) and tid.strip():
  597. trace_id = tid.strip()
  598. out_display = (os.getenv("OUTPUT_DIR") or ".cache/output").strip()
  599. meta_hint = (
  600. "\n\n本条 tool 消息在标题与摘要后附有 ## metadata (JSON),其中 results 含每条 "
  601. "content/account 的 has_portrait 与 portrait_data;若上下文被压缩,可用 read_file 读取:"
  602. f" {out_display}/{trace_id}/output.json"
  603. )
  604. output_body = full_text + meta_hint
  605. logger.info(
  606. "batch_fetch_portraits completed",
  607. extra={
  608. "count": len(results),
  609. "candidates": len(parsed),
  610. "trace_id": trace_id,
  611. },
  612. )
  613. meta: Dict[str, Any] = {
  614. "results": results,
  615. "count": len(results),
  616. }
  617. return _log_return(
  618. _LABEL_BATCH,
  619. call_params,
  620. ToolResult(
  621. title=f"批量画像完成 ({len(results)} 条)",
  622. output=output_body,
  623. long_term_memory=f"Batch portrait fetch for {len(results)} items",
  624. metadata=meta,
  625. include_metadata_in_llm=True,
  626. ),
  627. )