"""
Re-Dian-Bao (Douyin "hot spot") portrait data tools (example).

Calls the internal crawler service to fetch audience portraits for a Douyin
account (its fans) or for a piece of content (the users who liked it).
"""

import asyncio
import logging
import math
import time
from typing import Optional, Dict, Any, List, Tuple

import requests

from agent.tools import tool, ToolResult

logger = logging.getLogger(__name__)

ACCOUNT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/account_fans_portrait"
CONTENT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/video_like_portrait"
DEFAULT_TIMEOUT = 60.0

# Dimensions whose distribution is truncated to the five largest buckets in the
# text summary (the structured metadata always carries the full distribution).
_TOP_K_DIMENSIONS = ("省份", "城市")


@tool(description="获取抖音账号粉丝画像(热点宝),支持选择画像维度")
async def get_account_fans_portrait(
    account_id: str,
    need_province: bool = False,
    need_city: bool = False,
    need_city_level: bool = False,
    need_gender: bool = False,
    need_age: bool = True,
    need_phone_brand: bool = False,
    need_phone_price: bool = False,
    timeout: Optional[float] = None,
) -> ToolResult:
    """
    Fetch the fan portrait of a Douyin account (Re-Dian-Bao data).

    Validates ``account_id`` locally, then POSTs the request to the crawler
    service and summarises the returned distributions.

    Args:
        account_id: Douyin account ID (use author.sec_uid).
        need_province: Whether to fetch the province distribution. Default False.
        need_city: Whether to fetch the city distribution. Default False.
        need_city_level: Whether to fetch the city-tier distribution. Default False.
        need_gender: Whether to fetch the gender distribution. Default False.
        need_age: Whether to fetch the age distribution. Default True.
        need_phone_brand: Whether to fetch the phone-brand distribution. Default False.
        need_phone_price: Whether to fetch the phone-price distribution. Default False.
        timeout: Request timeout in seconds. Defaults to DEFAULT_TIMEOUT (60).

    Returns:
        ToolResult with:
        - output: text summary of the portrait.
        - metadata.has_portrait: True when at least one dimension holds a
          non-empty distribution, False otherwise.
        - metadata.portrait_data: structured portrait, keyed by dimension name
          (e.g. "年龄", "性别"); each bucket carries ``percentage``
          (e.g. "48.35%") and ``preference`` (TGI, e.g. "210.05").
        - metadata.raw_data: the raw API response.
        On validation or request failure, ``error`` is set and ``output`` is empty.

    Note:
        - account_id must start with "MS4wLjABAAAA" and be 70-90 characters long.
        - Only the age distribution is requested by default.
        - Province and city distributions show only the TOP5 buckets in ``output``.
        - Preference (TGI): > 100 above average, = 100 average, < 100 below average.
        - Use metadata.has_portrait to decide validity; do not parse ``output``.
    """
    start_time = time.time()
    failure_title = "账号粉丝画像获取失败"

    # Reject malformed IDs before spending a network round trip on them.
    if not account_id or not isinstance(account_id, str):
        logger.error("get_account_fans_portrait invalid account_id", extra={"account_id": account_id})
        return ToolResult(
            title=failure_title,
            output="",
            error="account_id 参数无效:必须是非空字符串",
        )

    if not account_id.startswith("MS4wLjABAAAA"):
        logger.error("get_account_fans_portrait invalid sec_uid format", extra={"account_id": account_id})
        return ToolResult(
            title=failure_title,
            output="",
            error=f"account_id 格式错误:必须以 MS4wLjABAAAA 开头,当前值: {account_id[:20]}...",
        )

    if len(account_id) < 70 or len(account_id) > 90:
        logger.error(
            "get_account_fans_portrait invalid sec_uid length",
            extra={"account_id": account_id, "length": len(account_id)},
        )
        return ToolResult(
            title=failure_title,
            output="",
            error=f"account_id 长度异常:期望 70-90 字符,实际 {len(account_id)} 字符。这可能是编造或截断的数据。",
        )

    return await _fetch_portrait(
        tool_name="get_account_fans_portrait",
        api_url=ACCOUNT_FANS_PORTRAIT_API,
        id_field="account_id",
        id_value=account_id,
        dimension_flags={
            "need_province": need_province,
            "need_city": need_city,
            "need_city_level": need_city_level,
            "need_gender": need_gender,
            "need_age": need_age,
            "need_phone_brand": need_phone_brand,
            "need_phone_price": need_phone_price,
        },
        timeout=timeout,
        start_time=start_time,
        summary_header=f"账号 {account_id} 的粉丝画像",
        portrait_link=f"画像链接:https://douhot.douyin.com/creator/detail?active_tab=creator_fans_portrait&creator_id={account_id}",
        success_title=f"账号粉丝画像: {account_id}",
        failure_title=failure_title,
        memory_note=f"Fetched fans portrait for account '{account_id}'",
    )


@tool(description="获取抖音内容点赞用户画像(热点宝),支持选择画像维度")
async def get_content_fans_portrait(
    content_id: str,
    need_province: bool = False,
    need_city: bool = False,
    need_city_level: bool = False,
    need_gender: bool = False,
    need_age: bool = True,
    need_phone_brand: bool = False,
    need_phone_price: bool = False,
    timeout: Optional[float] = None,
) -> ToolResult:
    """
    Fetch the portrait of users who liked a Douyin video (Re-Dian-Bao data).

    Validates ``content_id`` locally, then POSTs the request to the crawler
    service and summarises the returned distributions.

    Args:
        content_id: Douyin content ID (use aweme_id).
        need_province: Whether to fetch the province distribution. Default False.
        need_city: Whether to fetch the city distribution. Default False.
        need_city_level: Whether to fetch the city-tier distribution. Default False.
        need_gender: Whether to fetch the gender distribution. Default False.
        need_age: Whether to fetch the age distribution. Default True.
        need_phone_brand: Whether to fetch the phone-brand distribution. Default False.
        need_phone_price: Whether to fetch the phone-price distribution. Default False.
        timeout: Request timeout in seconds. Defaults to DEFAULT_TIMEOUT (60).

    Returns:
        ToolResult with:
        - output: text summary of the portrait.
        - metadata.has_portrait: True when at least one dimension holds a
          non-empty distribution; False means no portrait data (fall back to
          the account portrait).
        - metadata.portrait_data: structured portrait, keyed by dimension name
          (e.g. "年龄", "性别"); each bucket carries ``percentage``
          (e.g. "48.35%") and ``preference`` (TGI, e.g. "210.05").
        - metadata.raw_data: the raw API response.
        On validation or request failure, ``error`` is set and ``output`` is empty.

    Note:
        - content_id must consist of 15-25 ASCII digits.
        - Only the age distribution is requested by default.
        - Province and city distributions show only the TOP5 buckets in ``output``.
        - Preference (TGI): > 100 above average, = 100 average, < 100 below average.
        - Use metadata.has_portrait to decide validity; do not parse ``output``.
        - If has_portrait is False, use get_account_fans_portrait as a fallback.
    """
    start_time = time.time()
    failure_title = "内容点赞用户画像获取失败"

    # Reject malformed IDs before spending a network round trip on them.
    if not content_id or not isinstance(content_id, str):
        logger.error("get_content_fans_portrait invalid content_id", extra={"content_id": content_id})
        return ToolResult(
            title=failure_title,
            output="",
            error="content_id 参数无效:必须是非空字符串",
        )

    # str.isdigit() alone also accepts non-ASCII digits (full-width, superscripts),
    # which are never valid aweme_ids, so require ASCII as well.
    if not (content_id.isascii() and content_id.isdigit()):
        logger.error("get_content_fans_portrait invalid aweme_id format", extra={"content_id": content_id})
        return ToolResult(
            title=failure_title,
            output="",
            error=f"content_id 格式错误:aweme_id 应该是纯数字,当前值: {content_id[:20]}...",
        )

    if len(content_id) < 15 or len(content_id) > 25:
        logger.error(
            "get_content_fans_portrait invalid aweme_id length",
            extra={"content_id": content_id, "length": len(content_id)},
        )
        return ToolResult(
            title=failure_title,
            output="",
            error=f"content_id 长度异常:期望 15-25 位数字,实际 {len(content_id)} 位",
        )

    return await _fetch_portrait(
        tool_name="get_content_fans_portrait",
        api_url=CONTENT_FANS_PORTRAIT_API,
        id_field="content_id",
        id_value=content_id,
        dimension_flags={
            "need_province": need_province,
            "need_city": need_city,
            "need_city_level": need_city_level,
            "need_gender": need_gender,
            "need_age": need_age,
            "need_phone_brand": need_phone_brand,
            "need_phone_price": need_phone_price,
        },
        timeout=timeout,
        start_time=start_time,
        summary_header=f"内容 {content_id} 的点赞用户画像",
        portrait_link=f"画像链接:https://douhot.douyin.com/video/detail?active_tab=video_fans&video_id={content_id}",
        success_title=f"内容点赞用户画像: {content_id}",
        failure_title=failure_title,
        memory_note=f"Fetched fans portrait for content '{content_id}'",
    )


async def _fetch_portrait(
    *,
    tool_name: str,
    api_url: str,
    id_field: str,
    id_value: str,
    dimension_flags: Dict[str, bool],
    timeout: Optional[float],
    start_time: float,
    summary_header: str,
    portrait_link: str,
    success_title: str,
    failure_title: str,
    memory_note: str,
) -> ToolResult:
    """
    Request a portrait from the crawler service and build the ToolResult.

    Shared implementation behind both public tools; the caller has already
    validated ``id_value``.

    Args:
        tool_name: Public tool name, used as the prefix of every log message.
        api_url: Crawler endpoint to POST to.
        id_field: Payload / log key carrying the ID ("account_id" or "content_id").
        id_value: The validated ID.
        dimension_flags: The ``need_*`` switches forwarded verbatim in the payload.
        timeout: Request timeout in seconds; None selects DEFAULT_TIMEOUT.
        start_time: ``time.time()`` taken when the public tool was entered,
            used to compute the logged duration.
        summary_header: First line of the text summary.
        portrait_link: Second line of the text summary (link to the portrait page).
        success_title: ToolResult title on success.
        failure_title: ToolResult title on any failure.
        memory_note: ToolResult ``long_term_memory`` on success.

    Returns:
        A ToolResult; request failures are reported through ``error`` rather
        than raised.
    """
    request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
    payload: Dict[str, Any] = {id_field: id_value, **dimension_flags}

    try:
        # requests is blocking; run it in the default executor so a slow crawler
        # response (up to request_timeout seconds) does not stall the event loop.
        loop = asyncio.get_running_loop()
        data = await loop.run_in_executor(None, _post_json, api_url, payload, request_timeout)

        portrait = _extract_portrait(data)

        summary_lines = [summary_header, portrait_link, ""]
        summary_lines.extend(_render_portrait(portrait))

        duration_ms = int((time.time() - start_time) * 1000)
        # Valid means at least one dimension with a non-empty distribution.
        has_valid_portrait = any(isinstance(v, dict) and bool(v) for v in portrait.values())

        logger.info(
            f"{tool_name} completed",
            extra={
                id_field: id_value,
                "has_portrait": has_valid_portrait,
                "portrait_dimensions": list(portrait.keys()),
                "duration_ms": duration_ms,
            },
        )

        return ToolResult(
            title=success_title,
            output="\n".join(summary_lines),
            long_term_memory=memory_note,
            metadata={
                "raw_data": data,
                "has_portrait": has_valid_portrait,
                "portrait_data": portrait,
            },
        )
    except requests.exceptions.HTTPError as e:
        # raise_for_status() attaches the response, but do not assume it so that
        # the handler itself can never raise.
        response = e.response
        status_code = response.status_code if response is not None else None
        body_text = response.text if response is not None else ""
        logger.error(
            f"{tool_name} HTTP error",
            extra={id_field: id_value, "status_code": status_code, "error": str(e)},
        )
        return ToolResult(
            title=failure_title,
            output="",
            error=f"HTTP {status_code}: {body_text}",
        )
    except requests.exceptions.Timeout:
        logger.error(f"{tool_name} timeout", extra={id_field: id_value, "timeout": request_timeout})
        return ToolResult(
            title=failure_title,
            output="",
            error=f"请求超时({request_timeout}秒)",
        )
    except requests.exceptions.RequestException as e:
        logger.error(f"{tool_name} network error", extra={id_field: id_value, "error": str(e)})
        return ToolResult(
            title=failure_title,
            output="",
            error=f"网络错误: {str(e)}",
        )
    except Exception as e:
        # Tool boundary: report anything unexpected as a failed result, with traceback logged.
        logger.error(
            f"{tool_name} unexpected error",
            extra={id_field: id_value, "error": str(e)},
            exc_info=True,
        )
        return ToolResult(
            title=failure_title,
            output="",
            error=f"未知错误: {str(e)}",
        )


def _post_json(url: str, payload: Dict[str, Any], timeout: float) -> Any:
    """POST ``payload`` as JSON and return the decoded JSON body (blocking).

    Raises whatever ``requests`` raises, including ``HTTPError`` for 4xx/5xx
    responses via ``raise_for_status()``.
    """
    response = requests.post(
        url,
        json=payload,
        headers={"Content-Type": "application/json"},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


def _extract_portrait(data: Any) -> Dict[str, Any]:
    """Return the dict found at ``data["data"]["data"]``, or {} if any level is not a dict."""
    if not isinstance(data, dict):
        return {}
    data_block = data.get("data")
    if not isinstance(data_block, dict):
        return {}
    portrait = data_block.get("data")
    return portrait if isinstance(portrait, dict) else {}


def _render_portrait(portrait: Dict[str, Any]) -> List[str]:
    """Render each portrait dimension as summary lines, followed by a blank line.

    Dimensions whose value is not a dict are skipped, as are buckets whose
    metrics are not a dict. Dimensions in _TOP_K_DIMENSIONS are limited to
    their five largest buckets.
    """
    lines: List[str] = []
    for dimension, distribution in portrait.items():
        if not isinstance(distribution, dict):
            continue

        if dimension in _TOP_K_DIMENSIONS:
            lines.append(f"【{dimension} TOP5】分布")
            entries = _top_k(distribution, 5)
        else:
            lines.append(f"【{dimension}】分布")
            entries = distribution.items()

        for name, metrics in entries:
            if not isinstance(metrics, dict):
                continue
            ratio = metrics.get("percentage")
            tgi = metrics.get("preference")
            lines.append(f" {name}: {ratio} (偏好度: {tgi})")
        lines.append("")
    return lines


def _parse_percentage(value: Any) -> float:
    """Convert a percentage such as "48.35%", "48.35" or 48.35 to a float.

    Returns ``-inf`` for anything that cannot be parsed (None, empty or
    malformed strings, NaN) so such entries sort after every real value.
    """
    parsed = float("-inf")
    if isinstance(value, (int, float)):
        parsed = float(value)
    elif isinstance(value, str):
        try:
            parsed = float(value.strip().rstrip("%").strip())
        except ValueError:
            pass
    return float("-inf") if math.isnan(parsed) else parsed


def _top_k(items: Dict[str, Any], k: int) -> List[Tuple[str, Any]]:
    """Return the ``k`` entries of ``items`` with the largest ``percentage``.

    The percentage is compared numerically, not as text, so "9.5%" ranks
    below "48.35%". Entries with a missing or unparsable percentage rank last.
    Ties keep their original relative order.

    Args:
        items: Mapping of bucket name to a metrics dict with a "percentage" key.
        k: Maximum number of entries to return.

    Returns:
        Up to ``k`` ``(name, metrics)`` pairs, largest percentage first.
    """
    def percent_value(entry: Tuple[str, Any]) -> float:
        metrics = entry[1] if isinstance(entry[1], dict) else {}
        return _parse_percentage(metrics.get("percentage"))

    return sorted(items.items(), key=percent_value, reverse=True)[:k]