| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404 |
- """
- 热点宝画像数据工具(示例)
- 调用内部爬虫服务获取账号/内容的粉丝画像。
- """
- import asyncio
- import logging
- import time
- from typing import Optional, Dict, Any, List, Tuple
- import requests
- from agent.tools import tool, ToolResult
- logger = logging.getLogger(__name__)  # Module-level logger, named after this module.
- ACCOUNT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/account_fans_portrait"  # Endpoint that get_account_fans_portrait POSTs to.
- CONTENT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/video_like_portrait"  # Endpoint that get_content_fans_portrait POSTs to.
- DEFAULT_TIMEOUT = 60.0  # Request timeout in seconds, used when a tool is called with timeout=None.
@tool(description="获取抖音账号粉丝画像(热点宝),支持选择画像维度")
async def get_account_fans_portrait(
    account_id: str,
    need_province: bool = False,
    need_city: bool = False,
    need_city_level: bool = False,
    need_gender: bool = False,
    need_age: bool = True,
    need_phone_brand: bool = False,
    need_phone_price: bool = False,
    timeout: Optional[float] = None,
) -> ToolResult:
    """
    Fetch the fans portrait of a Douyin account (Redianbao data).

    Validates ``account_id``, POSTs the selected dimension flags to
    ``ACCOUNT_FANS_PORTRAIT_API`` and renders the returned distributions as a
    text summary. The blocking ``requests`` call is executed in the event
    loop's default executor so the loop is not stalled while waiting for the
    crawler service.

    Args:
        account_id: Douyin account id; use ``author.sec_uid``. It must start
            with ``MS4wLjABAAAA`` and be 70-90 characters long, otherwise the
            call is rejected before any request is sent.
        need_province: Request the province distribution. Defaults to False.
        need_city: Request the city distribution. Defaults to False.
        need_city_level: Request the city-tier distribution. Defaults to False.
        need_gender: Request the gender distribution. Defaults to False.
        need_age: Request the age distribution. Defaults to True.
        need_phone_brand: Request the phone-brand distribution. Defaults to
            False.
        need_phone_price: Request the phone-price distribution. Defaults to
            False.
        timeout: Request timeout in seconds; ``DEFAULT_TIMEOUT`` is used when
            None.

    Returns:
        ToolResult: On success it carries
            - output: text summary of the portrait.
            - metadata.has_portrait: True when at least one dimension holds a
              non-empty dict, False otherwise.
            - metadata.portrait_data: the structured portrait, keyed by
              dimension name (e.g. "年龄", "性别"); each value maps an item
              name to a dict with ``percentage`` (e.g. "48.35%") and
              ``preference`` / TGI (e.g. "210.05").
            - metadata.raw_data: the decoded JSON response as received.
        On validation, HTTP, timeout, network or unexpected failures the
        result has an empty ``output`` and the reason in ``error``; failures
        are reported through the result rather than raised.

    Note:
        - Only the age distribution is requested by default; set the other
          ``need_*`` flags to True for more dimensions.
        - In the text summary the "省份" and "城市" sections list only the top
          5 entries; ``metadata.portrait_data`` is not truncated.
        - Preference (TGI): > 100 means above average, = 100 average,
          < 100 below average.
        - Use ``metadata.has_portrait`` to decide whether the portrait is
          usable instead of parsing ``output``.
    """
    start_time = time.time()

    def _failure(message: str) -> ToolResult:
        # Every failure path shares the same title and an empty output.
        return ToolResult(
            title="账号粉丝画像获取失败",
            output="",
            error=message,
        )

    # Validate account_id before spending a network round trip on it.
    if not account_id or not isinstance(account_id, str):
        logger.error("get_account_fans_portrait invalid account_id", extra={"account_id": account_id})
        return _failure("account_id 参数无效:必须是非空字符串")

    if not account_id.startswith("MS4wLjABAAAA"):
        logger.error("get_account_fans_portrait invalid sec_uid format", extra={"account_id": account_id})
        # Slicing already clamps to the string length, no min() needed.
        return _failure(
            f"account_id 格式错误:必须以 MS4wLjABAAAA 开头,当前值: {account_id[:20]}..."
        )

    if not 70 <= len(account_id) <= 90:
        logger.error(
            "get_account_fans_portrait invalid sec_uid length",
            extra={"account_id": account_id, "length": len(account_id)},
        )
        return _failure(
            f"account_id 长度异常:期望 70-90 字符,实际 {len(account_id)} 字符。这可能是编造或截断的数据。"
        )

    # Bound before the try block so the timeout handler can always report it.
    request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
    payload = {
        "account_id": account_id,
        "need_province": need_province,
        "need_city": need_city,
        "need_city_level": need_city_level,
        "need_gender": need_gender,
        "need_age": need_age,
        "need_phone_brand": need_phone_brand,
        "need_phone_price": need_phone_price,
    }

    try:
        # requests is synchronous: run it in the default executor so this
        # coroutine does not block the event loop for up to request_timeout.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.post(
                ACCOUNT_FANS_PORTRAIT_API,
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=request_timeout,
            ),
        )
        response.raise_for_status()
        data = response.json()

        # The portrait is read from data["data"]["data"]; anything that is not
        # a dict at either level is treated as "no portrait".
        data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
        portrait = data_block.get("data", {}) if isinstance(data_block.get("data"), dict) else {}

        # Build the human-readable summary.
        summary_lines = [f"账号 {account_id} 的粉丝画像"]
        summary_lines.append(f"画像链接:https://douhot.douyin.com/creator/detail?active_tab=creator_fans_portrait&creator_id={account_id}")
        summary_lines.append("")

        for dimension, distribution in portrait.items():
            if not isinstance(distribution, dict):
                continue

            if dimension in ("省份", "城市"):
                summary_lines.append(f"【{dimension} TOP5】分布")
                entries = _top_k(distribution, 5)
            else:
                summary_lines.append(f"【{dimension}】分布")
                entries = distribution.items()

            for name, metrics in entries:
                if not isinstance(metrics, dict):
                    # One malformed entry must not discard the whole portrait.
                    continue
                ratio = metrics.get("percentage")
                tgi = metrics.get("preference")
                summary_lines.append(f"  {name}: {ratio} (偏好度: {tgi})")
            summary_lines.append("")

        duration_ms = int((time.time() - start_time) * 1000)
        has_valid_portrait = bool(portrait and any(
            isinstance(v, dict) and v for v in portrait.values()
        ))

        logger.info(
            "get_account_fans_portrait completed",
            extra={
                "account_id": account_id,
                "has_portrait": has_valid_portrait,
                "portrait_dimensions": list(portrait.keys()) if portrait else [],
                "duration_ms": duration_ms,
            },
        )

        return ToolResult(
            title=f"账号粉丝画像: {account_id}",
            output="\n".join(summary_lines),
            long_term_memory=f"Fetched fans portrait for account '{account_id}'",
            metadata={
                "raw_data": data,
                "has_portrait": has_valid_portrait,
                "portrait_data": portrait,
            },
        )
    except requests.exceptions.HTTPError as e:
        # e.response can be None for a hand-built HTTPError; reading it
        # unguarded would raise from inside this handler and escape the tool.
        status_code = getattr(e.response, "status_code", None)
        body_text = getattr(e.response, "text", "")
        logger.error(
            "get_account_fans_portrait HTTP error",
            extra={
                "account_id": account_id,
                "status_code": status_code,
                "error": str(e),
            },
        )
        return _failure(f"HTTP {status_code}: {body_text}")
    except requests.exceptions.Timeout:
        logger.error("get_account_fans_portrait timeout", extra={"account_id": account_id, "timeout": request_timeout})
        return _failure(f"请求超时({request_timeout}秒)")
    except requests.exceptions.RequestException as e:
        logger.error("get_account_fans_portrait network error", extra={"account_id": account_id, "error": str(e)})
        return _failure(f"网络错误: {str(e)}")
    except Exception as e:
        # Tool boundary: report anything unexpected instead of raising.
        logger.error(
            "get_account_fans_portrait unexpected error",
            extra={"account_id": account_id, "error": str(e)},
            exc_info=True,
        )
        return _failure(f"未知错误: {str(e)}")
@tool(description="获取抖音内容点赞用户画像(热点宝),支持选择画像维度")
async def get_content_fans_portrait(
    content_id: str,
    need_province: bool = False,
    need_city: bool = False,
    need_city_level: bool = False,
    need_gender: bool = False,
    need_age: bool = True,
    need_phone_brand: bool = False,
    need_phone_price: bool = False,
    timeout: Optional[float] = None,
) -> ToolResult:
    """
    Fetch the portrait of users who liked a Douyin video (Redianbao data).

    Validates ``content_id``, POSTs the selected dimension flags to
    ``CONTENT_FANS_PORTRAIT_API`` and renders the returned distributions as a
    text summary. The blocking ``requests`` call is executed in the event
    loop's default executor so the loop is not stalled while waiting for the
    crawler service.

    Args:
        content_id: Douyin content id; use ``aweme_id``. It must consist of
            digits only and be 15-25 characters long, otherwise the call is
            rejected before any request is sent.
        need_province: Request the province distribution. Defaults to False.
        need_city: Request the city distribution. Defaults to False.
        need_city_level: Request the city-tier distribution. Defaults to False.
        need_gender: Request the gender distribution. Defaults to False.
        need_age: Request the age distribution. Defaults to True.
        need_phone_brand: Request the phone-brand distribution. Defaults to
            False.
        need_phone_price: Request the phone-price distribution. Defaults to
            False.
        timeout: Request timeout in seconds; ``DEFAULT_TIMEOUT`` is used when
            None.

    Returns:
        ToolResult: On success it carries
            - output: text summary of the portrait.
            - metadata.has_portrait: True when at least one dimension holds a
              non-empty dict, False otherwise (fall back to the account
              portrait in that case).
            - metadata.portrait_data: the structured portrait, keyed by
              dimension name (e.g. "年龄", "性别"); each value maps an item
              name to a dict with ``percentage`` (e.g. "48.35%") and
              ``preference`` / TGI (e.g. "210.05").
            - metadata.raw_data: the decoded JSON response as received.
        On validation, HTTP, timeout, network or unexpected failures the
        result has an empty ``output`` and the reason in ``error``; failures
        are reported through the result rather than raised.

    Note:
        - Only the age distribution is requested by default; set the other
          ``need_*`` flags to True for more dimensions.
        - In the text summary the "省份" and "城市" sections list only the top
          5 entries; ``metadata.portrait_data`` is not truncated.
        - Preference (TGI): > 100 means above average, = 100 average,
          < 100 below average.
        - Use ``metadata.has_portrait`` to decide whether the portrait is
          usable instead of parsing ``output``; when it is False, use
          ``get_account_fans_portrait`` as the fallback.
    """
    start_time = time.time()

    def _failure(message: str) -> ToolResult:
        # Every failure path shares the same title and an empty output.
        return ToolResult(
            title="内容点赞用户画像获取失败",
            output="",
            error=message,
        )

    # Validate content_id before spending a network round trip on it.
    if not content_id or not isinstance(content_id, str):
        logger.error("get_content_fans_portrait invalid content_id", extra={"content_id": content_id})
        return _failure("content_id 参数无效:必须是非空字符串")

    # An aweme_id is expected to be a purely numeric string of about 19 digits.
    if not content_id.isdigit():
        logger.error("get_content_fans_portrait invalid aweme_id format", extra={"content_id": content_id})
        return _failure(
            f"content_id 格式错误:aweme_id 应该是纯数字,当前值: {content_id[:20]}..."
        )

    if not 15 <= len(content_id) <= 25:
        logger.error(
            "get_content_fans_portrait invalid aweme_id length",
            extra={"content_id": content_id, "length": len(content_id)},
        )
        return _failure(
            f"content_id 长度异常:期望 15-25 位数字,实际 {len(content_id)} 位"
        )

    # Bound before the try block so the timeout handler can always report it.
    request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
    payload = {
        "content_id": content_id,
        "need_province": need_province,
        "need_city": need_city,
        "need_city_level": need_city_level,
        "need_gender": need_gender,
        "need_age": need_age,
        "need_phone_brand": need_phone_brand,
        "need_phone_price": need_phone_price,
    }

    try:
        # requests is synchronous: run it in the default executor so this
        # coroutine does not block the event loop for up to request_timeout.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.post(
                CONTENT_FANS_PORTRAIT_API,
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=request_timeout,
            ),
        )
        response.raise_for_status()
        data = response.json()

        # The portrait is read from data["data"]["data"]; anything that is not
        # a dict at either level is treated as "no portrait".
        data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
        portrait = data_block.get("data", {}) if isinstance(data_block.get("data"), dict) else {}

        # Build the human-readable summary.
        summary_lines = [f"内容 {content_id} 的点赞用户画像"]
        summary_lines.append(f"画像链接:https://douhot.douyin.com/video/detail?active_tab=video_fans&video_id={content_id}")
        summary_lines.append("")

        for dimension, distribution in portrait.items():
            if not isinstance(distribution, dict):
                continue

            if dimension in ("省份", "城市"):
                summary_lines.append(f"【{dimension} TOP5】分布")
                entries = _top_k(distribution, 5)
            else:
                summary_lines.append(f"【{dimension}】分布")
                entries = distribution.items()

            for name, metrics in entries:
                if not isinstance(metrics, dict):
                    # One malformed entry must not discard the whole portrait.
                    continue
                ratio = metrics.get("percentage")
                tgi = metrics.get("preference")
                summary_lines.append(f"  {name}: {ratio} (偏好度: {tgi})")
            summary_lines.append("")

        duration_ms = int((time.time() - start_time) * 1000)
        has_valid_portrait = bool(portrait and any(
            isinstance(v, dict) and v for v in portrait.values()
        ))

        logger.info(
            "get_content_fans_portrait completed",
            extra={
                "content_id": content_id,
                "has_portrait": has_valid_portrait,
                "portrait_dimensions": list(portrait.keys()) if portrait else [],
                "duration_ms": duration_ms,
            },
        )

        return ToolResult(
            title=f"内容点赞用户画像: {content_id}",
            output="\n".join(summary_lines),
            long_term_memory=f"Fetched fans portrait for content '{content_id}'",
            metadata={
                "raw_data": data,
                "has_portrait": has_valid_portrait,
                "portrait_data": portrait,
            },
        )
    except requests.exceptions.HTTPError as e:
        # e.response can be None for a hand-built HTTPError; reading it
        # unguarded would raise from inside this handler and escape the tool.
        status_code = getattr(e.response, "status_code", None)
        body_text = getattr(e.response, "text", "")
        logger.error(
            "get_content_fans_portrait HTTP error",
            extra={
                "content_id": content_id,
                "status_code": status_code,
                "error": str(e),
            },
        )
        return _failure(f"HTTP {status_code}: {body_text}")
    except requests.exceptions.Timeout:
        logger.error("get_content_fans_portrait timeout", extra={"content_id": content_id, "timeout": request_timeout})
        return _failure(f"请求超时({request_timeout}秒)")
    except requests.exceptions.RequestException as e:
        logger.error("get_content_fans_portrait network error", extra={"content_id": content_id, "error": str(e)})
        return _failure(f"网络错误: {str(e)}")
    except Exception as e:
        # Tool boundary: report anything unexpected instead of raising.
        logger.error(
            "get_content_fans_portrait unexpected error",
            extra={"content_id": content_id, "error": str(e)},
            exc_info=True,
        )
        return _failure(f"未知错误: {str(e)}")
def _top_k(items: Dict[str, Any], k: int) -> List[Tuple[str, Any]]:
    """
    Return the ``k`` entries of ``items`` with the largest ``percentage``.

    Args:
        items: Mapping of item name to a metrics dict. The ``"percentage"``
            value may be a string such as ``"48.35%"`` or a plain number.
        k: Maximum number of entries to return; a non-positive value yields
            an empty list.

    Returns:
        A list of ``(name, metrics)`` pairs ordered by descending percentage.
        Entries whose percentage is missing or cannot be parsed rank last.
        Ties keep the order in which they appear in ``items``.
    """
    if k <= 0:
        return []

    lowest = float("-inf")

    def percent_value(entry: Tuple[str, Any]) -> float:
        metrics = entry[1] if isinstance(entry[1], dict) else {}
        raw = metrics.get("percentage")
        # Compare numerically: as strings "9.5%" would sort above "48.35%",
        # and a missing value (None) cannot be compared with strings at all.
        if isinstance(raw, str):
            try:
                value = float(raw.strip().rstrip("%"))
            except ValueError:
                return lowest
        elif isinstance(raw, (int, float)) and not isinstance(raw, bool):
            value = float(raw)
        else:
            return lowest
        # NaN never compares equal to itself and would make the order
        # undefined, so it is ranked last as well.
        return value if value == value else lowest

    return sorted(items.items(), key=percent_value, reverse=True)[:k]
|