|
@@ -4,14 +4,16 @@
|
|
|
调用内部爬虫服务获取账号/内容的粉丝画像。
|
|
调用内部爬虫服务获取账号/内容的粉丝画像。
|
|
|
"""
|
|
"""
|
|
|
import asyncio
|
|
import asyncio
|
|
|
-import json
|
|
|
|
|
|
|
+import logging
|
|
|
|
|
+import time
|
|
|
from typing import Optional, Dict, Any, List, Tuple
|
|
from typing import Optional, Dict, Any, List, Tuple
|
|
|
|
|
|
|
|
-import httpx
|
|
|
|
|
import requests
|
|
import requests
|
|
|
|
|
|
|
|
from agent.tools import tool, ToolResult
|
|
from agent.tools import tool, ToolResult
|
|
|
|
|
|
|
|
|
|
+logger = logging.getLogger(__name__)
|
|
|
|
|
+
|
|
|
|
|
|
|
|
ACCOUNT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/account_fans_portrait"
|
|
ACCOUNT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/account_fans_portrait"
|
|
|
CONTENT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/video_like_portrait"
|
|
CONTENT_FANS_PORTRAIT_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/re_dian_bao/video_like_portrait"
|
|
@@ -33,6 +35,8 @@ async def get_account_fans_portrait(
|
|
|
"""
|
|
"""
|
|
|
获取抖音账号粉丝画像(热点宝数据)
|
|
获取抖音账号粉丝画像(热点宝数据)
|
|
|
|
|
|
|
|
|
|
+ 获取指定账号的粉丝画像数据,包括年龄、性别、地域等多个维度。
|
|
|
|
|
+
|
|
|
Args:
|
|
Args:
|
|
|
account_id: 抖音账号ID(使用 author.sec_uid)
|
|
account_id: 抖音账号ID(使用 author.sec_uid)
|
|
|
need_province: 是否获取省份分布,默认 False
|
|
need_province: 是否获取省份分布,默认 False
|
|
@@ -45,15 +49,56 @@ async def get_account_fans_portrait(
|
|
|
timeout: 超时时间(秒),默认 60
|
|
timeout: 超时时间(秒),默认 60
|
|
|
|
|
|
|
|
Returns:
|
|
Returns:
|
|
|
- ToolResult: 包含粉丝画像数据
|
|
|
|
|
- - ratio: 占比(百分比)
|
|
|
|
|
- - tgi (偏好度): > 100 表示该人群偏好高于平均水平,< 100 表示低于平均,= 100 表示平均水平
|
|
|
|
|
- 例如:50岁以上 tgi=150 表示该账号粉丝中50岁以上人群的偏好度是平台平均的1.5倍
|
|
|
|
|
-
|
|
|
|
|
- 注意:
|
|
|
|
|
|
|
+ ToolResult: 包含以下内容:
|
|
|
|
|
+ - output: 文本格式的画像摘要
|
|
|
|
|
+ - metadata.has_portrait: 布尔值,表示是否有有效画像数据
|
|
|
|
|
+ - True: 有有效画像数据
|
|
|
|
|
+ - False: 无画像数据
|
|
|
|
|
+ - metadata.portrait_data: 结构化的画像数据(字典格式)
|
|
|
|
|
+ - 键: 维度名称(如 "年龄"、"性别")
|
|
|
|
|
+ - 值: 该维度的分布数据(字典)
|
|
|
|
|
+ - percentage: 占比(如 "48.35%")
|
|
|
|
|
+ - preference: 偏好度/TGI(如 "210.05")
|
|
|
|
|
+ - metadata.raw_data: 原始 API 返回数据
|
|
|
|
|
+
|
|
|
|
|
+ Note:
|
|
|
|
|
+ - account_id 参数使用 author.sec_uid(约80字符)
|
|
|
- 默认只返回年龄分布,需要其他维度时设置对应参数为 True
|
|
- 默认只返回年龄分布,需要其他维度时设置对应参数为 True
|
|
|
- 省份数据只显示 TOP5
|
|
- 省份数据只显示 TOP5
|
|
|
|
|
+ - 偏好度(TGI)说明:
|
|
|
|
|
+ - > 100: 该人群偏好高于平均水平
|
|
|
|
|
+ - = 100: 平均水平
|
|
|
|
|
+ - < 100: 低于平均水平
|
|
|
|
|
+ - 使用 metadata.has_portrait 判断画像是否有效,不要解析 output 文本
|
|
|
|
|
+ - 从 metadata.portrait_data 获取结构化画像数据
|
|
|
"""
|
|
"""
|
|
|
|
|
+ start_time = time.time()
|
|
|
|
|
+
|
|
|
|
|
+ # 验证 account_id 格式
|
|
|
|
|
+ if not account_id or not isinstance(account_id, str):
|
|
|
|
|
+ logger.error("get_account_fans_portrait invalid account_id", extra={"account_id": account_id})
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="账号粉丝画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error="account_id 参数无效:必须是非空字符串",
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if not account_id.startswith("MS4wLjABAAAA"):
|
|
|
|
|
+ logger.error("get_account_fans_portrait invalid sec_uid format", extra={"account_id": account_id})
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="账号粉丝画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error=f"account_id 格式错误:必须以 MS4wLjABAAAA 开头,当前值: {account_id[:min(20, len(account_id))]}...",
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if len(account_id) < 70 or len(account_id) > 90:
|
|
|
|
|
+ logger.error("get_account_fans_portrait invalid sec_uid length", extra={"account_id": account_id, "length": len(account_id)})
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="账号粉丝画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error=f"account_id 长度异常:期望 70-90 字符,实际 {len(account_id)} 字符。这可能是编造或截断的数据。",
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
try:
|
|
try:
|
|
|
payload = {
|
|
payload = {
|
|
|
"account_id": account_id,
|
|
"account_id": account_id,
|
|
@@ -100,24 +145,65 @@ async def get_account_fans_portrait(
|
|
|
summary_lines.append(f" {name}: {ratio} (偏好度: {tgi})")
|
|
summary_lines.append(f" {name}: {ratio} (偏好度: {tgi})")
|
|
|
summary_lines.append("")
|
|
summary_lines.append("")
|
|
|
|
|
|
|
|
|
|
+ duration_ms = int((time.time() - start_time) * 1000)
|
|
|
|
|
+ has_valid_portrait = bool(portrait and any(
|
|
|
|
|
+ isinstance(v, dict) and v for v in portrait.values()
|
|
|
|
|
+ ))
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(
|
|
|
|
|
+ "get_account_fans_portrait completed",
|
|
|
|
|
+ extra={
|
|
|
|
|
+ "account_id": account_id,
|
|
|
|
|
+ "has_portrait": has_valid_portrait,
|
|
|
|
|
+ "portrait_dimensions": list(portrait.keys()) if portrait else [],
|
|
|
|
|
+ "duration_ms": duration_ms
|
|
|
|
|
+ }
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
return ToolResult(
|
|
return ToolResult(
|
|
|
title=f"账号粉丝画像: {account_id}",
|
|
title=f"账号粉丝画像: {account_id}",
|
|
|
output="\n".join(summary_lines),
|
|
output="\n".join(summary_lines),
|
|
|
long_term_memory=f"Fetched fans portrait for account '{account_id}'",
|
|
long_term_memory=f"Fetched fans portrait for account '{account_id}'",
|
|
|
- metadata={"raw_data": data}
|
|
|
|
|
|
|
+ metadata={
|
|
|
|
|
+ "raw_data": data,
|
|
|
|
|
+ "has_portrait": has_valid_portrait,
|
|
|
|
|
+ "portrait_data": portrait
|
|
|
|
|
+ }
|
|
|
|
|
+ )
|
|
|
|
|
+ except requests.exceptions.HTTPError as e:
|
|
|
|
|
+ logger.error(
|
|
|
|
|
+ "get_account_fans_portrait HTTP error",
|
|
|
|
|
+ extra={
|
|
|
|
|
+ "account_id": account_id,
|
|
|
|
|
+ "status_code": e.response.status_code,
|
|
|
|
|
+ "error": str(e)
|
|
|
|
|
+ }
|
|
|
)
|
|
)
|
|
|
- except httpx.HTTPStatusError as e:
|
|
|
|
|
return ToolResult(
|
|
return ToolResult(
|
|
|
title="账号粉丝画像获取失败",
|
|
title="账号粉丝画像获取失败",
|
|
|
output="",
|
|
output="",
|
|
|
- error=f"HTTP error {e.response.status_code}: {e.response.text}",
|
|
|
|
|
|
|
+ error=f"HTTP {e.response.status_code}: {e.response.text}",
|
|
|
|
|
+ )
|
|
|
|
|
+ except requests.exceptions.Timeout:
|
|
|
|
|
+ logger.error("get_account_fans_portrait timeout", extra={"account_id": account_id, "timeout": request_timeout})
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="账号粉丝画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error=f"请求超时({request_timeout}秒)",
|
|
|
|
|
+ )
|
|
|
|
|
+ except requests.exceptions.RequestException as e:
|
|
|
|
|
+ logger.error("get_account_fans_portrait network error", extra={"account_id": account_id, "error": str(e)})
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="账号粉丝画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error=f"网络错误: {str(e)}",
|
|
|
)
|
|
)
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
|
|
+ logger.error("get_account_fans_portrait unexpected error", extra={"account_id": account_id, "error": str(e)}, exc_info=True)
|
|
|
return ToolResult(
|
|
return ToolResult(
|
|
|
title="账号粉丝画像获取失败",
|
|
title="账号粉丝画像获取失败",
|
|
|
output="",
|
|
output="",
|
|
|
- error=str(e),
|
|
|
|
|
|
|
+ error=f"未知错误: {str(e)}",
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
@@ -136,6 +222,8 @@ async def get_content_fans_portrait(
|
|
|
"""
|
|
"""
|
|
|
获取抖音内容点赞用户画像(热点宝数据)
|
|
获取抖音内容点赞用户画像(热点宝数据)
|
|
|
|
|
|
|
|
|
|
+ 获取指定视频内容的点赞用户画像数据,包括年龄、性别、地域等多个维度。
|
|
|
|
|
+
|
|
|
Args:
|
|
Args:
|
|
|
content_id: 抖音内容ID(使用 aweme_id)
|
|
content_id: 抖音内容ID(使用 aweme_id)
|
|
|
need_province: 是否获取省份分布,默认 False
|
|
need_province: 是否获取省份分布,默认 False
|
|
@@ -148,15 +236,58 @@ async def get_content_fans_portrait(
|
|
|
timeout: 超时时间(秒),默认 60
|
|
timeout: 超时时间(秒),默认 60
|
|
|
|
|
|
|
|
Returns:
|
|
Returns:
|
|
|
- ToolResult: 包含点赞用户画像数据
|
|
|
|
|
- - ratio: 占比(百分比)
|
|
|
|
|
- - tgi (偏好度): > 100 表示该人群偏好高于平均水平,< 100 表示低于平均,= 100 表示平均水平
|
|
|
|
|
- 例如:50岁以上 tgi=150 表示该视频点赞用户中50岁以上人群的偏好度是平台平均的1.5倍
|
|
|
|
|
-
|
|
|
|
|
- 注意:
|
|
|
|
|
|
|
+ ToolResult: 包含以下内容:
|
|
|
|
|
+ - output: 文本格式的画像摘要
|
|
|
|
|
+ - metadata.has_portrait: 布尔值,表示是否有有效画像数据
|
|
|
|
|
+ - True: 有有效画像数据
|
|
|
|
|
+ - False: 无画像数据(需要使用账号画像兜底)
|
|
|
|
|
+ - metadata.portrait_data: 结构化的画像数据(字典格式)
|
|
|
|
|
+ - 键: 维度名称(如 "年龄"、"性别")
|
|
|
|
|
+ - 值: 该维度的分布数据(字典)
|
|
|
|
|
+ - percentage: 占比(如 "48.35%")
|
|
|
|
|
+ - preference: 偏好度/TGI(如 "210.05")
|
|
|
|
|
+ - metadata.raw_data: 原始 API 返回数据
|
|
|
|
|
+
|
|
|
|
|
+ Note:
|
|
|
|
|
+ - content_id 参数使用 aweme_id
|
|
|
- 默认只返回年龄分布,需要其他维度时设置对应参数为 True
|
|
- 默认只返回年龄分布,需要其他维度时设置对应参数为 True
|
|
|
- 省份数据只显示 TOP5
|
|
- 省份数据只显示 TOP5
|
|
|
|
|
+ - 偏好度(TGI)说明:
|
|
|
|
|
+ - > 100: 该人群偏好高于平均水平
|
|
|
|
|
+ - = 100: 平均水平
|
|
|
|
|
+ - < 100: 低于平均水平
|
|
|
|
|
+ - 使用 metadata.has_portrait 判断画像是否有效,不要解析 output 文本
|
|
|
|
|
+ - 如果 has_portrait 为 False,应使用 get_account_fans_portrait 作为兜底
|
|
|
|
|
+ - 从 metadata.portrait_data 获取结构化画像数据
|
|
|
"""
|
|
"""
|
|
|
|
|
+ start_time = time.time()
|
|
|
|
|
+
|
|
|
|
|
+ # 验证 content_id 格式
|
|
|
|
|
+ if not content_id or not isinstance(content_id, str):
|
|
|
|
|
+ logger.error("get_content_fans_portrait invalid content_id", extra={"content_id": content_id})
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="内容点赞用户画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error="content_id 参数无效:必须是非空字符串",
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # aweme_id 应该是纯数字字符串,长度约 19 位
|
|
|
|
|
+ if not content_id.isdigit():
|
|
|
|
|
+ logger.error("get_content_fans_portrait invalid aweme_id format", extra={"content_id": content_id})
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="内容点赞用户画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error=f"content_id 格式错误:aweme_id 应该是纯数字,当前值: {content_id[:20]}...",
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if len(content_id) < 15 or len(content_id) > 25:
|
|
|
|
|
+ logger.error("get_content_fans_portrait invalid aweme_id length", extra={"content_id": content_id, "length": len(content_id)})
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="内容点赞用户画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error=f"content_id 长度异常:期望 15-25 位数字,实际 {len(content_id)} 位",
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
try:
|
|
try:
|
|
|
payload = {
|
|
payload = {
|
|
|
"content_id": content_id,
|
|
"content_id": content_id,
|
|
@@ -204,44 +335,70 @@ async def get_content_fans_portrait(
|
|
|
summary_lines.append(f" {name}: {ratio} (偏好度: {tgi})")
|
|
summary_lines.append(f" {name}: {ratio} (偏好度: {tgi})")
|
|
|
summary_lines.append("")
|
|
summary_lines.append("")
|
|
|
|
|
|
|
|
|
|
+ duration_ms = int((time.time() - start_time) * 1000)
|
|
|
|
|
+ has_valid_portrait = bool(portrait and any(
|
|
|
|
|
+ isinstance(v, dict) and v for v in portrait.values()
|
|
|
|
|
+ ))
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(
|
|
|
|
|
+ "get_content_fans_portrait completed",
|
|
|
|
|
+ extra={
|
|
|
|
|
+ "content_id": content_id,
|
|
|
|
|
+ "has_portrait": has_valid_portrait,
|
|
|
|
|
+ "portrait_dimensions": list(portrait.keys()) if portrait else [],
|
|
|
|
|
+ "duration_ms": duration_ms
|
|
|
|
|
+ }
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
return ToolResult(
|
|
return ToolResult(
|
|
|
title=f"内容点赞用户画像: {content_id}",
|
|
title=f"内容点赞用户画像: {content_id}",
|
|
|
output="\n".join(summary_lines),
|
|
output="\n".join(summary_lines),
|
|
|
long_term_memory=f"Fetched fans portrait for content '{content_id}'",
|
|
long_term_memory=f"Fetched fans portrait for content '{content_id}'",
|
|
|
- metadata={"raw_data": data}
|
|
|
|
|
|
|
+ metadata={
|
|
|
|
|
+ "raw_data": data,
|
|
|
|
|
+ "has_portrait": has_valid_portrait,
|
|
|
|
|
+ "portrait_data": portrait
|
|
|
|
|
+ }
|
|
|
|
|
+ )
|
|
|
|
|
+ except requests.exceptions.HTTPError as e:
|
|
|
|
|
+ logger.error(
|
|
|
|
|
+ "get_content_fans_portrait HTTP error",
|
|
|
|
|
+ extra={
|
|
|
|
|
+ "content_id": content_id,
|
|
|
|
|
+ "status_code": e.response.status_code,
|
|
|
|
|
+ "error": str(e)
|
|
|
|
|
+ }
|
|
|
|
|
+ )
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="内容点赞用户画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error=f"HTTP {e.response.status_code}: {e.response.text}",
|
|
|
)
|
|
)
|
|
|
- except httpx.HTTPStatusError as e:
|
|
|
|
|
|
|
+ except requests.exceptions.Timeout:
|
|
|
|
|
+ logger.error("get_content_fans_portrait timeout", extra={"content_id": content_id, "timeout": request_timeout})
|
|
|
return ToolResult(
|
|
return ToolResult(
|
|
|
title="内容点赞用户画像获取失败",
|
|
title="内容点赞用户画像获取失败",
|
|
|
output="",
|
|
output="",
|
|
|
- error=f"HTTP error {e.response.status_code}: {e.response.text}",
|
|
|
|
|
|
|
+ error=f"请求超时({request_timeout}秒)",
|
|
|
|
|
+ )
|
|
|
|
|
+ except requests.exceptions.RequestException as e:
|
|
|
|
|
+ logger.error("get_content_fans_portrait network error", extra={"content_id": content_id, "error": str(e)})
|
|
|
|
|
+ return ToolResult(
|
|
|
|
|
+ title="内容点赞用户画像获取失败",
|
|
|
|
|
+ output="",
|
|
|
|
|
+ error=f"网络错误: {str(e)}",
|
|
|
)
|
|
)
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
|
|
+ logger.error("get_content_fans_portrait unexpected error", extra={"content_id": content_id, "error": str(e)}, exc_info=True)
|
|
|
return ToolResult(
|
|
return ToolResult(
|
|
|
title="内容点赞用户画像获取失败",
|
|
title="内容点赞用户画像获取失败",
|
|
|
output="",
|
|
output="",
|
|
|
- error=str(e),
|
|
|
|
|
|
|
+ error=f"未知错误: {str(e)}",
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
-
|
|
|
|
|
def _top_k(items: Dict[str, Any], k: int) -> List[Tuple[str, Any]]:
|
|
def _top_k(items: Dict[str, Any], k: int) -> List[Tuple[str, Any]]:
|
|
|
def percent_value(entry: Tuple[str, Any]) -> float:
|
|
def percent_value(entry: Tuple[str, Any]) -> float:
|
|
|
metrics = entry[1] if isinstance(entry[1], dict) else {}
|
|
metrics = entry[1] if isinstance(entry[1], dict) else {}
|
|
|
return metrics.get("percentage")
|
|
return metrics.get("percentage")
|
|
|
|
|
|
|
|
return sorted(items.items(), key=percent_value, reverse=True)[:k]
|
|
return sorted(items.items(), key=percent_value, reverse=True)[:k]
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-# async def main():
|
|
|
|
|
-# # result = await get_account_fans_portrait(
|
|
|
|
|
-# # account_id="MS4wLjABAAAAXvRdWJsdPKkh9Ja3ZirxoB8pAaxNXUXs1KUe14gW0IoqDz-D-fG0xZ8c5kSfTPXx",
|
|
|
|
|
-# # need_province=True
|
|
|
|
|
-# # )
|
|
|
|
|
-# result = await get_content_fans_portrait(
|
|
|
|
|
-# content_id="7614821787578568420"
|
|
|
|
|
-# # need_province=True
|
|
|
|
|
-# )
|
|
|
|
|
-# print(result.output)
|
|
|
|
|
-#
|
|
|
|
|
-# if __name__ == "__main__":
|
|
|
|
|
-# asyncio.run(main())
|
|
|