| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- """
- 从 demand_find_author 中,用调用方传入的 query 与 content_tags 做文字匹配,
- 按匹配度优先返回作者(sec_uid / 链接)。
- 用途:
- - 先用该工具找到相关作者
- - 再调用 douyin_user_videos(account_id=sec_uid) 获取其作品做二次筛选
- """
- from __future__ import annotations
- import json
- import re
- from typing import Any, Dict, List
- from agent.tools import ToolResult, tool
- from utils.tool_logging import format_tool_result_for_log, log_tool_call
- from db import get_connection
- _LOG_LABEL = "工具调用:find_authors_from_db -> 按 content_tags 匹配优质作者"
# Matches a www.douyin.com user-profile URL and captures the path segment
# after /user/ (up to the next "/", "?" or "#") as ``sec_uid``.
_DOUYIN_USER_URL_RE = re.compile(r"^https?://www\.douyin\.com/user/(?P<sec_uid>[^/?#]+)")


def _extract_sec_uid(author_link: str) -> str:
    """Return the ``sec_uid`` embedded in a Douyin user-profile URL.

    Args:
        author_link: Profile URL; surrounding whitespace is ignored.

    Returns:
        The captured ``sec_uid``, or an empty string when the link is
        empty/None or does not match the expected profile URL shape.
    """
    if author_link:
        found = _DOUYIN_USER_URL_RE.match(author_link.strip())
        if found is not None:
            return found.group("sec_uid")
    return ""
def _query_authors(conn, query: str, limit: int) -> List[Dict[str, Any]]:
    """Fetch authors from ``demand_find_author`` whose ``content_tags`` contain *query*.

    Rows are ranked by match quality (exact equality, then prefix match, then
    any substring match), then by shorter ``content_tags``, then by
    ``elderly_ratio`` and ``elderly_tgi`` in descending order.

    Args:
        conn: Database connection whose ``cursor()`` result is usable as a
            context manager. Rows are passed to ``dict()``, so the cursor
            presumably yields mapping-style rows (confirm against
            ``db.get_connection``).
        query: Keyword to match; surrounding whitespace is stripped. LIKE
            wildcards (``%`` and ``_``) in the keyword are matched literally.
        limit: Maximum number of rows to return.

    Returns:
        A list of row dicts. Empty when the keyword is blank or ``limit`` is
        not positive.
    """
    keyword = (query or "").strip()
    if not keyword:
        return []

    max_rows = int(limit)
    if max_rows <= 0:
        # A negative LIMIT is a SQL syntax error; asking for no rows simply
        # yields no rows.
        return []

    # Escape LIKE metacharacters so the keyword is matched as literal text.
    # An explicit ESCAPE character is used instead of the default backslash so
    # the behaviour does not depend on the server's SQL mode.
    escaped = keyword.replace("!", "!!").replace("%", "!%").replace("_", "!_")
    like_contains = f"%{escaped}%"
    like_prefix = f"{escaped}%"

    # Text match of the keyword against content_tags only. Ranking: exact >
    # prefix > substring, then shorter tags, then the audience-profile fields.
    # content_tags must be in the SELECT list: with DISTINCT, MySQL requires
    # ORDER BY columns to be selected (otherwise error 3065).
    sql = """
        SELECT DISTINCT
            a.author_name,
            a.author_link,
            a.elderly_ratio,
            a.elderly_tgi,
            a.remark,
            a.trace_id,
            a.content_tags
        FROM demand_find_author a
        WHERE a.content_tags IS NOT NULL
          AND TRIM(a.content_tags) <> ''
          AND a.content_tags LIKE %s ESCAPE '!'
        ORDER BY
            CASE
                WHEN a.content_tags = %s THEN 0
                WHEN a.content_tags LIKE %s ESCAPE '!' THEN 1
                ELSE 2
            END ASC,
            CHAR_LENGTH(a.content_tags) ASC,
            a.elderly_ratio DESC,
            a.elderly_tgi DESC
        LIMIT %s
    """

    with conn.cursor() as cur:
        # The exact-match comparison uses the raw keyword, not the escaped one.
        cur.execute(sql, (like_contains, keyword, like_prefix, max_rows))
        rows = cur.fetchall() or []
    return [dict(row) for row in rows]
@tool(description="从优质作者库中按搜索词匹配查找作者")
async def find_authors_from_db(query: str, limit: int = 3) -> ToolResult:
    """Search the author table by keyword and return the matches as a ToolResult.

    Args:
        query: Keyword matched against content_tags (substring match; ranked exact, then prefix, then contains)
        limit: Maximum number of authors to return (default 3)
    """
    call_params = {"query": query, "limit": limit}

    conn = get_connection()
    try:
        rows = _query_authors(conn, query=query, limit=limit)
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

    def _blank_if_none(value: Any) -> Any:
        # Only a missing value becomes "": a real 0 / 0.0 must be kept, since
        # `value or ""` would discard it.
        return "" if value is None else value

    authors: List[Dict[str, Any]] = []
    for row in rows:
        author_link = row.get("author_link") or ""
        authors.append(
            {
                "author_nickname": row.get("author_name") or "",
                "author_url": author_link,
                "author_sec_uid": _extract_sec_uid(author_link),
                "age_50_plus_ratio": _blank_if_none(row.get("elderly_ratio")),
                "age_50_plus_tgi": _blank_if_none(row.get("elderly_tgi")),
                "remark": row.get("remark") or "",
                "trace_id": row.get("trace_id") or "",
            }
        )

    # Human-readable listing: one numbered entry per author, optional detail
    # lines only when the corresponding field is present.
    lines = [f"按搜索词「{query}」在数据库中找到 {len(authors)} 个相关作者:", ""]
    for index, author in enumerate(authors, 1):
        lines.append(f"{index}. {author['author_nickname']}")
        if author["author_sec_uid"]:
            lines.append(f" sec_uid: {author['author_sec_uid']}")
        if author["author_url"]:
            lines.append(f" 链接: {author['author_url']}")
        if author["age_50_plus_ratio"] != "" or author["age_50_plus_tgi"] != "":
            lines.append(
                f" 画像: 50+占比={author['age_50_plus_ratio']} | TGI={author['age_50_plus_tgi']}"
            )
        if author["remark"]:
            lines.append(f" 备注: {author['remark']}")
        lines.append("")

    out = ToolResult(
        title="数据库作者检索",
        output="\n".join(lines).strip(),
        metadata={"authors": authors, "query": query, "limit": limit},
        long_term_memory=f"DB author search for '{query}', found {len(authors)} authors",
    )
    # default=str keeps the log line from raising on values that are not
    # JSON-native (e.g. Decimal, if the driver returns it for numeric columns
    # -- TODO confirm). Output is unchanged for JSON-native values.
    log_tool_call(
        _LOG_LABEL,
        call_params,
        json.dumps(out.metadata.get("authors", []), ensure_ascii=False, default=str),
    )
    return out
|