# howard / Agent
"""
从 demand_find_author 中，用调用方传入的 query 与 content_tags 做文字匹配，
按匹配度优先返回作者（sec_uid / 链接）。

用途：
- 先用该工具找到相关作者
- 再调用 douyin_user_videos(account_id=sec_uid) 获取其作品做二次筛选
"""

from __future__ import annotations

import json
import re
from typing import Any, Dict, List

from agent.tools import ToolResult, tool
from utils.tool_logging import format_tool_result_for_log, log_tool_call

from db import get_connection

# Label passed to log_tool_call when find_authors_from_db logs its result.
_LOG_LABEL = "工具调用：find_authors_from_db -> 按 content_tags 匹配优质作者"


# Matches an http(s) www.douyin.com user-profile URL and captures the path
# segment after /user/ (up to the next "/", "?" or "#") as `sec_uid`.
_DOUYIN_USER_URL_RE = re.compile(r"^https?://www\.douyin\.com/user/(?P<sec_uid>[^/?#]+)")


def _extract_sec_uid(author_link: str) -> str:
    """Pull the sec_uid out of a Douyin user-profile link.

    Args:
        author_link: Profile URL; surrounding whitespace is ignored.

    Returns:
        The captured sec_uid, or an empty string when the link is empty/None
        or does not match the expected profile-URL shape.
    """
    if author_link:
        found = _DOUYIN_USER_URL_RE.match(author_link.strip())
        if found is not None:
            return found.group("sec_uid")
    return ""


def _query_authors(conn, query: str, limit: int) -> List[Dict[str, Any]]:
    """Return authors whose ``content_tags`` contain ``query`` as literal text.

    Ranking: an exact match on ``content_tags`` comes first, then a prefix
    match, then any other substring match. Ties are broken by shorter
    ``content_tags``, then by ``elderly_ratio`` and ``elderly_tgi`` descending.

    Args:
        conn: DB connection. ``conn.cursor()`` must be usable as a context
            manager and yield rows that ``dict()`` accepts (presumably a
            dict-style cursor — verify against ``get_connection``).
        query: Search text; surrounding whitespace is stripped. ``%`` and
            ``_`` are matched literally, not as LIKE wildcards.
        limit: Maximum number of rows; coerced with ``int()``.

    Returns:
        A list of row dicts. Empty when ``query`` is blank or ``limit`` is
        not positive (no SQL is executed in either case).
    """
    q = (query or "").strip()
    if not q:
        return []

    max_rows = int(limit)
    if max_rows <= 0:
        # LIMIT 0 would return nothing anyway, and a negative LIMIT is
        # rejected by the database; answer without a round trip.
        return []

    # Neutralise LIKE metacharacters so the query is a plain substring match.
    # An explicit ESCAPE character is used instead of the default backslash so
    # the behaviour does not depend on the server's backslash-escape SQL mode.
    escaped = q.replace("|", "||").replace("%", "|%").replace("_", "|_")

    # Only `query` vs. content_tags text matching is used.
    # content_tags must appear in the SELECT list: per the original author's
    # note, MySQL requires columns referenced by ORDER BY to be selected when
    # DISTINCT is used (otherwise error 3065).
    sql = """
    SELECT DISTINCT
      a.author_name,
      a.author_link,
      a.elderly_ratio,
      a.elderly_tgi,
      a.remark,
      a.trace_id,
      a.content_tags
    FROM demand_find_author a
    WHERE a.content_tags IS NOT NULL
      AND TRIM(a.content_tags) <> ''
      AND a.content_tags LIKE %s ESCAPE '|'
    ORDER BY
      CASE
        WHEN a.content_tags = %s THEN 0
        WHEN a.content_tags LIKE %s ESCAPE '|' THEN 1
        ELSE 2
      END ASC,
      CHAR_LENGTH(a.content_tags) ASC,
      a.elderly_ratio DESC,
      a.elderly_tgi DESC
    LIMIT %s
    """
    like_contains = f"%{escaped}%"
    like_prefix = f"{escaped}%"
    with conn.cursor() as cur:
        # The equality comparison takes the raw text; only LIKE needs escaping.
        cur.execute(sql, (like_contains, q, like_prefix, max_rows))
        rows = cur.fetchall() or []
        return [dict(r) for r in rows]


# Tool entry point: look up authors whose content_tags match `query` and
# return them as both readable text and structured metadata.
#
# Parameters:
#   query: keyword matched against content_tags (substring match, ranked
#          exact > prefix > contains).
#   limit: maximum number of authors returned (default 3).
#
# Returns a ToolResult whose `output` is a numbered listing (nickname, then
# optional sec_uid / link / 50+ audience figures / remark lines) and whose
# `metadata["authors"]` is a list of dicts with the keys author_nickname,
# author_url, author_sec_uid, age_50_plus_ratio, age_50_plus_tgi, remark and
# trace_id. Database errors are not caught here and propagate to the caller.
#
# NOTE(review): the docstring below is intentionally left verbatim — the
# @tool decorator may read it at runtime for parameter descriptions; confirm
# before rewording or translating it.
# NOTE(review): the DB access is synchronous inside an async function —
# confirm that blocking the event loop here is acceptable.
@tool(description="从优质作者库中按搜索词匹配查找作者")
async def find_authors_from_db(query: str, limit: int = 3) -> ToolResult:
    """
    Args:
        query: 与 content_tags 做匹配的关键词（子串匹配；匹配度优先：全等、前缀、包含）
        limit: 返回作者数量上限（默认 3）
    """
    call_params = {"query": query, "limit": limit}
    conn = get_connection()
    try:
        rows = _query_authors(conn, query=query, limit=limit)
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

    authors: List[Dict[str, Any]] = []
    for r in rows:
        author_link = r.get("author_link") or ""
        ratio = r.get("elderly_ratio")
        tgi = r.get("elderly_tgi")
        authors.append(
            {
                "author_nickname": r.get("author_name") or "",
                "author_url": author_link,
                "author_sec_uid": _extract_sec_uid(author_link),
                # Blank only when the value is missing: a plain `or ""` would
                # also erase a legitimate 0 and hide the audience line below.
                "age_50_plus_ratio": "" if ratio is None else ratio,
                "age_50_plus_tgi": "" if tgi is None else tgi,
                "remark": r.get("remark") or "",
                "trace_id": r.get("trace_id") or "",
            }
        )

    lines = [f"按搜索词「{query}」在数据库中找到 {len(authors)} 个相关作者：", ""]
    for i, a in enumerate(authors, 1):
        lines.append(f"{i}. {a['author_nickname']}")
        if a["author_sec_uid"]:
            lines.append(f"   sec_uid: {a['author_sec_uid']}")
        if a["author_url"]:
            lines.append(f"   链接: {a['author_url']}")
        # Show the audience line when at least one of the two figures exists.
        if a["age_50_plus_ratio"] != "" or a["age_50_plus_tgi"] != "":
            lines.append(f"   画像: 50+占比={a['age_50_plus_ratio']} | TGI={a['age_50_plus_tgi']}")
        if a["remark"]:
            lines.append(f"   备注: {a['remark']}")
        # Blank separator between authors; the trailing one is stripped below.
        lines.append("")

    out = ToolResult(
        title="数据库作者检索",
        output="\n".join(lines).strip(),
        metadata={"authors": authors, "query": query, "limit": limit},
        long_term_memory=f"DB author search for '{query}', found {len(authors)} authors",
    )
    # default=str is used for this log line only, so a DB value that is not
    # JSON-native (e.g. Decimal) cannot make logging fail after the result
    # has already been built.
    log_tool_call(
        _LOG_LABEL,
        call_params,
        json.dumps(out.metadata.get("authors", []), ensure_ascii=False, default=str),
    )
    return out