# howard / Agent
"""
从数据库中按“搜索词 query”检索历史任务沉淀的优质作者（demand_find_author）。

用途：
- 先用该工具找到相关作者（sec_uid / 链接）
- 再调用 douyin_user_videos(account_id=sec_uid) 获取其作品做二次筛选
"""

from __future__ import annotations

import re
from typing import Any, Dict, List, Optional

from agent.tools import ToolResult, tool

from db import get_connection


_DOUYIN_USER_URL_RE = re.compile(r"^https?://www\.douyin\.com/user/(?P<sec_uid>[^/?#]+)")


def _extract_sec_uid(author_link: str) -> str:
    if not author_link:
        return ""
    m = _DOUYIN_USER_URL_RE.match(author_link.strip())
    return m.group("sec_uid") if m else ""


def _query_authors(conn, query: str, limit: int) -> List[Dict[str, Any]]:
    q = (query or "").strip()
    if not q:
        return []

    # demand_find_author 本身不存 query，需要通过 trace_id 关联 demand_find_content_result.query
    sql = """
    SELECT DISTINCT
      a.author_name,
      a.author_link,
      a.elderly_ratio,
      a.elderly_tgi,
      a.remark,
      a.trace_id
    FROM demand_find_author a
    INNER JOIN demand_find_content_result r
      ON r.trace_id = a.trace_id
    WHERE r.query LIKE %s
    ORDER BY a.elderly_ratio DESC, a.elderly_tgi DESC
    LIMIT %s
    """
    like = f"%{q}%"
    with conn.cursor() as cur:
        cur.execute(sql, (like, int(limit)))
        rows = cur.fetchall() or []
        return [dict(r) for r in rows]


@tool(description="从 demand_find_author 中按搜索词查找相关作者")
async def find_authors_from_db(query: str, limit: int = 20) -> ToolResult:
    """
    Find authors in demand_find_author related to a search term.

    The rows returned by the lookup are normalised into author dicts (exposed
    in the result metadata) and rendered as a numbered text listing.

    Args:
        query: Search term, fuzzy-matched against historical demand_find_content_result.query.
        limit: Maximum number of authors to return.

    Returns:
        A ToolResult whose output is the text listing and whose metadata holds
        the author dicts together with the query and limit that were used.
    """
    # NOTE(review): the lookup below is a plain synchronous call inside an
    # async function; if it blocks on the database it will hold the event
    # loop for that time — confirm this is acceptable for the tool runner.
    conn = get_connection()
    try:
        rows = _query_authors(conn, query=query, limit=limit)
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

    def _blank_if_missing(value: Any) -> Any:
        # Only a missing (None) value becomes ""; `value or ""` would also
        # blank out a genuine 0 ratio / TGI and report it as absent.
        return "" if value is None else value

    authors: List[Dict[str, Any]] = []
    for r in rows:
        author_link = r.get("author_link") or ""
        authors.append(
            {
                "author_nickname": r.get("author_name") or "",
                "author_url": author_link,
                "author_sec_uid": _extract_sec_uid(author_link),
                "age_50_plus_ratio": _blank_if_missing(r.get("elderly_ratio")),
                "age_50_plus_tgi": _blank_if_missing(r.get("elderly_tgi")),
                "remark": r.get("remark") or "",
                "trace_id": r.get("trace_id") or "",
            }
        )

    # Header line, a blank separator, then one indented entry per author.
    lines = [f"按搜索词「{query}」在数据库中找到 {len(authors)} 个相关作者：", ""]
    for i, a in enumerate(authors, 1):
        lines.append(f"{i}. {a['author_nickname']}")
        if a["author_sec_uid"]:
            lines.append(f"   sec_uid: {a['author_sec_uid']}")
        if a["author_url"]:
            lines.append(f"   链接: {a['author_url']}")
        # Show the audience profile line when at least one metric is present.
        if a["age_50_plus_ratio"] != "" or a["age_50_plus_tgi"] != "":
            lines.append(f"   画像: 50+占比={a['age_50_plus_ratio']} | TGI={a['age_50_plus_tgi']}")
        if a["remark"]:
            lines.append(f"   备注: {a['remark']}")
        lines.append("")

    return ToolResult(
        title="数据库作者检索",
        # strip() removes the trailing blank separator after the last entry.
        output="\n".join(lines).strip(),
        metadata={"authors": authors, "query": query, "limit": limit},
        long_term_memory=f"DB author search for '{query}', found {len(authors)} authors",
    )