| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
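"""
Retrieve high-quality authors accumulated from past tasks (demand_find_author)
from the database by search term ("query").

Usage:
- First use this tool to find relevant authors (sec_uid / link)
- Then call douyin_user_videos(account_id=sec_uid) to fetch their videos for a
  second round of filtering
"""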
- from __future__ import annotations
- import re
- from typing import Any, Dict, List, Optional
- from agent.tools import ToolResult, tool
- from db import get_connection
- _DOUYIN_USER_URL_RE = re.compile(r"^https?://www\.douyin\.com/user/(?P<sec_uid>[^/?#]+)")
- def _extract_sec_uid(author_link: str) -> str:
- if not author_link:
- return ""
- m = _DOUYIN_USER_URL_RE.match(author_link.strip())
- return m.group("sec_uid") if m else ""
- def _query_authors(conn, query: str, limit: int) -> List[Dict[str, Any]]:
- q = (query or "").strip()
- if not q:
- return []
- # demand_find_author 本身不存 query,需要通过 trace_id 关联 demand_find_content_result.query
- sql = """
- SELECT DISTINCT
- a.author_name,
- a.author_link,
- a.elderly_ratio,
- a.elderly_tgi,
- a.remark,
- a.trace_id
- FROM demand_find_author a
- INNER JOIN demand_find_content_result r
- ON r.trace_id = a.trace_id
- WHERE r.query LIKE %s
- ORDER BY a.elderly_ratio DESC, a.elderly_tgi DESC
- LIMIT %s
- """
- like = f"%{q}%"
- with conn.cursor() as cur:
- cur.execute(sql, (like, int(limit)))
- rows = cur.fetchall() or []
- return [dict(r) for r in rows]
@tool(description="从 demand_find_author 中按搜索词查找相关作者")
async def find_authors_from_db(query: str, limit: int = 20) -> ToolResult:
    """Look up authors related to a search term and render them as a listing.

    Args:
        query: Search term, fuzzy-matched against the historical
            demand_find_content_result.query values.
        limit: Upper bound on the number of authors returned.

    Returns:
        A ToolResult whose ``output`` is a numbered text listing and whose
        ``metadata`` carries the structured author dicts together with the
        query and limit that were used.
    """
    # NOTE(review): get_connection() and _query_authors() are invoked
    # synchronously inside this coroutine; if the driver blocks, the event
    # loop stalls for the duration of the query — confirm this is acceptable.
    conn = get_connection()
    try:
        rows = _query_authors(conn, query=query, limit=limit)
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

    authors: List[Dict[str, Any]] = []
    for r in rows:
        author_link = r.get("author_link") or ""
        ratio = r.get("elderly_ratio")
        tgi = r.get("elderly_tgi")
        authors.append(
            {
                "author_nickname": r.get("author_name") or "",
                "author_url": author_link,
                "author_sec_uid": _extract_sec_uid(author_link),
                # Only a missing value becomes ""; a real 0 / 0.0 must stay
                # visible rather than being collapsed by a truthiness test.
                "age_50_plus_ratio": "" if ratio is None else ratio,
                "age_50_plus_tgi": "" if tgi is None else tgi,
                "remark": r.get("remark") or "",
                "trace_id": r.get("trace_id") or "",
            }
        )

    lines = [f"按搜索词「{query}」在数据库中找到 {len(authors)} 个相关作者:", ""]
    for i, a in enumerate(authors, 1):
        lines.append(f"{i}. {a['author_nickname']}")
        if a["author_sec_uid"]:
            lines.append(f" sec_uid: {a['author_sec_uid']}")
        if a["author_url"]:
            lines.append(f" 链接: {a['author_url']}")
        # Show the audience profile line when at least one metric is present.
        if a["age_50_plus_ratio"] != "" or a["age_50_plus_tgi"] != "":
            lines.append(f" 画像: 50+占比={a['age_50_plus_ratio']} | TGI={a['age_50_plus_tgi']}")
        if a["remark"]:
            lines.append(f" 备注: {a['remark']}")
        lines.append("")

    return ToolResult(
        title="数据库作者检索",
        output="\n".join(lines).strip(),
        metadata={"authors": authors, "query": query, "limit": limit},
        long_term_memory=f"DB author search for '{query}', found {len(authors)} authors",
    )
|