find_authors_from_db.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. """
  2. 从 demand_find_author 中,用调用方传入的 query 与 content_tags 做文字匹配,
  3. 按匹配度优先返回作者(sec_uid / 链接)。
  4. 用途:
  5. - 先用该工具找到相关作者
  6. - 再调用 douyin_user_videos(account_id=sec_uid) 获取其作品做二次筛选
  7. """
  8. from __future__ import annotations
  9. import json
  10. import re
  11. from typing import Any, Dict, List
  12. from agent.tools import ToolResult, tool
  13. from utils.tool_logging import format_tool_result_for_log, log_tool_call
  14. from db import get_connection
  15. _LOG_LABEL = "工具调用:find_authors_from_db -> 按 content_tags 匹配优质作者"
  16. _DOUYIN_USER_URL_RE = re.compile(r"^https?://www\.douyin\.com/user/(?P<sec_uid>[^/?#]+)")
  17. def _extract_sec_uid(author_link: str) -> str:
  18. if not author_link:
  19. return ""
  20. m = _DOUYIN_USER_URL_RE.match(author_link.strip())
  21. return m.group("sec_uid") if m else ""
  22. def _query_authors(conn, query: str, limit: int) -> List[Dict[str, Any]]:
  23. q = (query or "").strip()
  24. if not q:
  25. return []
  26. # 仅用 query 与 content_tags 文字匹配;匹配度:全等 > 前缀匹配 > 子串匹配,再按标签更短、画像字段排序
  27. # content_tags 必须出现在 SELECT 中:MySQL 下 DISTINCT + ORDER BY 引用列需一致(否则 3065)
  28. sql = """
  29. SELECT DISTINCT
  30. a.author_name,
  31. a.author_link,
  32. a.elderly_ratio,
  33. a.elderly_tgi,
  34. a.remark,
  35. a.trace_id,
  36. a.content_tags
  37. FROM demand_find_author a
  38. WHERE a.content_tags IS NOT NULL
  39. AND TRIM(a.content_tags) <> ''
  40. AND a.content_tags LIKE %s
  41. ORDER BY
  42. CASE
  43. WHEN a.content_tags = %s THEN 0
  44. WHEN a.content_tags LIKE %s THEN 1
  45. ELSE 2
  46. END ASC,
  47. CHAR_LENGTH(a.content_tags) ASC,
  48. a.elderly_ratio DESC,
  49. a.elderly_tgi DESC
  50. LIMIT %s
  51. """
  52. like_contains = f"%{q}%"
  53. like_prefix = f"{q}%"
  54. with conn.cursor() as cur:
  55. cur.execute(sql, (like_contains, q, like_prefix, int(limit)))
  56. rows = cur.fetchall() or []
  57. return [dict(r) for r in rows]
  58. @tool(description="从优质作者库中按搜索词匹配查找作者")
  59. async def find_authors_from_db(query: str, limit: int = 3) -> ToolResult:
  60. """
  61. Args:
  62. query: 与 content_tags 做匹配的关键词(子串匹配;匹配度优先:全等、前缀、包含)
  63. limit: 返回作者数量上限(默认 3)
  64. """
  65. call_params = {"query": query, "limit": limit}
  66. conn = get_connection()
  67. try:
  68. rows = _query_authors(conn, query=query, limit=limit)
  69. finally:
  70. conn.close()
  71. authors: List[Dict[str, Any]] = []
  72. for r in rows:
  73. author_link = r.get("author_link") or ""
  74. authors.append(
  75. {
  76. "author_nickname": r.get("author_name") or "",
  77. "author_url": author_link,
  78. "author_sec_uid": _extract_sec_uid(author_link),
  79. "age_50_plus_ratio": r.get("elderly_ratio") or "",
  80. "age_50_plus_tgi": r.get("elderly_tgi") or "",
  81. "remark": r.get("remark") or "",
  82. "trace_id": r.get("trace_id") or "",
  83. }
  84. )
  85. lines = [f"按搜索词「{query}」在数据库中找到 {len(authors)} 个相关作者:", ""]
  86. for i, a in enumerate(authors, 1):
  87. lines.append(f"{i}. {a['author_nickname']}")
  88. if a["author_sec_uid"]:
  89. lines.append(f" sec_uid: {a['author_sec_uid']}")
  90. if a["author_url"]:
  91. lines.append(f" 链接: {a['author_url']}")
  92. if a["age_50_plus_ratio"] != "" or a["age_50_plus_tgi"] != "":
  93. lines.append(f" 画像: 50+占比={a['age_50_plus_ratio']} | TGI={a['age_50_plus_tgi']}")
  94. if a["remark"]:
  95. lines.append(f" 备注: {a['remark']}")
  96. lines.append("")
  97. out = ToolResult(
  98. title="数据库作者检索",
  99. output="\n".join(lines).strip(),
  100. metadata={"authors": authors, "query": query, "limit": limit},
  101. long_term_memory=f"DB author search for '{query}', found {len(authors)} authors",
  102. )
  103. log_tool_call(_LOG_LABEL, call_params, json.dumps(out.metadata.get("authors", []), ensure_ascii=False))
  104. return out