# run.py (15 KB)
  1. """
  2. 内容寻找 Agent
  3. 使用示例:
  4. python run.py
  5. """
  6. import asyncio
  7. import logging
  8. import sys
  9. import os
  10. from pathlib import Path
  11. from agent.tools.builtin.knowledge import KnowledgeConfig
  12. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  13. from dotenv import load_dotenv
  14. load_dotenv()
  15. from agent import (
  16. AgentRunner,
  17. RunConfig,
  18. FileSystemTraceStore,
  19. Trace,
  20. Message,
  21. )
  22. from agent.llm import create_openrouter_llm_call
  23. from agent.llm.prompts import SimplePrompt
  24. # 导入工具(确保工具被注册)
  25. from tools import (
  26. douyin_search,
  27. douyin_user_videos,
  28. get_content_fans_portrait,
  29. get_account_fans_portrait,
  30. )
  31. # 配置日志
  32. log_dir = Path(__file__).parent / '.cache'
  33. log_dir.mkdir(exist_ok=True)
  34. logging.basicConfig(
  35. level=logging.INFO,
  36. format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
  37. handlers=[
  38. logging.FileHandler(log_dir / 'agent.log'),
  39. logging.StreamHandler()
  40. ]
  41. )
  42. logger = logging.getLogger(__name__)
  43. async def generate_fallback_output(store: FileSystemTraceStore, trace_id: str):
  44. """
  45. 当任务未正常输出时,从 trace 中提取数据并生成兜底输出
  46. """
  47. try:
  48. # 读取所有消息
  49. messages_dir = Path(store.base_path) / trace_id / "messages"
  50. if not messages_dir.exists():
  51. print("无法生成摘要:找不到消息目录")
  52. return
  53. # 提取搜索结果和画像数据
  54. search_results = []
  55. portrait_data = {}
  56. import json
  57. import re
  58. for msg_file in sorted(messages_dir.glob("*.json")):
  59. with open(msg_file, 'r', encoding='utf-8') as f:
  60. msg = json.load(f)
  61. # 提取搜索结果(从文本结果中解析)
  62. if msg.get("role") == "tool" and msg.get("content", {}).get("tool_name") == "douyin_search":
  63. result_text = msg.get("content", {}).get("result", "")
  64. # 解析每条搜索结果
  65. lines = result_text.split("\n")
  66. current_item = {}
  67. for line in lines:
  68. line = line.strip()
  69. if not line:
  70. if current_item.get("aweme_id"):
  71. if current_item["aweme_id"] not in [r["aweme_id"] for r in search_results]:
  72. search_results.append(current_item)
  73. current_item = {}
  74. continue
  75. # 解析标题行(以数字开头)
  76. if re.match(r'^\d+\.', line):
  77. current_item["desc"] = line.split(".", 1)[1].strip()[:100]
  78. # 解析 ID
  79. elif line.startswith("ID:"):
  80. current_item["aweme_id"] = line.split("ID:")[1].strip()
  81. # 解析作者
  82. elif line.startswith("作者:"):
  83. author_name = line.split("作者:")[1].strip()
  84. current_item["author"] = {"nickname": author_name}
  85. # 解析 sec_uid
  86. elif line.startswith("sec_uid:"):
  87. sec_uid = line.split("sec_uid:")[1].strip()
  88. if "author" not in current_item:
  89. current_item["author"] = {}
  90. current_item["author"]["sec_uid"] = sec_uid
  91. # 解析数据
  92. elif line.startswith("数据:"):
  93. stats_text = line.split("数据:")[1].strip()
  94. stats = {}
  95. # 解析点赞数
  96. if "点赞" in stats_text:
  97. digg_match = re.search(r'点赞\s+([\d,]+)', stats_text)
  98. if digg_match:
  99. stats["digg_count"] = int(digg_match.group(1).replace(",", ""))
  100. # 解析评论数
  101. if "评论" in stats_text:
  102. comment_match = re.search(r'评论\s+([\d,]+)', stats_text)
  103. if comment_match:
  104. stats["comment_count"] = int(comment_match.group(1).replace(",", ""))
  105. # 解析分享数
  106. if "分享" in stats_text:
  107. share_match = re.search(r'分享\s+([\d,]+)', stats_text)
  108. if share_match:
  109. stats["share_count"] = int(share_match.group(1).replace(",", ""))
  110. current_item["statistics"] = stats
  111. # 添加最后一条
  112. if current_item.get("aweme_id"):
  113. if current_item["aweme_id"] not in [r["aweme_id"] for r in search_results]:
  114. search_results.append(current_item)
  115. # 提取画像数据
  116. elif msg.get("role") == "tool":
  117. tool_name = msg.get("content", {}).get("tool_name", "")
  118. result_text = msg.get("content", {}).get("result", "")
  119. if tool_name in ["get_content_fans_portrait", "get_account_fans_portrait"]:
  120. # 解析画像数据
  121. content_id = None
  122. age_50_plus = None
  123. tgi = None
  124. # 从结果文本中提取 ID
  125. if "内容 " in result_text:
  126. parts = result_text.split("内容 ")[1].split(" ")[0]
  127. content_id = parts
  128. elif "账号 " in result_text:
  129. parts = result_text.split("账号 ")[1].split(" ")[0]
  130. content_id = parts
  131. # 提取50岁以上数据(格式:50-: 48.35% (偏好度: 210.05))
  132. if "【年龄】分布" in result_text:
  133. lines = result_text.split("\n")
  134. for line in lines:
  135. if "50-:" in line:
  136. # 解析: 50-: 48.35% (偏好度: 210.05)
  137. parts = line.split("50-:")[1].strip()
  138. if "%" in parts:
  139. age_50_plus = parts.split("%")[0].strip()
  140. if "偏好度:" in parts:
  141. tgi_part = parts.split("偏好度:")[1].strip()
  142. tgi = tgi_part.replace(")", "").strip()
  143. break
  144. if content_id and age_50_plus:
  145. portrait_data[content_id] = {
  146. "age_50_plus": age_50_plus,
  147. "tgi": tgi,
  148. "source": "内容点赞画像" if tool_name == "get_content_fans_portrait" else "账号粉丝画像"
  149. }
  150. # 生成输出
  151. print("\n" + "=" * 60)
  152. print("📊 任务执行摘要(兜底输出)")
  153. print("=" * 60)
  154. print(f"\n搜索情况:找到 {len(search_results)} 条候选内容")
  155. print(f"画像获取:获取了 {len(portrait_data)} 条画像数据")
  156. # 筛选有画像且符合要求的内容
  157. matched_results = []
  158. for result in search_results:
  159. aweme_id = result["aweme_id"]
  160. author_id = result["author"].get("sec_uid", "")
  161. # 查找画像数据(优先内容画像,其次账号画像)
  162. portrait = portrait_data.get(aweme_id) or portrait_data.get(author_id)
  163. if portrait and portrait.get("age_50_plus"):
  164. try:
  165. age_ratio = float(portrait["age_50_plus"])
  166. if age_ratio >= 20: # 50岁以上占比>=20%
  167. matched_results.append({
  168. **result,
  169. "portrait": portrait
  170. })
  171. except:
  172. pass
  173. # 按50岁以上占比排序
  174. matched_results.sort(key=lambda x: float(x["portrait"]["age_50_plus"]), reverse=True)
  175. # 输出推荐结果
  176. print(f"\n符合要求:{len(matched_results)} 条内容(50岁以上占比>=20%)")
  177. print("\n" + "=" * 60)
  178. print("🎯 推荐结果")
  179. print("=" * 60)
  180. for i, result in enumerate(matched_results[:10], 1):
  181. aweme_id = result["aweme_id"]
  182. desc = result["desc"]
  183. author = result["author"]
  184. stats = result["statistics"]
  185. portrait = result["portrait"]
  186. print(f"\n{i}. {desc}")
  187. print(f" 链接: https://www.douyin.com/video/{aweme_id}")
  188. print(f" 作者: {author.get('nickname', '未知')}")
  189. print(
  190. f" 热度: 👍 {stats.get('digg_count', 0):,} | 💬 {stats.get('comment_count', 0):,} | 🔄 {stats.get('share_count', 0):,}")
  191. print(f" 画像: 50岁以上 {portrait['age_50_plus']}% (tgi: {portrait['tgi']}) - {portrait['source']}")
  192. print("\n" + "=" * 60)
  193. print(f"✅ 已为您找到 {min(len(matched_results), 10)} 条推荐视频")
  194. print("=" * 60)
  195. except Exception as e:
  196. logger.error(f"生成兜底输出失败: {e}", exc_info=True)
  197. print(f"\n生成摘要失败: {e}")
  198. async def main():
  199. print("\n" + "=" * 60)
  200. print("内容寻找 Agent")
  201. print("=" * 60)
  202. print("开始执行...\n")
  203. # 加载 prompt
  204. prompt_path = Path(__file__).parent / "content_finder.prompt"
  205. prompt = SimplePrompt(prompt_path)
  206. # 构建消息
  207. messages = prompt.build_messages()
  208. # 初始化
  209. api_key = os.getenv("OPEN_ROUTER_API_KEY")
  210. if not api_key:
  211. raise ValueError("OPEN_ROUTER_API_KEY 未设置,请在 .env 文件中配置")
  212. model = os.getenv("MODEL", f"anthropic/claude-{prompt.config.get('model', 'sonnet-4.6')}")
  213. temperature = float(prompt.config.get("temperature", 0.3))
  214. max_iterations = int(os.getenv("MAX_ITERATIONS", "30"))
  215. trace_dir = os.getenv("TRACE_DIR", ".cache/traces")
  216. skills_dir = str(Path(__file__).parent / "skills")
  217. Path(trace_dir).mkdir(parents=True, exist_ok=True)
  218. store = FileSystemTraceStore(base_path=trace_dir)
  219. # 限制工具范围:只使用抖音相关的4个工具
  220. allowed_tools = [
  221. "douyin_search",
  222. "douyin_user_videos",
  223. "get_content_fans_portrait",
  224. "get_account_fans_portrait",
  225. ]
  226. runner = AgentRunner(
  227. llm_call=create_openrouter_llm_call(model=model),
  228. trace_store=store,
  229. skills_dir=skills_dir,
  230. )
  231. config = RunConfig(
  232. name="内容寻找",
  233. model=model,
  234. temperature=temperature,
  235. max_iterations=max_iterations,
  236. tools=allowed_tools, # 限制工具范围
  237. extra_llm_params={"max_tokens": 8192}, # 增加输出 token 限制,避免被截断
  238. knowledge=KnowledgeConfig(
  239. # 压缩时提取(消息量超阈值触发压缩时,用完整 history 反思)
  240. enable_extraction=True,
  241. reflect_prompt="", # 自定义反思 prompt;空则使用默认,见 agent/core/prompts/knowledge.py:REFLECT_PROMPT
  242. # agent运行完成后提取(不代表任务完成,agent 可能中途退出等待人工评估)
  243. enable_completion_extraction=True,
  244. completion_reflect_prompt="",
  245. # 自定义复盘 prompt;空则使用默认,见 agent/core/prompts/knowledge.py:COMPLETION_REFLECT_PROMPT
  246. # 知识注入(agent切换当前工作的goal时,自动注入相关知识)
  247. enable_injection=True,
  248. # 默认字段(保存/搜索时自动注入)
  249. owner="content_finder_agent", # 所有者(空则尝试从 git config user.email 获取,再空则用 agent:{agent_id})
  250. default_tags={"project": "content_finder"}, # 默认 tags(会与工具调用参数合并)
  251. default_scopes=["com.piaoquantv.supply"], # 默认 scopes
  252. default_search_types=["tool", "usecase", "definition"], # 默认搜索类型过滤
  253. default_search_owner="content_finder_agent" # 默认搜索 owner 过滤(空则不过滤)
  254. )
  255. )
  256. # 执行
  257. trace_id = None
  258. has_final_output = False
  259. try:
  260. async for item in runner.run(messages=messages, config=config):
  261. if isinstance(item, Trace):
  262. trace_id = item.trace_id
  263. if item.status == "completed":
  264. print(f"\n[完成] trace_id={item.trace_id}")
  265. # 检查是否有最终输出
  266. if not has_final_output:
  267. print("\n⚠️ 检测到任务未完整输出,正在生成摘要...")
  268. await generate_fallback_output(store, item.trace_id)
  269. elif item.status == "failed":
  270. print(f"\n[失败] {item.error_message}")
  271. elif isinstance(item, Message):
  272. if item.role == "assistant":
  273. content = item.content
  274. if isinstance(content, dict):
  275. text = content.get("text", "")
  276. tool_calls = content.get("tool_calls")
  277. # 输出文本内容
  278. if text:
  279. # 检测是否包含最终推荐结果
  280. if "推荐结果" in text or "推荐内容" in text or "🎯" in text:
  281. has_final_output = True
  282. # 如果文本很长(>500字符)且包含推荐结果标记,输出完整内容
  283. if len(text) > 500 and ("推荐结果" in text or "推荐内容" in text or "🎯" in text):
  284. print(f"\n{text}")
  285. # 如果有工具调用且文本较短,只输出摘要
  286. elif tool_calls and len(text) > 100:
  287. print(f"[思考] {text[:100]}...")
  288. # 其他情况输出完整文本
  289. else:
  290. print(f"\n{text}")
  291. # 输出工具调用信息
  292. if tool_calls:
  293. for tc in tool_calls:
  294. tool_name = tc.get("function", {}).get("name", "unknown")
  295. # 跳过 goal 工具的输出,减少噪音
  296. if tool_name != "goal":
  297. print(f"[工具] {tool_name}")
  298. elif isinstance(content, str) and content:
  299. print(f"\n{content}")
  300. elif item.role == "tool":
  301. content = item.content
  302. if isinstance(content, dict):
  303. tool_name = content.get("tool_name", "unknown")
  304. print(f"[结果] {tool_name} ✓")
  305. except KeyboardInterrupt:
  306. print("\n用户中断")
  307. except Exception as e:
  308. logger.error(f"执行失败: {e}", exc_info=True)
  309. print(f"\n执行失败: {e}")
  310. sys.exit(1)
  311. if __name__ == "__main__":
  312. asyncio.run(main())