Przeglądaj źródła

Merge branch 'feature_content_finder_agent_v1' of https://git.yishihui.com/howard/Agent into feature_content_finder_agent_v1

jihuaqiang 22 godzin temu
rodzic
commit
7cea57c375

+ 58 - 5
.env.example

@@ -1,10 +1,63 @@
-# Docker Compose 环境变量配置
-# 用于 content-finder 服务
+# ============================================
+# Agent 项目环境变量配置
+# ============================================
+# 用途:
+# 1. Docker 部署:docker-compose 读取此文件
+# 2. 本地开发:各子项目的 load_dotenv() 读取此文件
+# ============================================
 
+# --------------------------------------------
+# 全局配置
+# --------------------------------------------
+
+# KnowHub API 地址
+KNOWHUB_API=http://43.106.118.91:9999
+
+# Browser Use API Key(如果使用浏览器工具)
+BROWSER_USE_API_KEY=
+
+# --------------------------------------------
 # LLM 配置(必填)
+# --------------------------------------------
+
+# OpenRouter API Key(必填)
 OPEN_ROUTER_API_KEY=your-api-key-here
 
-# 模型配置(可选,有默认值)
-MODEL=anthropic/claude-sonnet-4.6
+# 阿里云 API(可选)
+ALI_API_KEY=
+ALI_BASE_URL=
+
+# --------------------------------------------
+# Content Finder 配置
+# --------------------------------------------
+
+# 模型配置
+MODEL=anthropic/claude-sonnet-4.5
 TEMPERATURE=0.3
-MAX_ITERATIONS=30
+MAX_ITERATIONS=200
+
+# 存储路径
+# Docker 部署时:使用容器内路径(/app/.trace, /app/.output)
+# 本地开发时:使用相对路径(.trace, .output 或 .cache/traces, .cache/output)
+TRACE_DIR=.trace
+OUTPUT_DIR=.output
+
+# Skills 配置
+SKILLS_DIR=./skills
+# 留空则加载所有 skills,指定则只加载指定的 skills
+ENABLED_SKILLS=
+
+# 服务端口
+PORT=8080
+
+# 并发控制
+MAX_CONCURRENT_TASKS=3
+
+# --------------------------------------------
+# 定时任务配置(可选)
+# --------------------------------------------
+
+# 外部 API 地址(用于获取 query)
+SCHEDULE_QUERY_API=http://your-api.com/content-finder/get-query
+# 外部 API 超时时间(秒)
+SCHEDULE_QUERY_API_TIMEOUT=10.0

+ 32 - 1
DEPLOY.md

@@ -9,6 +9,30 @@ cp .env.example .env
 vim .env  # 填写 OPEN_ROUTER_API_KEY
 ```
 
+`.env` 文件说明:
+
+```bash
+# ============================================
+# Agent 项目环境变量配置
+# ============================================
+
+# 全局配置
+KNOWHUB_API=http://43.106.118.91:9999
+BROWSER_USE_API_KEY=
+
+# LLM 配置(必填)
+OPEN_ROUTER_API_KEY=your-api-key-here  # 必须填写
+
+# Content Finder 配置
MODEL=anthropic/claude-sonnet-4.5
TEMPERATURE=0.3
MAX_ITERATIONS=200
+MAX_CONCURRENT_TASKS=3
+
+# 定时任务(可选)
+SCHEDULE_QUERY_API=http://your-api.com/get-query
+```
+
 `.env` 文件内容:
 ```bash
 OPEN_ROUTER_API_KEY=your-api-key-here
@@ -166,7 +190,14 @@ cd Agent
 
 # 4. 配置环境变量
 cp .env.example .env
-vim .env
+vim .env  # 必须填写 OPEN_ROUTER_API_KEY
+
+# .env 文件内容示例:
+# OPEN_ROUTER_API_KEY=sk-or-v1-xxx  # 必填
# MODEL=anthropic/claude-sonnet-4.5
# TEMPERATURE=0.3
# MAX_ITERATIONS=200
+# MAX_CONCURRENT_TASKS=3
 
 # 5. 启动服务
 docker-compose up -d

+ 4 - 4
Dockerfile.api-server

@@ -2,10 +2,10 @@ FROM registry.cn-hangzhou.aliyuncs.com/stuuudy/python:3.13-slim
 
 WORKDIR /app
 
-# 安装系统依赖
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    gcc \
-    curl \
+# 安装系统依赖(使用阿里云镜像源)
+RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends curl \
     && rm -rf /var/lib/apt/lists/*
 
 # 复制依赖文件

+ 5 - 5
Dockerfile.content-finder

@@ -2,10 +2,10 @@ FROM registry.cn-hangzhou.aliyuncs.com/stuuudy/python:3.13-slim
 
 WORKDIR /app
 
-# 安装系统依赖
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    gcc \
-    curl \
+# 安装系统依赖(使用阿里云镜像源)
+RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends curl \
     && rm -rf /var/lib/apt/lists/*
 
 # 复制依赖文件
@@ -25,4 +25,4 @@ WORKDIR /app/examples/content_finder
 EXPOSE 8080
 
 # 启动命令
-CMD ["python", "server.py"]
+CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8080"]

+ 3 - 3
Dockerfile.frontend

@@ -1,5 +1,5 @@
 # Stage 1: 构建前端
-FROM node:20-alpine AS builder
+FROM registry.cn-hangzhou.aliyuncs.com/stuuudy/node:20-alpine AS builder
 
 WORKDIR /app
 
@@ -13,10 +13,10 @@ RUN yarn install --frozen-lockfile
 COPY frontend/react-template/ .
 
 # 构建生产版本(跳过 TypeScript 类型检查)
-RUN vite build
+RUN ./node_modules/.bin/vite build
 
 # Stage 2: nginx 托管
-FROM nginx:alpine
+FROM registry.cn-hangzhou.aliyuncs.com/stuuudy/nginx:alpine
 
 # 安装 curl(用于健康检查)
 RUN apk add --no-cache curl

+ 11 - 6
docker-compose.yml

@@ -1,11 +1,11 @@
 services:
   # 内容寻找 Agent 服务
   content-finder:
-    image: registry.cn-hangzhou.aliyuncs.com/stuuudy/content-finder-agent:latest
+    image: registry.cn-hangzhou.aliyuncs.com/stuuudy/content-finder-agent:${VERSION:-latest}
     build:
       context: .
       dockerfile: Dockerfile.content-finder
-    container_name: agent-content-finder
+    container_name: content-finder-agent
     restart: unless-stopped
     ports:
       - "8080:8080"
@@ -19,8 +19,13 @@ services:
       - MAX_ITERATIONS=${MAX_ITERATIONS:-30}
       - TRACE_DIR=/app/.trace
       - OUTPUT_DIR=/app/.output
+      - SKILLS_DIR=./skills
+      - ENABLED_SKILLS=${ENABLED_SKILLS:-}
       - PORT=8080
       - MAX_CONCURRENT_TASKS=${MAX_CONCURRENT_TASKS:-3}
+      - SCHEDULE_QUERY_API=${SCHEDULE_QUERY_API:-}
+      - SCHEDULE_QUERY_API_TIMEOUT=${SCHEDULE_QUERY_API_TIMEOUT:-10.0}
+      - KNOWHUB_API=${KNOWHUB_API:-http://43.106.118.91:9999}
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
       interval: 30s
@@ -30,11 +35,11 @@ services:
 
   # 可视化 API 服务
   api-server:
-    image: registry.cn-hangzhou.aliyuncs.com/stuuudy/content-finder-agent-api-server:latest
+    image: registry.cn-hangzhou.aliyuncs.com/stuuudy/content-finder-agent-api-server:${VERSION:-latest}
     build:
       context: .
       dockerfile: Dockerfile.api-server
-    container_name: agent-api-server
+    container_name: content-finder-agent-api-server
     restart: unless-stopped
     ports:
       - "8000:8000"
@@ -49,11 +54,11 @@ services:
 
   # 前端服务
   frontend:
-    image: registry.cn-hangzhou.aliyuncs.com/stuuudy/content-finder-agent-frontend:latest
+    image: registry.cn-hangzhou.aliyuncs.com/stuuudy/content-finder-agent-frontend:${VERSION:-latest}
     build:
       context: .
       dockerfile: Dockerfile.frontend
-    container_name: agent-frontend
+    container_name: content-finder-agent-frontend
     restart: unless-stopped
     ports:
       - "3000:3000"

+ 0 - 4
examples/content_finder/server.py

@@ -133,14 +133,10 @@ async def scheduled_task():
             logger.warning("未配置 SCHEDULE_QUERY_API,跳过定时任务")
             return
 
-        api_key = os.getenv("SCHEDULE_QUERY_API_KEY", "")
         timeout = float(os.getenv("SCHEDULE_QUERY_API_TIMEOUT", "10.0"))
 
         async with httpx.AsyncClient() as client:
             headers = {}
-            if api_key:
-                headers["Authorization"] = f"Bearer {api_key}"
-
             logger.info(f"调用外部 API: {query_api}")
             response = await client.get(
                 query_api,

+ 117 - 0
examples/content_finder/tools/aigc_platform_api.py

@@ -0,0 +1,117 @@
"""
AIGC platform API client.

Creates a crawl plan on the AIGC crawler platform for a given DouYin
account, identified by its sec_uid-style account ID.
"""
import asyncio
import logging

import requests

from agent import ToolResult

logger = logging.getLogger(__name__)

AIGC_BASE_URL = "https://aigc-api.aiddit.com"
CRAWLER_PLAN_CREATE_URL = f"{AIGC_BASE_URL}/aigc/crawler/plan/save"
# SECURITY NOTE(review): hard-coded API token committed to source control.
# This should be read from an environment variable or secret store instead.
DEFAULT_TOKEN = "8bf14f27fc3a486788f3383452422d72"
DEFAULT_TIMEOUT = 60.0


async def create_crawler_plan_by_douyin_account_id(
        account_id: str,
        sort_type: str = "最新"
) -> ToolResult:
    """Create a crawler plan for a DouYin account ID.

    Args:
        account_id: DouYin account ID (sec_uid). Must be a non-empty string
            starting with "MS4wLjABAAAA" and 70-90 characters long.
        sort_type: Video sort order for the crawl ("最新" = newest,
            "最热" = hottest). Defaults to "最新".

    Returns:
        ToolResult: ``output`` holds the created crawler plan ID on success;
        on failure ``output`` is empty (or the API's error message) and
        ``error`` describes the problem.
    """
    # --- input validation (no network touched on these paths) ------------
    if not account_id or not isinstance(account_id, str):
        logger.error(
            "create_crawler_plan_by_douyin_account_id invalid account_id",
            extra={"account_id": account_id},
        )
        return ToolResult(
            title="根据抖音账号ID创建爬取计划失败",
            output="",
            error="account_id 参数无效:必须是非空字符串",
        )

    if not account_id.startswith("MS4wLjABAAAA"):
        logger.error(
            "create_crawler_plan_by_douyin_account_id invalid sec_uid format",
            extra={"account_id": account_id},
        )
        return ToolResult(
            title="根据抖音账号ID创建爬取计划失败",
            output="",
            # Slicing is already clamped for short strings; min() is not needed.
            error=f"account_id 格式错误:必须以 MS4wLjABAAAA 开头,当前值: {account_id[:20]}...",
        )

    if not 70 <= len(account_id) <= 90:
        logger.error(
            "create_crawler_plan_by_douyin_account_id invalid account_id length",
            extra={"account_id": account_id, "length": len(account_id)},
        )
        return ToolResult(
            title="根据抖音账号ID创建爬取计划失败",
            output="",
            error=f"account_id 长度异常:期望 70-90 字符,实际 {len(account_id)} 字符。这可能是编造或截断的数据。",
        )

    # Request payload; field semantics are defined by the AIGC platform API.
    params = {
        "baseInfo": {
            "token": DEFAULT_TOKEN,
            "userName": ""
        },
        "params": {
            "accountFilters": [],
            "channel": 2,
            "contentFilters": [],
            "contentModal": 4,
            "crawlerComment": 0,
            "crawlerMode": 4,
            "filterAccountMatchMode": 2,
            "filterContentMatchMode": 2,
            "frequencyType": 1,
            "inputModeValues": [
                account_id
            ],
            "modelValueConfig": {
                "sortType": sort_type
            },
            "name": f"【Agent自动创建】抖音账号ID爬取计划_{account_id[:30]}",
            "planType": 2,
            "searchModeValues": [],
            "selectModeValues": [],
            "srtExtractFlag": 1,
            "videoKeyFrameType": 1,
            "voiceExtractFlag": 1
        }
    }
    try:
        # requests is a blocking client; run it in a worker thread so this
        # coroutine does not stall the event loop for up to DEFAULT_TIMEOUT
        # seconds while the HTTP call is in flight.
        response = await asyncio.to_thread(
            requests.post,
            CRAWLER_PLAN_CREATE_URL,
            json=params,
            headers={"Content-Type": "application/json"},
            timeout=DEFAULT_TIMEOUT,
        )
        response.raise_for_status()
        response_json = response.json()
        if response_json.get("code") != 0:
            # Platform-level failure: HTTP 200 but a non-zero business code.
            return ToolResult(
                title="根据抖音账号ID创建爬取计划失败",
                output=response_json.get("msg", "接口异常"),
                error="create crawler plan interface error",
            )

        crawler_plan_id = response_json.get("data", {}).get("id", "")
        return ToolResult(
            title="根据抖音账号ID创建爬取计划",
            output=crawler_plan_id,
            long_term_memory="Create crawler plan by DouYin Account ID",
        )
    except Exception as e:
        # logger.exception records the traceback; the previous
        # logger.error(e, ...) used the exception object as the message
        # and lost the stack trace.
        logger.exception(
            "create_crawler_plan_by_douyin_account_id failed: %s",
            e,
            extra={"account_id": account_id},
        )
        return ToolResult(
            title="根据抖音账号ID创建爬取计划失败",
            output="",
            error=f"创建爬取计划错误:{str(e)}",
        )

+ 14 - 1
requirements.txt

@@ -1,6 +1,8 @@
 # LLM request
 httpx[socks]>=0.28.0
 python-dotenv>=1.0.0
+openai>=1.0.0
+PyYAML>=6.0
 
 # Browser automation CLI
 # 推荐安装方式: uv add browser-use && uv sync
@@ -16,4 +18,15 @@ pydantic
 apscheduler>=3.10.0
 
 # 飞书
-lark-oapi==1.5.3
+lark-oapi==1.5.3
+
+# Database
+pymysql
+DBUtils>=3.0.0
+
+# HTTP clients
+requests>=2.31.0
+aiohttp>=3.9.0
+
+# Image processing
+Pillow>=10.0.0