jihuaqiang 1 week ago
parent
commit
977592a99f
8 changed files with 476 additions and 413 deletions
  1. 21 0
      .env
  2. 155 0
      README.md
  3. 204 309
      agent.py
  4. 0 102
      agent_process.py
  5. 58 0
      gemini.py
  6. 2 2
      indentify/indentify.py
  7. 4 0
      requirements.txt
  8. 32 0
      start_service.sh

+ 21 - 0
.env

@@ -0,0 +1,21 @@
+# 飞书基础配置,无需修改(注意:本文件包含真实密钥与凭据,不应提交到版本库;请改为提交不含真实值的 .env.example,并轮换已暴露的密钥)
+FEISHU_APP_ID=cli_a76c35b8fa7d500c
+FEISHU_APP_SECRET=xHpF7H9nBwXeCH2Z1YRDScWSXzyktq36
+FEISHU_FILE_TOKEN=VEBsbCfaWa3gF3slQILc6Rybnde
+
+# # 飞书表格配置
+# FEISHU_TABLE_ID=tblNdje7z6Cf3hax
+
+# 扣子
+COZE_API_KEY=pat_pClXS15hyuqohC9TK58vU7130Hp6QmmHlnyW2TjFpKVWKsW2B1VniFwdXkY3eRNB
+COZE_BOT_ID=7537570163895812146
+
+# Gemini
+GEMINI_API_KEY=AIzaSyAkt1l9Kw1CQgHFzTpla0vgt0OE53fr-BI
+
+# 代理
+DYNAMIC_HTTP_PROXY=http://t10952018781111:1ap37oc3@d844.kdltps.com:15818
+
+# GRPC
+CONTAINER_GRPC_HOST=192.168.203.112
+CONTAINER_GRPC_PORT=50051

+ 155 - 0
README.md

@@ -0,0 +1,155 @@
+# Knowledge Agent API
+
+基于 FastAPI 的智能内容识别和结构化处理服务。
+
+## 🚀 快速开始
+
+### 1. 安装依赖
+
+```bash
+pip install -r requirements.txt
+```
+
+### 2. 启动服务
+
+#### 方式一:使用启动脚本(推荐)
+```bash
+./start_service.sh
+```
+
+#### 方式二:直接运行
+```bash
+python3 agent.py
+```
+
+#### 方式三:使用 uvicorn
+```bash
+uvicorn agent:app --host 0.0.0.0 --port 8080 --reload
+```
+
+### 3. 访问服务
+
+- **服务地址**: http://localhost:8080
+- **API文档**: http://localhost:8080/docs
+- **健康检查**: http://localhost:8080/health
+
+## 📚 API 接口
+
+### POST /trigger
+同步触发内容处理
+
+**请求体**:
+```json
+{
+  "requestId": "your_request_id"
+}
+```
+
+**响应**:
+```json
+{
+  "requestId": "your_request_id",
+  "processed": 5,
+  "success": 4,
+  "details": [
+    {
+      "index": 1,
+      "dbInserted": true,
+      "identifyError": null,
+      "status": "success"
+    }
+  ]
+}
+```
+
+### POST /trigger/async
+异步触发内容处理(后台任务)
+
+**请求体**:
+```json
+{
+  "requestId": "your_request_id"
+}
+```
+
+**响应**:
+```json
+{
+  "requestId": "your_request_id",
+  "status": "processing",
+  "message": "任务已提交到后台处理"
+}
+```
+
+### GET /health
+健康检查接口
+
+**响应**:
+```json
+{
+  "status": "healthy",
+  "timestamp": 1703123456.789
+}
+```
+
+## 🔧 配置
+
+### 环境变量
+
+- `GEMINI_API_KEY`: Gemini API 密钥(必需)
+
+### 数据库配置
+
+在 `utils/mysql_db.py` 中配置数据库连接信息。
+
+## 📁 项目结构
+
+```
+knowledge-agent/
+├── agent.py                 # FastAPI 主服务文件
+├── agent_tools.py          # 核心工具类
+├── gemini.py               # Gemini API 处理器
+├── indentify/              # 内容识别模块
+│   ├── image_identifier.py # 图像识别
+│   ├── video_identifier.py # 视频识别
+│   └── indentify.py        # 识别主逻辑
+├── structure/              # 结构化处理模块
+│   └── structure_processor.py
+├── utils/                  # 工具模块
+│   ├── mysql_db.py        # 数据库操作
+│   ├── logging_config.py  # 日志配置
+│   └── file.py            # 文件操作
+├── prompt/                 # 提示词模板
+├── requirements.txt        # 依赖包列表
+└── start_service.sh       # 启动脚本
+```
+
+## 🎯 核心功能
+
+1. **内容识别**: 支持图像和视频的智能识别
+2. **结构化处理**: 使用 Gemini API 进行内容结构化
+3. **数据存储**: 自动存储处理结果到数据库
+4. **异步处理**: 支持后台异步任务处理
+5. **RESTful API**: 现代化的 HTTP API 接口
+
+## 🚨 注意事项
+
+1. 确保设置了正确的 `GEMINI_API_KEY` 环境变量
+2. 确保数据库连接配置正确
+3. 服务默认监听 8080 端口;如需修改,可使用上文"方式三"并通过 uvicorn 的 `--port` 参数指定
+
+## 🔍 故障排除
+
+### 常见问题
+
+1. **导入错误**: 确保所有依赖已正确安装
+2. **数据库连接失败**: 检查数据库配置和网络连接
+3. **Gemini API 错误**: 验证 API 密钥是否正确
+
+### 日志查看
+
+服务运行时会输出详细日志,包括:
+- 请求处理状态
+- 错误信息
+- 处理进度
+- 性能指标

+ 204 - 309
agent.py

@@ -1,335 +1,230 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+使用 FastAPI 重构的 Agent 服务
+提供现代化的 HTTP API 接口
+"""
+
 import json
 import sys
 import os
-import argparse
-import signal
-from http.server import BaseHTTPRequestHandler, HTTPServer
-from urllib.parse import urlparse
-from typing import Any, Dict, List, Optional, Tuple
+import time
+from typing import Any, Dict, List, Optional
+from contextlib import asynccontextmanager
 
 # 保证可以导入本项目模块
 sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 
+from fastapi import FastAPI, HTTPException, BackgroundTasks
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
+import uvicorn
+
 from utils.logging_config import get_logger
 from agent_tools import QueryDataTool, IdentifyTool, StructureTool
-from agent_process import start_daemon, stop_daemon, status_daemon
-
-# 可选引入 LangGraph(如未安装,将在运行时优雅回退到顺序执行)
-HAS_LANGGRAPH = False
-try:
-    from langgraph.graph import StateGraph, END
-    HAS_LANGGRAPH = True
-except Exception:
-    HAS_LANGGRAPH = False
 
+# 创建 logger
+logger = get_logger('AgentFastAPI')
 
-logger = get_logger('Agent')
+# Request model: JSON body accepted by the /trigger and /trigger/async routes.
+class TriggerRequest(BaseModel):
+    # Required request identifier (`...` means the field has no default).
+    requestId: str = Field(..., description="请求ID")
 
-PID_FILE = os.path.join(os.path.dirname(__file__), 'agent_scheduler.pid')
+# Response model: summary returned by POST /trigger.
+class TriggerResponse(BaseModel):
+    # Echo of the request ID that was processed.
+    requestId: str
+    # Number of items fetched for the request (len(items)).
+    processed: int
+    # Number of items whose store call returned a value greater than zero.
+    success: int
+    # One entry per item: index, dbInserted, identifyError, status.
+    details: List[Dict[str, Any]]
 
+# 全局变量
+identify_tool = None
 
-class ReactAgent:
-    def __init__(self) -> None:
-        self.identify_tool = IdentifyTool()
-
-    def handle_request(self, request_id: str) -> Dict[str, Any]:
-        items = QueryDataTool.fetch_crawl_data_list(request_id)
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Manage application startup and shutdown.
+
+    Before yielding, creates the module-level ``identify_tool`` instance and
+    logs that the service is up; after the yield, logs that the service is
+    shutting down.
+    """
+    # Startup: build the shared IdentifyTool used by the request handlers.
+    global identify_tool
+    identify_tool = IdentifyTool()
+    logger.info("Agent 服务启动完成")
+    
+    yield
+    
+    # Shutdown: nothing is released here, only a log line is emitted.
+    logger.info("Agent 服务正在关闭")
+
+# 创建 FastAPI 应用
+app = FastAPI(
+    title="Knowledge Agent API",
+    description="智能内容识别和结构化处理服务",
+    version="1.0.0",
+    lifespan=lifespan
+)
+
+@app.get("/")
+async def root():
+    """根路径,返回服务信息"""
+    # Static service descriptor. The docstring above is left untouched because
+    # FastAPI publishes route docstrings in the generated OpenAPI docs.
+    # Every route registered in this module is listed, including the
+    # background-task variant of the trigger.
+    return {
+        "service": "Knowledge Agent API",
+        "version": "1.0.0",
+        "status": "running",
+        "endpoints": {
+            "trigger": "/trigger",
+            "trigger_async": "/trigger/async",
+            "health": "/health",
+            "docs": "/docs"
+        }
+    }
+
+@app.get("/health")
+async def health_check():
+    """健康检查接口"""
+    # Report a fixed "healthy" status together with the current Unix time.
+    current_ts = time.time()
+    return dict(status="healthy", timestamp=current_ts)
+
+# POST /trigger: fetch the items for a request ID, run identification and
+# storage on each one in order, and return a per-item summary.
+# A failure on a single item is recorded in `details` and does not abort the
+# batch; a failure outside the per-item loop is turned into an HTTP 500.
+# NOTE(review): `background_tasks` is accepted but never used in this handler.
+# NOTE(review): the tool calls below are invoked synchronously inside this
+# coroutine; if they block on I/O they will stall the event loop — confirm and
+# consider moving them to a thread pool.
+@app.post("/trigger", response_model=TriggerResponse)
+async def trigger_processing(request: TriggerRequest, background_tasks: BackgroundTasks):
+    """
+    触发内容处理
+    
+    - **requestId**: 请求ID,用于标识处理任务
+    """
+    try:
+        logger.info(f"收到触发请求: requestId={request.requestId}")
+        
+        # Fetch the items to process
+        items = QueryDataTool.fetch_crawl_data_list(request.requestId)
         if not items:
-            return {"requestId": request_id, "processed": 0, "success": 0, "details": []}
-
+            return TriggerResponse(
+                requestId=request.requestId,
+                processed=0,
+                success=0,
+                details=[]
+            )
+
+        # Process the items one by one
         success_count = 0
         details: List[Dict[str, Any]] = []
+        
         for idx, item in enumerate(items, start=1):
-            crawl_data = item.get('crawl_data') or {}
-
-            # Step 1: 识别
-            identify_result = self.identify_tool.run(crawl_data if isinstance(crawl_data, dict) else {})
-
-            # Step 2: 结构化并入库
-            affected = StructureTool.store_parsing_result(request_id, item.get('raw') or {}, identify_result)
-            ok = affected is not None and affected > 0
-            if ok:
-                success_count += 1
-
-            details.append({
-                "index": idx,
-                "dbInserted": ok,
-                "identifyError": identify_result.get('error'),
-            })
-
+            try:
+                crawl_data = item.get('crawl_data') or {}
+                
+                # Step 1: identification (non-dict crawl data is replaced by {})
+                identify_result = identify_tool.run(
+                    crawl_data if isinstance(crawl_data, dict) else {}
+                )
+                
+                # Step 2: structure the result and store it
+                affected = StructureTool.store_parsing_result(
+                    request.requestId, 
+                    item.get('raw') or {}, 
+                    identify_result
+                )
+                
+                # An item counts as successful only when the store call
+                # returned a value greater than zero.
+                ok = affected is not None and affected > 0
+                if ok:
+                    success_count += 1
+                
+                details.append({
+                    "index": idx,
+                    "dbInserted": ok,
+                    "identifyError": identify_result.get('error'),
+                    "status": "success" if ok else "failed"
+                })
+                
+            except Exception as e:
+                # Per-item failure: log it, record it, keep going.
+                logger.error(f"处理第 {idx} 项时出错: {e}")
+                details.append({
+                    "index": idx,
+                    "dbInserted": False,
+                    "identifyError": str(e),
+                    "status": "error"
+                })
+
+        result = TriggerResponse(
+            requestId=request.requestId,
+            processed=len(items),
+            success=success_count,
+            details=details
+        )
+        
+        logger.info(f"处理完成: requestId={request.requestId}, processed={len(items)}, success={success_count}")
+        return result
+        
+    except Exception as e:
+        logger.error(f"处理请求失败: {e}")
+        raise HTTPException(status_code=500, detail=f"处理失败: {str(e)}")
+
+# POST /trigger/async: queue processing of the request as a background task
+# and reply immediately; the task itself only reports its outcome via logs.
+@app.post("/trigger/async")
+async def trigger_processing_async(request: TriggerRequest, background_tasks: BackgroundTasks):
+    """
+    异步触发内容处理(后台任务)
+    
+    - **requestId**: 请求ID,用于标识处理任务
+    """
+    try:
+        logger.info(f"收到异步触发请求: requestId={request.requestId}")
+        
+        # Register the background task; it runs after the response is sent
+        background_tasks.add_task(process_request_background, request.requestId)
+        
         return {
-            "requestId": request_id,
-            "processed": len(items),
-            "success": success_count,
-            "details": details,
+            "requestId": request.requestId,
+            "status": "processing",
+            "message": "任务已提交到后台处理"
         }
+        
+    except Exception as e:
+        # Failing to queue the task is reported to the client as HTTP 500.
+        logger.error(f"提交异步任务失败: {e}")
+        raise HTTPException(status_code=500, detail=f"提交任务失败: {str(e)}")
 
-
-AGENT = ReactAgent()
-
-
-# =========================
-# LangGraph 风格实现(可选)
-# =========================
-def build_langgraph_app():
-    if not HAS_LANGGRAPH:
-        return None
-
-    # 状态:以 dict 形式承载
-    # 输入: {"request_id": str}
-    # 输出附加: items, details, processed, success
-
-    def node_fetch(state: Dict[str, Any]) -> Dict[str, Any]:
-        request_id = str(state.get("request_id", ""))
+async def process_request_background(request_id: str):
+    """Process every item of a request as a background task.
+
+    Fetches the items for ``request_id``, runs identification and storage on
+    each one, and logs progress and a final summary. Nothing is returned:
+    per-item and overall failures are logged rather than raised.
+
+    Args:
+        request_id: ID of the request whose items are processed.
+    """
+    try:
+        logger.info(f"开始后台处理: requestId={request_id}")
+        
+        # Fetch the items to process
         items = QueryDataTool.fetch_crawl_data_list(request_id)
-        return {
-            **state,
-            "items": items,
-            "details": [],
-            "processed": 0,
-            "success": 0,
-        }
-
-    identify_tool = IdentifyTool()
+        if not items:
+            logger.info(f"后台处理完成: requestId={request_id}, 无数据需要处理")
+            return
 
-    def node_process(state: Dict[str, Any]) -> Dict[str, Any]:
-        request_id = str(state.get("request_id", ""))
-        items: List[Dict[str, Any]] = state.get("items", []) or []
-        details: List[Dict[str, Any]] = []
+        # 处理数据
         success_count = 0
-
         for idx, item in enumerate(items, start=1):
-            crawl_data = item.get('crawl_data') or {}
-            identify_result = identify_tool.run(crawl_data if isinstance(crawl_data, dict) else {})
-            affected = StructureTool.store_parsing_result(request_id, item.get('raw') or {}, identify_result)
-            ok = affected is not None and affected > 0
-            if ok:
-                success_count += 1
-            details.append({
-                "index": idx,
-                "dbInserted": ok,
-                "identifyError": identify_result.get('error'),
-            })
-
-        return {
-            **state,
-            "details": details,
-            "processed": len(items),
-            "success": success_count,
-        }
-
-    graph = StateGraph(dict)
-    graph.add_node("fetch", node_fetch)
-    graph.add_node("process", node_process)
-
-    graph.set_entry_point("fetch")
-    graph.add_edge("fetch", "process")
-    graph.add_edge("process", END)
-
-    return graph.compile()
-
-
-APP = build_langgraph_app()
-
-
-class AgentHttpHandler(BaseHTTPRequestHandler):
-    def _set_headers(self, status_code: int = 200):
-        self.send_response(status_code)
-        self.send_header('Content-Type', 'application/json; charset=utf-8')
-        self.end_headers()
-
-    def do_POST(self):
-        parsed = urlparse(self.path)
-        if parsed.path != '/trigger':
-            self._set_headers(404)
-            self.wfile.write(json.dumps({"error": "not found"}).encode('utf-8'))
-            return
-
-        length = int(self.headers.get('Content-Length', '0') or '0')
-        body = self.rfile.read(length) if length > 0 else b''
-        try:
-            payload = json.loads(body.decode('utf-8')) if body else {}
-        except Exception:
-            self._set_headers(400)
-            self.wfile.write(json.dumps({"error": "invalid json"}).encode('utf-8'))
-            return
-
-        request_id = (payload or {}).get('requestId')
-        if not request_id:
-            self._set_headers(400)
-            self.wfile.write(json.dumps({"error": "requestId is required"}).encode('utf-8'))
-            return
-
-        try:
-            logger.info(f"收到触发请求: requestId={request_id}")
-            if APP is not None:
-                result = APP.invoke({"request_id": str(request_id)})
-                # 标准化返回
-                result = {
-                    "requestId": str(request_id),
-                    "processed": result.get("processed", 0),
-                    "success": result.get("success", 0),
-                    "details": result.get("details", []),
-                }
-            else:
-                # 回退到顺序执行
-                result = AGENT.handle_request(str(request_id))
-            self._set_headers(200)
-            self.wfile.write(json.dumps(result, ensure_ascii=False).encode('utf-8'))
-        except Exception as e:
-            logger.error(f"处理失败: {e}")
-            self._set_headers(500)
-            self.wfile.write(json.dumps({"error": str(e)}).encode('utf-8'))
-
-    def log_message(self, format: str, *args) -> None:
-        # 重定向默认日志到我们统一的 logger
-        logger.info("HTTP " + (format % args))
-
-
-def run(host: str = '0.0.0.0', port: int = 8080):
-    server_address = (host, port)
-    httpd = HTTPServer(server_address, AgentHttpHandler)
-
-    def _graceful_shutdown(signum, frame):
-        try:
-            logger.info(f"收到信号 {signum},正在停止HTTP服务...")
-            # shutdown 会在其他线程优雅停止; 这里我们直接关闭,避免阻塞
-            httpd.shutdown()
-        except Exception:
-            pass
-    for sig in (signal.SIGINT, signal.SIGTERM):
-        signal.signal(sig, _graceful_shutdown)
-
-    logger.info(f"Agent HTTP 服务已启动: http://{host}:{port}/trigger")
-    try:
-        httpd.serve_forever()
-    finally:
-        try:
-            httpd.server_close()
-        except Exception:
-            pass
-        logger.info("Agent HTTP 服务已停止")
-
-
-def _write_pid_file(pid: int) -> None:
-    with open(PID_FILE, 'w') as f:
-        f.write(str(pid))
-
-
-def _read_pid_file() -> Optional[int]:
-    if not os.path.exists(PID_FILE):
-        return None
-    try:
-        with open(PID_FILE, 'r') as f:
-            content = f.read().strip()
-            return int(content) if content else None
-    except Exception:
-        return None
-
-
-def _is_process_running(pid: int) -> bool:
-    try:
-        os.kill(pid, 0)
-        return True
-    except Exception:
-        return False
-
-
-def start_daemon(host: str, port: int) -> Dict[str, Any]:
-    old_pid = _read_pid_file()
-    if old_pid and _is_process_running(old_pid):
-        return {"status": "already_running", "pid": old_pid}
-
-    python_exec = sys.executable
-    script_path = os.path.abspath(__file__)
-    args = [python_exec, script_path, "--serve", "--host", host, "--port", str(port)]
-
-    with open(os.devnull, 'wb') as devnull:
-        proc = subprocess.Popen(
-            args,
-            stdout=devnull,
-            stderr=devnull,
-            stdin=devnull,
-            close_fds=True,
-            preexec_fn=os.setsid if hasattr(os, 'setsid') else None,
-        )
-
-    _write_pid_file(proc.pid)
-    # 简单等待,确认进程未立即退出
-    time.sleep(0.5)
-    running = _is_process_running(proc.pid)
-    return {"status": "started" if running else "failed", "pid": proc.pid}
-
-
-def stop_daemon(timeout: float = 5.0) -> Dict[str, Any]:
-    pid = _read_pid_file()
-    if not pid:
-        return {"status": "not_running"}
-
-    if not _is_process_running(pid):
-        try:
-            os.remove(PID_FILE)
-        except Exception:
-            pass
-        return {"status": "not_running"}
-
-    try:
-        os.kill(pid, signal.SIGTERM)
+            try:
+                crawl_data = item.get('crawl_data') or {}
+                
+                # Step 1: 识别
+                identify_result = identify_tool.run(
+                    crawl_data if isinstance(crawl_data, dict) else {}
+                )
+                
+                # Step 2: 结构化并入库
+                affected = StructureTool.store_parsing_result(
+                    request_id, 
+                    item.get('raw') or {}, 
+                    identify_result
+                )
+                
+                if affected is not None and affected > 0:
+                    success_count += 1
+                
+                logger.info(f"后台处理进度: {idx}/{len(items)} - {'成功' if affected else '失败'}")
+                
+            except Exception as e:
+                logger.error(f"后台处理第 {idx} 项时出错: {e}")
+
+        logger.info(f"后台处理完成: requestId={request_id}, processed={len(items)}, success={success_count}")
+        
     except Exception as e:
-        return {"status": "error", "error": str(e)}
-
-    start_time = time.time()
-    while time.time() - start_time < timeout:
-        if not _is_process_running(pid):
-            break
-        time.sleep(0.2)
-
-    if _is_process_running(pid):
-        try:
-            os.kill(pid, signal.SIGKILL)
-        except Exception as e:
-            return {"status": "error", "error": str(e)}
-
-    try:
-        os.remove(PID_FILE)
-    except Exception:
-        pass
-
-    return {"status": "stopped"}
-
-
-def status_daemon() -> Dict[str, Any]:
-    pid = _read_pid_file()
-    if pid and _is_process_running(pid):
-        return {"status": "running", "pid": pid}
-    return {"status": "not_running"}
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Agent 服务管理')
-    parser.add_argument('--serve', action='store_true', help='以前台模式启动HTTP服务')
-    parser.add_argument('--host', default='0.0.0.0', help='监听地址')
-    parser.add_argument('--port', type=int, default=8080, help='监听端口')
-    parser.add_argument('command', nargs='?', choices=['start', 'stop', 'status'], help='守护进程管理命令')
-    args = parser.parse_args()
-
-    if args.serve:
-        run(args.host, args.port)
-        sys.exit(0)
-
-    if args.command == 'start':
-        res = start_daemon(args.host, args.port)
-        print(json.dumps(res, ensure_ascii=False))
-        sys.exit(0 if res.get('status') == 'started' else 1)
-    elif args.command == 'stop':
-        res = stop_daemon()
-        print(json.dumps(res, ensure_ascii=False))
-        sys.exit(0 if res.get('status') in ('stopped', 'not_running') else 1)
-    elif args.command == 'status':
-        res = status_daemon()
-        print(json.dumps(res, ensure_ascii=False))
-        sys.exit(0 if res.get('status') in ('running', 'not_running') else 1)
-    else:
-        # 默认行为:以前台启动(兼容旧用法)
-        run(args.host, args.port)
-
+        logger.error(f"后台处理失败: requestId={request_id}, error={e}")
+
+if __name__ == "__main__":
+    # Start the service. Host, port and auto-reload default to the previous
+    # hard-coded values but can be overridden through the environment, so the
+    # same entry point works for development and for deployment.
+    reload_flag = os.getenv("AGENT_RELOAD", "true").strip().lower()
+    uvicorn.run(
+        "agent:app",
+        host=os.getenv("AGENT_HOST", "0.0.0.0"),
+        port=int(os.getenv("AGENT_PORT", "8080")),
+        # Auto-reload is a development aid; set AGENT_RELOAD=false to disable.
+        reload=reload_flag in ("1", "true", "yes", "on"),
+        log_level="info"
+    )

+ 0 - 102
agent_process.py

@@ -1,102 +0,0 @@
-import os
-import sys
-import json
-import time
-import signal
-import subprocess
-from typing import Any, Dict, Optional
-
-PID_FILE = os.path.join(os.path.dirname(__file__), 'agent_scheduler.pid')
-
-
-def _write_pid_file(pid: int) -> None:
-    with open(PID_FILE, 'w') as f:
-        f.write(str(pid))
-
-
-def _read_pid_file() -> Optional[int]:
-    if not os.path.exists(PID_FILE):
-        return None
-    try:
-        with open(PID_FILE, 'r') as f:
-            content = f.read().strip()
-            return int(content) if content else None
-    except Exception:
-        return None
-
-
-def _is_process_running(pid: int) -> bool:
-    try:
-        os.kill(pid, 0)
-        return True
-    except Exception:
-        return False
-
-
-def start_daemon(host: str, port: int) -> Dict[str, Any]:
-    old_pid = _read_pid_file()
-    if old_pid and _is_process_running(old_pid):
-        return {"status": "already_running", "pid": old_pid}
-
-    python_exec = sys.executable
-    script_path = os.path.join(os.path.dirname(__file__), 'agent.py')
-    args = [python_exec, script_path, "--serve", "--host", host, "--port", str(port)]
-
-    with open(os.devnull, 'wb') as devnull:
-        proc = subprocess.Popen(
-            args,
-            stdout=devnull,
-            stderr=devnull,
-            stdin=devnull,
-            close_fds=True,
-            preexec_fn=os.setsid if hasattr(os, 'setsid') else None,
-        )
-
-    _write_pid_file(proc.pid)
-    time.sleep(0.5)
-    running = _is_process_running(proc.pid)
-    return {"status": "started" if running else "failed", "pid": proc.pid}
-
-
-def stop_daemon(timeout: float = 5.0) -> Dict[str, Any]:
-    pid = _read_pid_file()
-    if not pid:
-        return {"status": "not_running"}
-
-    if not _is_process_running(pid):
-        try:
-            os.remove(PID_FILE)
-        except Exception:
-            pass
-        return {"status": "not_running"}
-
-    try:
-        os.kill(pid, signal.SIGTERM)
-    except Exception as e:
-        return {"status": "error", "error": str(e)}
-
-    start_time = time.time()
-    while time.time() - start_time < timeout:
-        if not _is_process_running(pid):
-            break
-        time.sleep(0.2)
-
-    if _is_process_running(pid):
-        try:
-            os.kill(pid, signal.SIGKILL)
-        except Exception as e:
-            return {"status": "error", "error": str(e)}
-
-    try:
-        os.remove(PID_FILE)
-    except Exception:
-        pass
-
-    return {"status": "stopped"}
-
-
-def status_daemon() -> Dict[str, Any]:
-    pid = _read_pid_file()
-    if pid and _is_process_running(pid):
-        return {"status": "running", "pid": pid}
-    return {"status": "not_running"}

+ 58 - 0
gemini.py

@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+简单的 Gemini 处理器
+用于满足导入需求,实际功能可以根据需要扩展
+"""
+
+import os
+import json
+from typing import Any, Dict, Optional
+from dotenv import load_dotenv
+import google.generativeai as genai
+
+
+class GeminiProcessor:
+    """Thin wrapper around a Gemini generative model for content structuring."""
+
+    def __init__(self):
+        """Read the API key and create the model client.
+
+        Raises:
+            ValueError: If ``GEMINI_API_KEY`` is not set in the environment
+                (checked after loading the ``.env`` file).
+        """
+        # Load variables from a .env file into the process environment
+        load_dotenv()
+
+        self.api_key = os.getenv('GEMINI_API_KEY')
+        if not self.api_key:
+            raise ValueError("未找到GEMINI_API_KEY环境变量")
+
+        genai.configure(api_key=self.api_key)
+        self.model = genai.GenerativeModel('gemini-2.5-flash')
+
+    @staticmethod
+    def _strip_code_fence(text: str) -> str:
+        """Return ``text`` without a surrounding Markdown code fence.
+
+        Models frequently wrap JSON answers in a fenced block with an optional
+        language tag on the opening line; the payload inside is returned so
+        that it can be parsed. Text without a leading fence is only stripped
+        of surrounding whitespace.
+        """
+        stripped = text.strip()
+        if not stripped.startswith("```"):
+            return stripped
+
+        # Drop the opening fence line (it may carry a language tag).
+        newline_at = stripped.find("\n")
+        body = stripped[newline_at + 1:] if newline_at != -1 else stripped[3:]
+
+        # Drop the closing fence if present.
+        body = body.rstrip()
+        if body.endswith("```"):
+            body = body[:-3]
+        return body.strip()
+
+    def process(self, content: Any, system_prompt: str) -> Dict[str, Any]:
+        """Ask the model to structure ``content`` according to ``system_prompt``.
+
+        Args:
+            content: Data to process; it is JSON-serialised into the prompt.
+            system_prompt: Instructions placed in front of the content.
+
+        Returns:
+            The parsed JSON reply when the model answers with JSON (optionally
+            wrapped in a Markdown code fence). Otherwise a dict with the raw
+            reply under ``result`` and ``raw_response``. If anything raises
+            (serialisation, API call, reading the reply), a dict with
+            ``error`` and the original ``content`` is returned instead.
+        """
+        try:
+            # Build the full prompt: instructions, blank line, serialised content
+            full_prompt = f"{system_prompt}\n\n内容:{json.dumps(content, ensure_ascii=False)}"
+
+            response = self.model.generate_content(full_prompt)
+            raw_text = response.text
+
+            try:
+                # Parse the reply with any code fence removed; the fallback
+                # below still returns the reply exactly as received.
+                return json.loads(self._strip_code_fence(raw_text))
+            except json.JSONDecodeError:
+                return {"result": raw_text, "raw_response": raw_text}
+
+        except Exception as e:
+            # Best-effort contract: report the failure instead of raising.
+            return {"error": str(e), "content": content}

+ 2 - 2
indentify/indentify.py

@@ -22,8 +22,8 @@ from datetime import datetime
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 from utils.mysql_db import MysqlHelper
-from content_indentify.image_identifier import ImageIdentifier
-from content_indentify.video_identifier import VideoIdentifier
+from indentify.image_identifier import ImageIdentifier
+from indentify.video_identifier import VideoIdentifier
 from utils.logging_config import get_logger
 
 

+ 4 - 0
requirements.txt

@@ -5,3 +5,7 @@ loguru==0.7.3
 pymysql==1.0.2
 Pillow==10.4.0
 requests==2.32.4
+
+# FastAPI 相关依赖
+fastapi>=0.116.0
+uvicorn[standard]>=0.35.0

+ 32 - 0
start_service.sh

@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# Startup script for the FastAPI Agent service.
+
+echo "🚀 启动 FastAPI Agent 服务..."
+
+# Run from the directory containing this script so that agent.py (and the
+# modules it imports) are found no matter where the script is invoked from.
+cd "$(dirname "$0")" || {
+    echo "❌ 错误: 无法进入脚本所在目录"
+    exit 1
+}
+
+# Check that a Python 3 interpreter is available
+if ! command -v python3 &> /dev/null; then
+    echo "❌ 错误: 未找到 python3 命令"
+    exit 1
+fi
+
+# Check that the web framework dependencies can be imported
+echo "📦 检查依赖..."
+if ! python3 -c "import fastapi, uvicorn" 2>/dev/null; then
+    echo "❌ 错误: 缺少必要的依赖包"
+    echo "请运行: pip install -r requirements.txt"
+    exit 1
+fi
+
+# Print the service endpoints
+echo "🌟 启动服务..."
+echo "📍 服务地址: http://localhost:8080"
+echo "📚 API文档: http://localhost:8080/docs"
+echo "🔍 健康检查: http://localhost:8080/health"
+echo ""
+echo "按 Ctrl+C 停止服务"
+echo ""
+
+# Replace the shell with the server process so Ctrl+C and termination
+# signals reach it directly.
+exec python3 agent.py