AI Architecture is a general-purpose AI service framework built on LangChain. It provides a modular architecture and supports multiple LLM providers, vector databases, and document processing.
- src/config/
- src/utils/
- src/core/
- src/services/
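
These four packages form the framework's layering. As a rough sketch of how they fit together (the module-to-layer pairing is inferred from the examples in this README):

from src.config.settings import settings            # configuration layer
from src.utils.cache import cache_manager           # shared utilities
from src.core.llm_manager import llm_manager        # core engines (LLM, vector store, documents)
from src.services.chat_service import chat_service  # user-facing services

# Services orchestrate the core layer, which in turn reads the configuration.
llm = llm_manager.create_llm("openai", model="gpt-4")
session = chat_service.create_session("user_123", "You are a helpful assistant")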
Multiple LLM providers are supported:
from src.core.llm_manager import llm_manager

# Create an OpenAI LLM
llm = llm_manager.create_llm("openai", model="gpt-4")

# Create an Anthropic LLM
llm = llm_manager.create_llm("anthropic", model="claude-3-sonnet-20240229")

# Test the connection
if llm_manager.test_connection("openai"):
    print("Connection successful")
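
Because both create_llm and test_connection are keyed by the provider name, a simple provider fallback can be layered on top. A minimal sketch (the provider order and model names are illustrative):

def create_llm_with_fallback():
    # Try providers in order of preference; skip any whose connection test fails.
    candidates = [("openai", "gpt-4"), ("anthropic", "claude-3-sonnet-20240229")]
    for provider, model in candidates:
        if llm_manager.test_connection(provider):
            return llm_manager.create_llm(provider, model=model)
    raise RuntimeError("No LLM provider is reachable")

llm = create_llm_with_fallback()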
ChromaDB is supported (FAISS is temporarily disabled due to a compilation issue):
from src.core.vector_store import vector_store_manager

# Create a ChromaDB store
chroma_store = vector_store_manager.create_chroma_store("my_collection")

# FAISS stores are temporarily disabled (building FAISS requires the SWIG compiler)
# faiss_store = vector_store_manager.create_faiss_store("my_index")

# Add documents
vector_store_manager.add_documents(documents, "chroma", "my_collection")

# Similarity search
results = vector_store_manager.similarity_search("query text", k=4)
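
The documents passed to add_documents are standard LangChain Document objects. A minimal sketch of building them by hand (the contents and metadata are placeholders):

from langchain_core.documents import Document

# Metadata is optional, but useful for filtering and for tracing
# search hits back to their source.
documents = [
    Document(page_content="Artificial intelligence is ...", metadata={"source": "notes.txt"}),
    Document(page_content="Vector databases store embeddings ...", metadata={"source": "notes.txt"}),
]

vector_store_manager.add_documents(documents, "chroma", "my_collection")
results = vector_store_manager.similarity_search("What is AI?", k=2)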
Multiple document formats are supported:
from src.core.document_processor import document_processor

# Supported formats
formats = document_processor.supported_extensions
# {'.pdf': PyPDFLoader, '.docx': Docx2txtLoader, '.txt': TextLoader, ...}

# Process a single document
documents = document_processor.process_document_pipeline("document.pdf")

# Process a directory
documents = document_processor.load_directory("./docs", recursive=True)

# Split documents
split_docs = document_processor.split_documents(documents, chunk_size=1000)
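
Combining the document processor with the vector store gives the usual ingestion pipeline. A minimal sketch using only the calls shown above (the query string is illustrative):

# End-to-end ingestion: load a directory, chunk it, then index the chunks.
documents = document_processor.load_directory("./docs", recursive=True)
split_docs = document_processor.split_documents(documents, chunk_size=1000)
vector_store_manager.add_documents(split_docs, "chroma", "my_collection")

# The indexed chunks are now searchable.
hits = vector_store_manager.similarity_search("deployment checklist", k=4)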
Full session management:
from src.services.chat_service import chat_service

# Create a session
session = chat_service.create_session("user_123", "You are a helpful assistant")

# Send a message
response = chat_service.send_message("user_123", "Hello")

# Get the session history
messages = session.get_messages()

# Get a conversation summary
summary = chat_service.get_conversation_summary("user_123")
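
Since send_message is keyed by the session's user ID, a multi-turn chat loop is straightforward. A minimal command-line sketch:

session = chat_service.create_session("user_123", "You are a helpful assistant")
while True:
    user_input = input("you> ")
    if user_input in ("quit", "exit"):
        break
    # Each call appends to the same session's history.
    response = chat_service.send_message("user_123", user_input)
    print(f"assistant> {response}")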
A question-answering system built on the vector database:
from src.services.qa_service import qa_service

# Add documents to the knowledge base
qa_service.add_documents_for_qa(documents, collection_name="knowledge_base")

# Ask a question
answer = qa_service.ask_question("What is artificial intelligence?", collection_name="knowledge_base")

# Batch QA
questions = ["Question 1", "Question 2", "Question 3"]
results = qa_service.batch_qa(questions, collection_name="knowledge_base")
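
A short usage sketch for the batch results; the shape of what batch_qa returns is an assumption here (one result per question, in order), so adjust if the service returns a different structure:

for question, result in zip(questions, results):
    print(f"Q: {question}")
    print(f"A: {result}")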
Document management and search:
from src.services.document_service import document_service

# Process and store a document
result = document_service.process_and_store_document(
    "document.pdf",
    collection_name="my_docs"
)

# Search documents
results = document_service.search_documents("keyword", k=5)

# Get collection info
stats = document_service.get_collection_info("my_docs")
Copy env.example to .env and configure it:
# OpenAI configuration
OPENAI_API_KEY=your_openai_api_key
OPENAI_API_BASE=https://api.openai.com/v1

# Anthropic configuration
ANTHROPIC_API_KEY=your_anthropic_api_key

# Vector database configuration
CHROMA_DB_PATH=./data/chroma_db
FAISS_INDEX_PATH=./data/faiss_index

# Logging configuration
LOG_LEVEL=INFO
LOG_FILE=./logs/ai_arch.log

# Application configuration
APP_NAME=AI_Architecture
DEBUG=false

# Cache configuration
CACHE_ENABLED=true
CACHE_TTL=3600

# LLM parameters
MAX_TOKENS=4000
TEMPERATURE=0.7
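
These variables are consumed through src.config.settings. A minimal sketch of how such a settings module can be written with pydantic-settings; the attribute names mirror the variables above, but the actual module may differ:

from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    # Values are read from the environment and from .env (case-insensitive match).
    model_config = SettingsConfigDict(env_file=".env")

    openai_api_key: str = ""
    openai_api_base: str = "https://api.openai.com/v1"
    anthropic_api_key: str = ""
    chroma_db_path: str = "./data/chroma_db"
    log_level: str = "INFO"
    log_file: str = "./logs/ai_arch.log"
    app_name: str = "AI_Architecture"
    debug: bool = False
    cache_enabled: bool = True
    cache_ttl: int = 3600
    max_tokens: int = 4000
    temperature: float = 0.7

settings = Settings()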
from src.services.chat_service import chat_service
from src.services.qa_service import qa_service

# Create a chat session
session = chat_service.create_session("user_123")
response = chat_service.send_message("user_123", "Hello")

# Set up the QA system
qa_service.add_documents_for_qa(documents)
answer = qa_service.ask_question("A question")
from src.core.llm_manager import llm_manager
from src.core.vector_store import vector_store_manager

# Multi-provider support
llm_openai = llm_manager.create_llm("openai", model="gpt-4")
llm_anthropic = llm_manager.create_llm("anthropic", model="claude-3-sonnet")

# Vector database operations
vector_store = vector_store_manager.get_or_create_store("chroma", "my_collection")
vector_store.add_documents(documents)
results = vector_store.similarity_search("query")
from src.core.llm_manager import BaseLLMProvider

class CustomProvider(BaseLLMProvider):
    def create_llm(self, **kwargs):
        # Implement the LLM-creation logic here
        pass

    def get_available_models(self):
        # Return the list of available models
        return ["model1", "model2"]
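
As a concrete illustration, a provider can wrap any LangChain chat model. A minimal sketch backing a provider with a self-hosted, OpenAI-compatible endpoint via langchain_openai (the endpoint URL and model names are placeholders):

from langchain_openai import ChatOpenAI
from src.core.llm_manager import BaseLLMProvider

class LocalServerProvider(BaseLLMProvider):
    """Hypothetical provider for a self-hosted, OpenAI-compatible server."""

    def create_llm(self, **kwargs):
        # ChatOpenAI can target any OpenAI-compatible endpoint via base_url.
        return ChatOpenAI(
            base_url="http://localhost:8000/v1",  # placeholder endpoint
            api_key="not-needed",                 # many local servers ignore the key
            model=kwargs.get("model", "local-model"),
        )

    def get_available_models(self):
        return ["local-model"]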
from src.core.vector_store import VectorStoreManager

# Add a new store type to VectorStoreManager
def create_custom_store(self, name: str):
    # Implement the custom store logic here
    pass
from src.core.document_processor import DocumentProcessor

# Register a new loader in DocumentProcessor
self.supported_extensions['.custom'] = CustomLoader
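
The registered loader should follow LangChain's loader interface. A minimal sketch of a CustomLoader built on langchain_core's BaseLoader (the '.custom' format here is invented for illustration):

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document

class CustomLoader(BaseLoader):
    """Hypothetical loader for a plain-text '.custom' format."""

    def __init__(self, file_path: str):
        self.file_path = file_path

    def lazy_load(self):
        # Yield one Document per file; real loaders may yield one per record.
        with open(self.file_path, encoding="utf-8") as f:
            yield Document(page_content=f.read(), metadata={"source": self.file_path})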
from loguru import logger

try:
    response = chat_service.send_message(session_id, message)
except ValueError as e:
    logger.error(f"Invalid argument: {e}")
except Exception as e:
    logger.error(f"Unexpected error: {e}")
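
For transient failures such as rate limits or network drops, a small retry wrapper is often enough. A minimal sketch using only the standard library (attempt counts and delays are illustrative):

import time
from loguru import logger

def send_with_retry(session_id, message, attempts=3, delay=1.0):
    # Retry with exponential backoff; re-raise on the final attempt.
    for attempt in range(attempts):
        try:
            return chat_service.send_message(session_id, message)
        except Exception as e:
            if attempt == attempts - 1:
                raise
            logger.warning(f"Attempt {attempt + 1} failed ({e}); retrying in {delay:.1f}s")
            time.sleep(delay)
            delay *= 2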
from src.utils.cache import cache_manager

# Use the cache decorator
@cache_manager.cache(ttl=3600)
def expensive_operation():
    # An expensive operation
    pass

# Manual cache management
cache_manager.set("key", value, ttl=1800)
cached_value = cache_manager.get("key")
from loguru import logger

logger.info("Operation started")
logger.debug("Debug details")
logger.warning("A warning")
logger.error("An error")
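
To wire LOG_LEVEL and LOG_FILE from the configuration section into loguru, a sink has to be set up once at startup. A minimal sketch (the settings attribute names follow the configuration sketch above and may differ in the actual module):

import sys
from loguru import logger
from src.config.settings import settings

# Drop the default stderr sink, then re-add it plus a rotating file sink
# at the configured level.
logger.remove()
logger.add(sys.stderr, level=settings.log_level)
logger.add(settings.log_file, level=settings.log_level, rotation="10 MB")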
from src.config.settings import settings

# Use the configuration
if settings.debug:
    # loguru's logger has no setLevel; lower the sink level instead (see the sink setup above)
    logger.remove()
    logger.add(sys.stderr, level="DEBUG")

if settings.cache_enabled:
    # Enable caching
    pass
# Batch document processing
documents = document_processor.load_directory("./docs")
vector_store_manager.add_documents(documents)

# Batch QA
questions = ["Question 1", "Question 2", "Question 3"]
results = qa_service.batch_qa(questions)
# Cache LLM instances
llm = llm_manager.get_or_create_llm("openai", model="gpt-4")

# Cache vector stores
vector_store = vector_store_manager.get_or_create_store("chroma", "collection")
import asyncio

async def async_qa(questions):
    # ask_question is synchronous, so run the calls concurrently in worker threads
    tasks = [asyncio.to_thread(qa_service.ask_question, q) for q in questions]
    return await asyncio.gather(*tasks)
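
A short usage sketch, assuming the caller is synchronous code:

questions = ["Question 1", "Question 2", "Question 3"]
answers = asyncio.run(async_qa(questions))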
- Incorrect API keys
- Vector database connection failures
- Document processing failures
- Out of memory
# Enable debug mode
settings.debug = True
logger.remove()
logger.add(sys.stderr, level="DEBUG")  # loguru has no setLevel; reconfigure the sink

# Test the connection
llm_manager.test_connection("openai")
MIT License