Appearance
LLM 配置
AnythingLLM 支持多种大语言模型(LLM)提供商,包括 OpenAI、Azure OpenAI、Anthropic Claude、本地模型等。本指南详细介绍如何配置和优化各种 LLM 服务。
支持的 LLM 提供商
OpenAI
- GPT-4 系列: gpt-4, gpt-4-turbo, gpt-4-vision-preview
- GPT-3.5 系列: gpt-3.5-turbo, gpt-3.5-turbo-16k
- 代码模型: code-davinci-002, code-cushman-001(注意:Codex 系列模型已于 2023 年 3 月被 OpenAI 弃用并下线,代码任务请改用 gpt-4 或 gpt-3.5-turbo)
Azure OpenAI
- 支持所有 OpenAI 模型的 Azure 托管版本
- 企业级安全和合规性
- 专用实例和自定义部署
Anthropic Claude
- Claude 3 系列: claude-3-opus, claude-3-sonnet, claude-3-haiku
- Claude 2 系列: claude-2.1, claude-2.0
- 长上下文支持(最高 200K tokens)
本地模型
- Ollama: 支持 Llama 2, Mistral, CodeLlama 等
- LM Studio: 本地模型管理和推理
- 自定义端点: 兼容 OpenAI API 的任何服务
OpenAI 配置
基础配置
javascript
// 环境变量配置
OPENAI_API_KEY=sk-your-openai-api-key-here
OPENAI_MODEL=gpt-4
OPENAI_ORGANIZATION_ID=org-your-organization-id // 可选
// 高级配置
OPENAI_BASE_URL=https://api.openai.com/v1
OPENAI_TIMEOUT=60000
OPENAI_MAX_RETRIES=3
OPENAI_TEMPERATURE=0.7
OPENAI_MAX_TOKENS=2048
模型选择指南
javascript
// 不同用途的模型推荐
const modelRecommendations = {
// 通用对话和问答
general: {
model: "gpt-4",
temperature: 0.7,
maxTokens: 2048,
description: "平衡性能和成本的最佳选择"
},
// 代码生成和编程
coding: {
model: "gpt-4",
temperature: 0.2,
maxTokens: 4096,
description: "更准确的代码生成"
},
// 创意写作
creative: {
model: "gpt-4",
temperature: 0.9,
maxTokens: 4096,
description: "更有创意的输出"
},
// 分析和总结
analytical: {
model: "gpt-4",
temperature: 0.3,
maxTokens: 2048,
description: "更准确的分析结果"
},
// 成本优化
costEffective: {
model: "gpt-3.5-turbo",
temperature: 0.7,
maxTokens: 1024,
description: "成本效益最佳"
}
};
高级参数配置
javascript
// 完整的 OpenAI 配置示例
const openaiConfig = {
apiKey: process.env.OPENAI_API_KEY,
organization: process.env.OPENAI_ORGANIZATION_ID,
baseURL: process.env.OPENAI_BASE_URL || "https://api.openai.com/v1",
// 模型参数
model: "gpt-4",
temperature: 0.7, // 创造性 (0-2)
maxTokens: 2048, // 最大输出长度
topP: 1, // 核采样 (0-1)
frequencyPenalty: 0, // 频率惩罚 (-2 到 2)
presencePenalty: 0, // 存在惩罚 (-2 到 2)
// 请求配置
timeout: 60000, // 超时时间 (毫秒)
maxRetries: 3, // 最大重试次数
retryDelay: 1000, // 重试延迟 (毫秒)
// 流式响应
stream: true, // 启用流式响应
streamOptions: {
includeUsage: true // 包含使用统计
}
};
Azure OpenAI 配置
基础配置
javascript
// Azure OpenAI 环境变量
AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
AZURE_OPENAI_API_KEY=your-azure-api-key
AZURE_OPENAI_API_VERSION=2023-12-01-preview
// 部署配置
AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4
AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME=text-embedding-ada-002
多部署配置
javascript
// 支持多个 Azure 部署
const azureDeployments = {
// 主要聊天模型
chat: {
endpoint: "https://main-resource.openai.azure.com",
deploymentName: "gpt-4-deployment",
apiVersion: "2023-12-01-preview"
},
// 嵌入模型
embedding: {
endpoint: "https://embedding-resource.openai.azure.com",
deploymentName: "text-embedding-ada-002",
apiVersion: "2023-12-01-preview"
},
// 备用部署
fallback: {
endpoint: "https://backup-resource.openai.azure.com",
deploymentName: "gpt-35-turbo-deployment",
apiVersion: "2023-12-01-preview"
}
};
区域和可用性配置
javascript
// 多区域配置
const azureRegions = {
primary: {
region: "East US",
endpoint: "https://eastus-resource.openai.azure.com",
priority: 1
},
secondary: {
region: "West Europe",
endpoint: "https://westeurope-resource.openai.azure.com",
priority: 2
},
tertiary: {
region: "Japan East",
endpoint: "https://japaneast-resource.openai.azure.com",
priority: 3
}
};
Anthropic Claude 配置
基础配置
javascript
// Anthropic 环境变量
ANTHROPIC_API_KEY=sk-ant-your-anthropic-api-key
ANTHROPIC_MODEL=claude-3-opus-20240229
ANTHROPIC_BASE_URL=https://api.anthropic.com
ANTHROPIC_TIMEOUT=60000
Claude 模型特性
javascript
// Claude 模型对比
const claudeModels = {
"claude-3-opus-20240229": {
description: "最强大的模型,适合复杂任务",
contextLength: 200000,
strengths: ["推理", "数学", "编程", "创意写作"],
costTier: "高",
useCase: "复杂分析、高质量内容生成"
},
"claude-3-sonnet-20240229": {
description: "平衡性能和速度",
contextLength: 200000,
strengths: ["通用对话", "文档分析", "代码审查"],
costTier: "中",
useCase: "日常对话、文档处理"
},
"claude-3-haiku-20240307": {
description: "最快速的模型",
contextLength: 200000,
strengths: ["快速响应", "简单任务", "实时交互"],
costTier: "低",
useCase: "实时聊天、快速问答"
}
};
高级配置
javascript
// Claude 高级配置
const claudeConfig = {
apiKey: process.env.ANTHROPIC_API_KEY,
baseURL: process.env.ANTHROPIC_BASE_URL,
// 模型参数
model: "claude-3-opus-20240229",
maxTokens: 4096,
temperature: 0.7,
topP: 1,
topK: 40,
// 系统提示
systemPrompt: "You are a helpful AI assistant.",
// 安全设置
safetySettings: {
harmBlockThreshold: "BLOCK_MEDIUM_AND_ABOVE",
harmCategories: ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH"]
},
// 请求配置
timeout: 60000,
maxRetries: 3,
retryDelay: 2000
};
本地模型配置
Ollama 配置
javascript
// Ollama 环境变量
OLLAMA_BASE_URL=http://localhost:11434
OLLAMA_MODEL=llama2
OLLAMA_TIMEOUT=120000
// 支持的模型
const ollamaModels = {
"llama2": {
size: "7B/13B/70B",
description: "Meta 的开源对话模型",
strengths: ["通用对话", "指令跟随"],
requirements: "8GB+ RAM (7B), 16GB+ RAM (13B)"
},
"mistral": {
size: "7B",
description: "高效的开源模型",
strengths: ["代码生成", "推理"],
requirements: "8GB+ RAM"
},
"codellama": {
size: "7B/13B/34B",
description: "专门的代码生成模型",
strengths: ["代码生成", "代码解释", "调试"],
requirements: "8GB+ RAM (7B), 16GB+ RAM (13B)"
},
"neural-chat": {
size: "7B",
description: "Intel 优化的聊天模型",
strengths: ["对话", "问答"],
requirements: "8GB+ RAM"
}
};
LM Studio 配置
javascript
// LM Studio 环境变量
LM_STUDIO_BASE_URL=http://localhost:1234/v1
LM_STUDIO_MODEL=local-model
LM_STUDIO_TIMEOUT=120000
// LM Studio 模型管理
const lmStudioConfig = {
baseURL: "http://localhost:1234/v1",
// 模型配置
models: {
"mistral-7b-instruct": {
path: "./models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
contextLength: 8192,
temperature: 0.7
},
"llama2-13b-chat": {
path: "./models/llama-2-13b-chat.Q4_K_M.gguf",
contextLength: 4096,
temperature: 0.8
}
},
// 硬件配置
hardware: {
gpuLayers: 35, // GPU 层数
threads: 8, // CPU 线程数
batchSize: 512, // 批处理大小
contextSize: 4096 // 上下文大小
}
};
自定义端点配置
javascript
// 自定义 LLM 端点
CUSTOM_LLM_ENDPOINT=http://your-llm-server:8000/v1
CUSTOM_LLM_API_KEY=your-custom-api-key
CUSTOM_LLM_MODEL=custom-model-name
// 自定义端点配置示例
const customLLMConfig = {
endpoint: "http://your-llm-server:8000/v1",
apiKey: "your-api-key",
// 模型映射
modelMapping: {
"gpt-4": "your-custom-gpt4-equivalent",
"gpt-3.5-turbo": "your-custom-gpt35-equivalent"
},
// 请求格式转换
requestTransform: (request) => {
// 转换请求格式以适配自定义 API
return {
model: request.model,
messages: request.messages,
temperature: request.temperature,
max_tokens: request.max_tokens
};
},
// 响应格式转换
responseTransform: (response) => {
// 转换响应格式以符合 OpenAI 格式
return {
choices: [{
message: {
content: response.text,
role: "assistant"
}
}],
usage: response.usage
};
}
};
模型切换和负载均衡
智能模型路由
javascript
// 基于任务类型的模型路由
const modelRouter = {
// 路由规则
routes: [
{
condition: (request) => request.task === "code",
model: "gpt-4",
temperature: 0.2
},
{
condition: (request) => request.task === "creative",
model: "claude-3-opus-20240229",
temperature: 0.9
},
{
condition: (request) => request.length > 10000,
model: "claude-3-sonnet-20240229", // 长文本处理
temperature: 0.7
},
{
condition: (request) => request.priority === "fast",
model: "gpt-3.5-turbo",
temperature: 0.7
}
],
// 默认模型
default: {
model: "gpt-4",
temperature: 0.7
}
};
负载均衡配置
javascript
// 多提供商负载均衡
const loadBalancer = {
providers: [
{
name: "openai",
weight: 50,
config: {
apiKey: process.env.OPENAI_API_KEY,
model: "gpt-4"
},
healthCheck: async () => {
// 健康检查逻辑
return true;
}
},
{
name: "azure",
weight: 30,
config: {
endpoint: process.env.AZURE_OPENAI_ENDPOINT,
apiKey: process.env.AZURE_OPENAI_API_KEY,
deployment: "gpt-4"
},
healthCheck: async () => {
return true;
}
},
{
name: "anthropic",
weight: 20,
config: {
apiKey: process.env.ANTHROPIC_API_KEY,
model: "claude-3-sonnet-20240229"
},
healthCheck: async () => {
return true;
}
}
],
// 故障转移策略
failover: {
enabled: true,
maxRetries: 3,
retryDelay: 1000,
circuitBreaker: {
failureThreshold: 5,
resetTimeout: 60000
}
}
};
性能优化
缓存策略
javascript
// LLM 响应缓存
const cacheConfig = {
// 缓存键生成
keyGenerator: (request) => {
const key = `${request.model}:${request.temperature}:${hashMessages(request.messages)}`;
return key;
},
// 缓存策略
strategies: {
// 精确匹配缓存
exact: {
enabled: true,
ttl: 3600, // 1小时
maxSize: 1000
},
// 语义相似性缓存
semantic: {
enabled: true,
threshold: 0.95, // 相似度阈值
ttl: 1800, // 30分钟
maxSize: 500
}
},
// 缓存预热
prewarming: {
enabled: true,
commonQueries: [
"What is AnythingLLM?",
"How to install AnythingLLM?",
"AnythingLLM features"
]
}
};
请求优化
javascript
// 请求批处理和优化
const requestOptimization = {
// 批处理配置
batching: {
enabled: true,
maxBatchSize: 10,
maxWaitTime: 100, // 毫秒
batchProcessor: async (requests) => {
// 批量处理逻辑
return await processBatch(requests);
}
},
// 请求去重
deduplication: {
enabled: true,
window: 5000, // 5秒窗口
keyExtractor: (request) => {
return `${request.model}:${JSON.stringify(request.messages)}`;
}
},
// 请求压缩
compression: {
enabled: true,
algorithm: "gzip",
threshold: 1024 // 1KB 以上才压缩
}
};
流式响应优化
javascript
// 流式响应配置
const streamingConfig = {
// 启用流式响应
enabled: true,
// 缓冲配置
buffer: {
size: 64, // 缓冲区大小 (字符)
flushInterval: 50 // 刷新间隔 (毫秒)
},
// 分块策略
chunking: {
strategy: "sentence", // word, sentence, paragraph
minChunkSize: 10,
maxChunkSize: 100
},
// 错误处理
errorHandling: {
retryOnError: true,
maxRetries: 3,
fallbackToNonStreaming: true
}
};
监控和分析
使用统计
javascript
// LLM 使用统计
const usageTracking = {
// 指标收集
metrics: {
requestCount: true,
responseTime: true,
tokenUsage: true,
errorRate: true,
costTracking: true
},
// 成本计算
costCalculation: {
providers: {
openai: {
"gpt-4": {
inputCost: 0.03, // 每 1K tokens
outputCost: 0.06
},
"gpt-3.5-turbo": {
inputCost: 0.001,
outputCost: 0.002
}
},
anthropic: {
"claude-3-opus-20240229": {
inputCost: 0.015,
outputCost: 0.075
}
}
}
},
// 报告生成
reporting: {
interval: "daily",
recipients: ["admin@yourdomain.com"],
includeGraphs: true,
format: "html"
}
};
性能监控
javascript
// 性能监控配置
const performanceMonitoring = {
// 响应时间监控
responseTime: {
thresholds: {
warning: 5000, // 5秒
critical: 10000 // 10秒
},
alerting: {
enabled: true,
channels: ["email", "slack"]
}
},
// 错误率监控
errorRate: {
thresholds: {
warning: 0.05, // 5%
critical: 0.10 // 10%
},
window: 300 // 5分钟窗口
},
// 可用性监控
availability: {
healthCheck: {
interval: 60, // 60秒
timeout: 10, // 10秒
endpoints: [
"https://api.openai.com/v1/models",
"https://api.anthropic.com/v1/messages"
]
}
}
};
安全配置
API 密钥管理
javascript
// API 密钥安全配置
const keyManagement = {
// 密钥轮换
rotation: {
enabled: true,
interval: 2592000, // 30天
warningPeriod: 604800, // 7天前警告
autoRotate: false // 手动轮换
},
// 密钥验证
validation: {
checkOnStartup: true,
periodicCheck: true,
checkInterval: 3600 // 1小时
},
// 访问控制
accessControl: {
ipWhitelist: ["192.168.1.0/24"],
rateLimit: {
requests: 1000,
window: 3600 // 每小时1000次请求
}
}
};
数据保护
javascript
// 数据保护配置
const dataProtection = {
// 请求日志
requestLogging: {
enabled: true,
excludeFields: ["apiKey", "authorization"],
retention: 30, // 30天
encryption: true
},
// 响应过滤
responseFiltering: {
enabled: true,
filters: [
"creditCard",
"ssn",
"email",
"phoneNumber"
]
},
// 内容审核
contentModeration: {
enabled: true,
providers: ["openai-moderation"],
blockThreshold: 0.8,
logViolations: true
}
};
故障排除
常见问题
javascript
// 故障排除指南
const troubleshooting = {
// API 连接问题
connectionIssues: {
symptoms: ["Connection timeout", "Network error"],
solutions: [
"检查网络连接",
"验证 API 端点",
"检查防火墙设置",
"增加超时时间"
]
},
// 认证问题
authenticationIssues: {
symptoms: ["401 Unauthorized", "Invalid API key"],
solutions: [
"验证 API 密钥格式",
"检查密钥权限",
"确认组织 ID",
"检查密钥是否过期"
]
},
// 速率限制
rateLimitIssues: {
symptoms: ["429 Too Many Requests"],
solutions: [
"实施请求限流",
"增加重试延迟",
"升级 API 计划",
"使用多个 API 密钥"
]
},
// 性能问题
performanceIssues: {
symptoms: ["Slow response", "High latency"],
solutions: [
"启用响应缓存",
"优化提示词",
"减少上下文长度",
"使用更快的模型"
]
}
};
诊断工具
javascript
// LLM 诊断工具
class LLMDiagnostics {
async runDiagnostics() {
const results = {
connectivity: await this.testConnectivity(),
authentication: await this.testAuthentication(),
performance: await this.testPerformance(),
models: await this.testModels()
};
return this.generateReport(results);
}
async testConnectivity() {
// 测试网络连接
const providers = ['openai', 'anthropic', 'azure'];
const results = {};
for (const provider of providers) {
try {
const response = await this.pingProvider(provider);
results[provider] = {
status: 'success',
latency: response.latency
};
} catch (error) {
results[provider] = {
status: 'failed',
error: error.message
};
}
}
return results;
}
async testAuthentication() {
// 测试 API 密钥
const tests = [];
if (process.env.OPENAI_API_KEY) {
tests.push(this.testOpenAIAuth());
}
if (process.env.ANTHROPIC_API_KEY) {
tests.push(this.testAnthropicAuth());
}
return Promise.all(tests);
}
async testPerformance() {
// 性能基准测试
const testPrompt = "Hello, how are you?";
const results = {};
const startTime = Date.now();
const response = await this.sendTestRequest(testPrompt);
const endTime = Date.now();
results.responseTime = endTime - startTime;
results.tokenCount = response.usage?.total_tokens || 0;
results.tokensPerSecond = results.tokenCount / (results.responseTime / 1000);
return results;
}
generateReport(results) {
return {
timestamp: new Date().toISOString(),
summary: this.generateSummary(results),
details: results,
recommendations: this.generateRecommendations(results)
};
}
}
正确配置 LLM 是 AnythingLLM 发挥最佳性能的关键。根据您的具体需求选择合适的模型和配置,并定期监控和优化性能。