import OpenAI from 'openai'
import type {
  ChatCompletion,
  ChatCompletionCreateParamsNonStreaming,
} from 'openai/resources/chat/completions'
import Anthropic from '@anthropic-ai/sdk'
import { prisma } from './prisma'

// Hardcoded Claude model list (Anthropic API doesn't expose a models.list endpoint for all users)
export const ANTHROPIC_CLAUDE_MODELS = [
  'claude-opus-4-5-20250514',
  'claude-sonnet-4-5-20250514',
  'claude-haiku-3-5-20241022',
  'claude-opus-4-20250514',
  'claude-sonnet-4-20250514',
] as const

/**
 * AI client type returned by getOpenAI().
 * Both the OpenAI SDK and the Anthropic adapter satisfy this interface.
 * All AI services only use .chat.completions.create(), so this is safe.
 */
export type AIClient = OpenAI | AnthropicClientAdapter

type AnthropicClientAdapter = {
  __isAnthropicAdapter: true
  chat: {
    completions: {
      create(params: ChatCompletionCreateParamsNonStreaming): Promise<ChatCompletion>
    }
  }
}

// OpenAI client singleton with lazy initialization
const globalForOpenAI = globalThis as unknown as {
  openai: AIClient | undefined
  openaiInitialized: boolean
}

// ─── Provider Detection ─────────────────────────────────────────────────────

/**
 * Get the configured AI provider from SystemSettings.
 * Returns 'openai' (default), 'litellm' (ChatGPT subscription proxy), or 'anthropic' (Claude API).
 */
export async function getConfiguredProvider(): Promise<'openai' | 'litellm' | 'anthropic'> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'ai_provider' },
    })
    const value = setting?.value || 'openai'
    if (value === 'litellm') return 'litellm'
    if (value === 'anthropic') return 'anthropic'
    return 'openai'
  } catch {
    return 'openai'
  }
}

/**
 * Check if a model ID indicates LiteLLM ChatGPT subscription routing.
 * Models like 'chatgpt/gpt-5.2' use the chatgpt/ prefix.
 * Used by buildCompletionParams (sync) to strip unsupported token limit fields.
 */
export function isLiteLLMChatGPTModel(model: string): boolean {
  return model.toLowerCase().startsWith('chatgpt/')
}

// ─── Model Type Detection ────────────────────────────────────────────────────

/**
 * Reasoning models that require different API parameters:
 * - Use max_completion_tokens instead of max_tokens
 * - Don't support response_format: json_object (must instruct JSON in prompt)
 * - Don't support the temperature parameter
 * - Don't support system messages (use developer or user role instead)
 */
const REASONING_MODEL_PREFIXES = ['o1', 'o3', 'o4']

/**
 * Models that use max_completion_tokens instead of max_tokens.
 * This includes reasoning models AND newer GPT models (GPT-5+).
 */
const NEW_TOKEN_PARAM_PREFIXES = ['o1', 'o3', 'o4', 'gpt-5', 'gpt-6', 'gpt-7']

/**
 * Models that don't support custom temperature values.
 * These only accept the default temperature (1).
 */
const NO_TEMPERATURE_PREFIXES = ['o1', 'o3', 'o4', 'gpt-5', 'gpt-6', 'gpt-7']

/**
 * Check if a model is a reasoning model (o1, o3, o4 series).
 * These models have additional restrictions (no temperature, no json_object, etc.)
 */
export function isReasoningModel(model: string): boolean {
  const modelLower = model.toLowerCase()
  return REASONING_MODEL_PREFIXES.some(prefix =>
    modelLower.startsWith(prefix) ||
    modelLower.includes(`/${prefix}`) ||
    modelLower.includes(`-${prefix}`)
  )
}
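// Illustrative behavior of the matching rules above (a sketch only; the
// provider-prefixed IDs are hypothetical examples, not a supported list):
//
//   isReasoningModel('o3-mini')                // true  — startsWith 'o3'
//   isReasoningModel('openrouter/o1-preview')  // true  — includes '/o1'
//   isReasoningModel('gpt-4o-mini')            // false — no o-series prefix
//   isLiteLLMChatGPTModel('chatgpt/gpt-5.2')   // true  — chatgpt/ prefix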
/**
 * Check if a model requires max_completion_tokens instead of max_tokens.
 * This includes reasoning models AND newer GPT models (GPT-5+).
 */
export function usesNewTokenParam(model: string): boolean {
  const modelLower = model.toLowerCase()
  return NEW_TOKEN_PARAM_PREFIXES.some(prefix =>
    modelLower.startsWith(prefix) ||
    modelLower.includes(`/${prefix}`) ||
    modelLower.includes(`-${prefix}`)
  )
}

/**
 * Check if a model supports custom temperature values.
 * Newer models (o-series, GPT-5+) only accept the default temperature (1).
 */
export function supportsTemperature(model: string): boolean {
  const modelLower = model.toLowerCase()
  return !NO_TEMPERATURE_PREFIXES.some(prefix =>
    modelLower.startsWith(prefix) ||
    modelLower.includes(`/${prefix}`) ||
    modelLower.includes(`-${prefix}`)
  )
}

/**
 * Check if a model requires higher token limits due to reasoning overhead.
 * Matches all GPT-5-family models and any "nano" variant; GPT-5 nano in
 * particular needs more tokens because reasoning consumes the output budget.
 */
export function needsHigherTokenLimit(model: string): boolean {
  const modelLower = model.toLowerCase()
  return modelLower.includes('nano') || modelLower.includes('gpt-5')
}

/**
 * Get the minimum recommended max_tokens for a model.
 * Reasoning models need higher limits because internal reasoning consumes tokens.
 */
export function getMinTokenLimit(model: string, requestedLimit?: number): number | undefined {
  // Reasoning consumes a significant share of the token budget on these models.
  // If the caller requests less than the floor, bump it up; with no requested
  // limit, leave it unset so the model default applies.
  if (needsHigherTokenLimit(model)) {
    const minLimit = 16000 // Ensure enough headroom for reasoning
    if (!requestedLimit) return undefined // No limit = model default
    return Math.max(requestedLimit, minLimit)
  }
  return requestedLimit
}
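// Worked examples of the bump logic above (illustrative model IDs):
//
//   getMinTokenLimit('gpt-4o', 500)       // → 500       (no reasoning overhead)
//   getMinTokenLimit('gpt-5-nano', 4000)  // → 16000     (below the floor, bumped)
//   getMinTokenLimit('gpt-5-nano')        // → undefined (no limit = model default)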
// ─── Chat Completion Parameter Builder ───────────────────────────────────────

type MessageRole = 'system' | 'user' | 'assistant' | 'developer'

export interface ChatCompletionOptions {
  messages: Array<{ role: MessageRole; content: string }>
  maxTokens?: number
  temperature?: number
  jsonMode?: boolean
}

/**
 * Build chat completion parameters with correct settings for the model type.
 * Handles the differences between standard models and reasoning models.
 */
export function buildCompletionParams(
  model: string,
  options: ChatCompletionOptions
): ChatCompletionCreateParamsNonStreaming {
  const isReasoning = isReasoningModel(model)

  // Convert messages for reasoning models (system -> developer)
  const messages = options.messages.map(msg => {
    if (isReasoning && msg.role === 'system') {
      return { role: 'developer' as const, content: msg.content }
    }
    return msg as { role: 'system' | 'user' | 'assistant' | 'developer'; content: string }
  })

  // For reasoning models requesting JSON, append a JSON instruction to the last user message
  if (isReasoning && options.jsonMode) {
    // Find the last user message index (polyfill for findLastIndex)
    let lastUserIdx = -1
    for (let i = messages.length - 1; i >= 0; i--) {
      if (messages[i].role === 'user') {
        lastUserIdx = i
        break
      }
    }
    if (lastUserIdx !== -1) {
      messages[lastUserIdx] = {
        ...messages[lastUserIdx],
        content: messages[lastUserIdx].content +
          '\n\nIMPORTANT: Respond with valid JSON only, no other text.',
      }
    }
  }

  const params: ChatCompletionCreateParamsNonStreaming = {
    model,
    messages: messages as ChatCompletionCreateParamsNonStreaming['messages'],
  }

  // The token limit parameter differs between model types:
  // newer models (GPT-5+, o-series) use max_completion_tokens.
  // Also ensure sufficient tokens for models with reasoning overhead (GPT-5 nano).
  const effectiveMaxTokens = getMinTokenLimit(model, options.maxTokens)
  if (effectiveMaxTokens) {
    if (usesNewTokenParam(model)) {
      params.max_completion_tokens = effectiveMaxTokens
    } else {
      params.max_tokens = effectiveMaxTokens
    }
  }

  // Newer models (o-series, GPT-5+) don't support custom temperature
  if (supportsTemperature(model) && options.temperature !== undefined) {
    params.temperature = options.temperature
  }

  // Reasoning models don't support response_format: json_object
  if (!isReasoning && options.jsonMode) {
    params.response_format = { type: 'json_object' }
  }

  // LiteLLM ChatGPT subscription models reject token limit fields
  if (isLiteLLMChatGPTModel(model)) {
    delete params.max_tokens
    delete params.max_completion_tokens
  }

  return params
}
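// A minimal sketch of how buildCompletionParams diverges per model family
// (illustrative model IDs; field values follow from the rules above):
//
//   buildCompletionParams('gpt-4o', { messages, maxTokens: 500, temperature: 0.2, jsonMode: true })
//     → { model, messages, max_tokens: 500, temperature: 0.2,
//         response_format: { type: 'json_object' } }
//
//   buildCompletionParams('o3-mini', { messages, maxTokens: 500, temperature: 0.2, jsonMode: true })
//     → { model, messages (system→developer, JSON instruction appended),
//         max_completion_tokens: 500 }  // no temperature, no response_format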
/**
 * Get the OpenAI API key from SystemSettings
 */
async function getOpenAIApiKey(): Promise<string | null> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'openai_api_key' },
    })
    return setting?.value || process.env.OPENAI_API_KEY || null
  } catch {
    // Fall back to the env var if the database isn't available
    return process.env.OPENAI_API_KEY || null
  }
}

/**
 * Get the Anthropic API key from SystemSettings
 */
async function getAnthropicApiKey(): Promise<string | null> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'anthropic_api_key' },
    })
    return setting?.value || process.env.ANTHROPIC_API_KEY || null
  } catch {
    return process.env.ANTHROPIC_API_KEY || null
  }
}

/**
 * Get the custom base URL for OpenAI-compatible providers.
 * Supports OpenRouter, Together AI, Groq, local models, etc.
 * Set via Settings → AI or the OPENAI_BASE_URL env var.
 */
async function getBaseURL(): Promise<string | undefined> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'openai_base_url' },
    })
    return setting?.value || process.env.OPENAI_BASE_URL || undefined
  } catch {
    return process.env.OPENAI_BASE_URL || undefined
  }
}

/**
 * Create an OpenAI client instance.
 * Supports a custom baseURL for OpenAI-compatible providers
 * (OpenRouter, Groq, Together AI, local models, etc.)
 */
async function createOpenAIClient(): Promise<OpenAI | null> {
  const apiKey = await getOpenAIApiKey()
  const provider = await getConfiguredProvider()

  // LiteLLM proxy may not require a real API key
  const effectiveApiKey = apiKey || (provider === 'litellm' ? 'sk-litellm' : null)
  if (!effectiveApiKey) {
    console.warn('OpenAI API key not configured')
    return null
  }

  const baseURL = await getBaseURL()
  if (baseURL) {
    console.log(`[OpenAI] Using custom base URL: ${baseURL} (provider: ${provider})`)
  }

  return new OpenAI({
    apiKey: effectiveApiKey,
    ...(baseURL ? { baseURL } : {}),
  })
}

/**
 * Check if a model is a Claude Opus model (supports extended thinking).
 */
function isClaudeOpusModel(model: string): boolean {
  return model.toLowerCase().includes('opus')
}
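// Configuration sketch: because createOpenAIClient() honors a custom base URL,
// an OpenAI-compatible gateway can be swapped in without code changes. These
// are examples of well-known public endpoints, not values this app ships with:
//
//   OPENAI_BASE_URL=https://openrouter.ai/api/v1     # OpenRouter
//   OPENAI_BASE_URL=https://api.groq.com/openai/v1   # Groq
//   OPENAI_BASE_URL=http://localhost:11434/v1        # Ollama (local)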
/**
 * Create an Anthropic adapter that wraps the Anthropic SDK behind the
 * same `.chat.completions.create()` surface as OpenAI. This allows all
 * AI service files to work with zero changes.
 */
async function createAnthropicAdapter(): Promise<AnthropicClientAdapter | null> {
  const apiKey = await getAnthropicApiKey()
  if (!apiKey) {
    console.warn('Anthropic API key not configured')
    return null
  }

  const baseURL = await getBaseURL()
  const anthropic = new Anthropic({
    apiKey,
    ...(baseURL ? { baseURL } : {}),
  })
  if (baseURL) {
    console.log(`[Anthropic] Using custom base URL: ${baseURL}`)
  }

  return {
    __isAnthropicAdapter: true,
    chat: {
      completions: {
        async create(params: ChatCompletionCreateParamsNonStreaming): Promise<ChatCompletion> {
          // Extract system messages → Anthropic's system parameter
          const systemMessages: string[] = []
          const userAssistantMessages: Anthropic.MessageParam[] = []
          for (const msg of params.messages) {
            const content = typeof msg.content === 'string' ? msg.content : ''
            if (msg.role === 'system' || msg.role === 'developer') {
              systemMessages.push(content)
            } else {
              userAssistantMessages.push({
                role: msg.role === 'assistant' ? 'assistant' : 'user',
                content,
              })
            }
          }

          // Ensure messages start with a user message (Anthropic requirement)
          if (userAssistantMessages.length === 0 || userAssistantMessages[0].role !== 'user') {
            userAssistantMessages.unshift({ role: 'user', content: 'Hello' })
          }

          // Determine max_tokens (required by Anthropic; default 16384)
          const maxTokens = params.max_tokens ?? params.max_completion_tokens ?? 16384

          // Build the Anthropic request
          const anthropicParams: Anthropic.MessageCreateParamsNonStreaming = {
            model: params.model,
            max_tokens: maxTokens,
            messages: userAssistantMessages,
            ...(systemMessages.length > 0 ? { system: systemMessages.join('\n\n') } : {}),
          }

          // Extended thinking for Opus models. Anthropic requires
          // budget_tokens >= 1024 and < max_tokens, so only enable it when
          // there is enough headroom. Thinking is also incompatible with a
          // custom temperature, so temperature is only set when thinking is off.
          const enableThinking = isClaudeOpusModel(params.model) && maxTokens >= 2048
          if (enableThinking) {
            anthropicParams.thinking = {
              type: 'enabled',
              budget_tokens: Math.min(8192, maxTokens - 1024),
            }
          } else if (params.temperature !== undefined && params.temperature !== null) {
            // Clamp to Anthropic's 0-1 range (OpenAI accepts 0-2)
            anthropicParams.temperature = Math.min(params.temperature, 1)
          }

          // Call the Anthropic API
          let response = await anthropic.messages.create(anthropicParams)

          // Extract text from the response (skip thinking blocks)
          let responseText = response.content
            .filter((block): block is Anthropic.TextBlock => block.type === 'text')
            .map((block) => block.text)
            .join('')

          // JSON retry: response_format was requested but the response isn't valid JSON
          const wantsJson =
            params.response_format &&
            'type' in params.response_format &&
            params.response_format.type === 'json_object'
          if (wantsJson && responseText) {
            try {
              JSON.parse(responseText)
            } catch {
              // Retry once with an explicit JSON instruction
              const retryMessages = [...userAssistantMessages]
              const lastIdx = retryMessages.length - 1
              if (lastIdx >= 0 && retryMessages[lastIdx].role === 'user') {
                retryMessages[lastIdx] = {
                  ...retryMessages[lastIdx],
                  content:
                    retryMessages[lastIdx].content +
                    '\n\nIMPORTANT: You MUST respond with valid JSON only. No markdown, no extra text, just a JSON object or array.',
                }
              }
              const retryParams: Anthropic.MessageCreateParamsNonStreaming = {
                ...anthropicParams,
                messages: retryMessages,
              }
              response = await anthropic.messages.create(retryParams)
              responseText = response.content
                .filter((block): block is Anthropic.TextBlock => block.type === 'text')
                .map((block) => block.text)
                .join('')
            }
          }

          // Normalize the response to the OpenAI shape
          return {
            id: response.id,
            object: 'chat.completion' as const,
            created: Math.floor(Date.now() / 1000),
            model: response.model,
            choices: [
              {
                index: 0,
                message: {
                  role: 'assistant' as const,
                  content: responseText || null,
                  refusal: null,
                },
                finish_reason:
                  response.stop_reason === 'end_turn' || response.stop_reason === 'stop_sequence'
                    ? 'stop'
                    : response.stop_reason === 'max_tokens'
                      ? 'length'
                      : 'stop',
                logprobs: null,
              },
            ],
            usage: {
              prompt_tokens: response.usage.input_tokens,
              completion_tokens: response.usage.output_tokens,
              total_tokens: response.usage.input_tokens + response.usage.output_tokens,
            },
          }
        },
      },
    },
  }
}

/**
 * Get the AI client singleton.
 * Returns an OpenAI client or an Anthropic adapter (both expose .chat.completions.create()).
 * Returns null if the API key is not configured.
 */
export async function getOpenAI(): Promise<AIClient | null> {
  if (globalForOpenAI.openaiInitialized) {
    return globalForOpenAI.openai || null
  }

  const provider = await getConfiguredProvider()
  const client =
    provider === 'anthropic' ? await createAnthropicAdapter() : await createOpenAIClient()

  if (process.env.NODE_ENV !== 'production') {
    globalForOpenAI.openai = client || undefined
    globalForOpenAI.openaiInitialized = true
  }

  return client
}
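/**
 * Usage sketch (illustrative; `askModel` is a hypothetical helper, not part of
 * the app's AI services): the intended consumption pattern for any caller —
 * resolve the client, shape params for the configured model, then call the
 * shared .chat.completions.create() surface.
 */
export async function askModel(prompt: string): Promise<string | null> {
  const client = await getOpenAI()
  if (!client) return null // API key not configured

  const model = await getConfiguredModel()
  const params = buildCompletionParams(model, {
    messages: [
      { role: 'system', content: 'You are a concise assistant.' },
      { role: 'user', content: prompt },
    ],
    maxTokens: 500,
  })

  const response = await client.chat.completions.create(params)
  return response.choices[0]?.message?.content ?? null
}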
/**
 * Reset the OpenAI client singleton (e.g., after a settings change).
 * The next call to getOpenAI() will create a fresh client.
 */
export function resetOpenAIClient(): void {
  globalForOpenAI.openai = undefined
  globalForOpenAI.openaiInitialized = false
}

/**
 * Check if OpenAI is configured and available
 */
export async function isOpenAIConfigured(): Promise<boolean> {
  const provider = await getConfiguredProvider()
  if (provider === 'litellm') {
    const baseURL = await getBaseURL()
    return !!baseURL
  }
  if (provider === 'anthropic') {
    const apiKey = await getAnthropicApiKey()
    return !!apiKey
  }
  const apiKey = await getOpenAIApiKey()
  return !!apiKey
}

/**
 * List the available models from the OpenAI API
 */
export async function listAvailableModels(): Promise<{
  success: boolean
  models?: string[]
  error?: string
  manualEntry?: boolean
}> {
  try {
    const provider = await getConfiguredProvider()

    // The LiteLLM proxy for ChatGPT subscriptions doesn't support models.list()
    if (provider === 'litellm') {
      return {
        success: true,
        models: [],
        manualEntry: true,
      }
    }

    // Anthropic: return the hardcoded Claude model list
    if (provider === 'anthropic') {
      const apiKey = await getAnthropicApiKey()
      if (!apiKey) {
        return { success: false, error: 'Anthropic API key not configured' }
      }
      return {
        success: true,
        models: [...ANTHROPIC_CLAUDE_MODELS],
      }
    }

    const client = await getOpenAI()
    if (!client) {
      return {
        success: false,
        error: 'OpenAI API key not configured',
      }
    }

    const response = await (client as OpenAI).models.list()
    const chatModels = response.data
      .filter((m) =>
        m.id.includes('gpt') || m.id.includes('o1') || m.id.includes('o3') || m.id.includes('o4')
      )
      .map((m) => m.id)
      .sort()

    return {
      success: true,
      models: chatModels,
    }
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Unknown error',
    }
  }
}

/**
 * Validate that a specific model is available
 */
export async function validateModel(modelId: string): Promise<{
  valid: boolean
  error?: string
}> {
  try {
    const client = await getOpenAI()
    if (!client) {
      return {
        valid: false,
        error: 'AI API key not configured',
      }
    }

    const provider = await getConfiguredProvider()

    // For Anthropic, use a minimal max_tokens
    const params = buildCompletionParams(modelId, {
      messages: [{ role: 'user', content: 'test' }],
      maxTokens: provider === 'anthropic' ? 16 : 1,
    })
    await client.chat.completions.create(params)

    return { valid: true }
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error'

    // Check for specific model errors
    if (message.includes('does not exist') || message.includes('model_not_found')) {
      return {
        valid: false,
        error: `Model "${modelId}" is not available with your API key`,
      }
    }

    return {
      valid: false,
      error: message,
    }
  }
}
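/**
 * Settings-change sketch (illustrative; `applyAISettingChange` is hypothetical
 * and assumes the SystemSettings model only requires `key` and `value`):
 * persist the new value, drop the cached client so getOpenAI() rebuilds it,
 * then re-validate the configured model.
 */
export async function applyAISettingChange(
  key: string,
  value: string
): Promise<{ valid: boolean; error?: string }> {
  await prisma.systemSettings.upsert({
    where: { key },
    update: { value },
    create: { key, value },
  })
  resetOpenAIClient() // next getOpenAI() call re-reads provider, key, and base URL
  const model = await getConfiguredModel()
  return validateModel(model)
}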
/**
 * Test the OpenAI connection with the configured model
 */
export async function testOpenAIConnection(): Promise<{
  success: boolean
  error?: string
  model?: string
  modelTested?: string
}> {
  try {
    const client = await getOpenAI()
    const provider = await getConfiguredProvider()
    if (!client) {
      const label = provider === 'anthropic' ? 'Anthropic' : 'OpenAI'
      return {
        success: false,
        error: `${label} API key not configured`,
      }
    }

    // Get the configured model
    const configuredModel = await getConfiguredModel()

    // Test with the configured model using the correct parameters
    const params = buildCompletionParams(configuredModel, {
      messages: [{ role: 'user', content: 'Hello' }],
      maxTokens: provider === 'anthropic' ? 16 : 5,
    })
    const response = await client.chat.completions.create(params)

    return {
      success: true,
      model: response.model,
      modelTested: configuredModel,
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error'
    const configuredModel = await getConfiguredModel()

    // Check for model-specific errors
    if (
      message.includes('does not exist') ||
      message.includes('model_not_found') ||
      message.includes('not_found_error')
    ) {
      return {
        success: false,
        error: `Model "${configuredModel}" is not available. Check Settings → AI to select a valid model.`,
        modelTested: configuredModel,
      }
    }

    return {
      success: false,
      error: message,
      modelTested: configuredModel,
    }
  }
}

// Default models for different use cases
export const AI_MODELS = {
  ASSIGNMENT: 'gpt-4o', // Best for complex reasoning
  QUICK: 'gpt-4o-mini', // Faster, cheaper for simple tasks
} as const

/**
 * Get the admin-configured AI model from SystemSettings.
 * Falls back to the provided default if not configured.
 */
export async function getConfiguredModel(fallback: string = AI_MODELS.ASSIGNMENT): Promise<string> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'ai_model' },
    })
    return setting?.value || process.env.OPENAI_MODEL || fallback
  } catch {
    return process.env.OPENAI_MODEL || fallback
  }
}
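/**
 * Health-check sketch (illustrative; `aiHealthCheck` is hypothetical — adapt
 * the shape to the app's actual route handlers): combines the cheap config
 * check with the live connection test above, skipping the API call when no
 * key is configured.
 */
export async function aiHealthCheck(): Promise<{
  configured: boolean
  connection?: { success: boolean; error?: string; model?: string; modelTested?: string }
}> {
  const configured = await isOpenAIConfigured()
  if (!configured) return { configured: false } // no key: skip the API call entirely

  const connection = await testOpenAIConnection()
  return { configured: true, connection }
}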