MOPC-Portal/src/lib/openai.ts
Matt f42b452899
Add Anthropic API integration, remove locale settings UI
Anthropic API:
- Add @anthropic-ai/sdk with adapter wrapping OpenAI-shaped interface
- Support Claude models (opus, sonnet, haiku) with extended thinking
- Auto-reset model on provider switch, JSON retry logic
- Add Claude model pricing to ai-usage tracker
- Update AI settings form with Anthropic provider option
- Add provider field to AIUsageLog for cross-provider cost tracking

Locale Settings Removal:
- Strip Localization tab from admin settings (mobile + desktop)
- Remove i18n settings from router and feature flags
- Remove LOCALIZATION from SettingCategory enum
- Keep franc document language detection intact

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 17:26:59 +01:00


import OpenAI from 'openai'
import type { ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions'
import Anthropic from '@anthropic-ai/sdk'
import { prisma } from './prisma'
// Hardcoded Claude model list (Anthropic API doesn't expose a models.list endpoint for all users)
export const ANTHROPIC_CLAUDE_MODELS = [
  'claude-opus-4-5-20250514',
  'claude-sonnet-4-5-20250514',
  'claude-haiku-3-5-20241022',
  'claude-opus-4-20250514',
  'claude-sonnet-4-20250514',
] as const
/**
 * AI client type returned by getOpenAI().
 * Both the OpenAI SDK and the Anthropic adapter satisfy this interface.
 * All AI services only use .chat.completions.create(), so this is safe.
 */
export type AIClient = OpenAI | AnthropicClientAdapter

type AnthropicClientAdapter = {
  __isAnthropicAdapter: true
  chat: {
    completions: {
      create(params: ChatCompletionCreateParamsNonStreaming): Promise<OpenAI.Chat.Completions.ChatCompletion>
    }
  }
}

// OpenAI client singleton with lazy initialization
const globalForOpenAI = globalThis as unknown as {
  openai: AIClient | undefined
  openaiInitialized: boolean
}
// ─── Provider Detection ─────────────────────────────────────────────────────
/**
 * Get the configured AI provider from SystemSettings.
 * Returns 'openai' (default), 'litellm' (ChatGPT subscription proxy), or 'anthropic' (Claude API).
 */
export async function getConfiguredProvider(): Promise<'openai' | 'litellm' | 'anthropic'> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'ai_provider' },
    })
    const value = setting?.value || 'openai'
    if (value === 'litellm') return 'litellm'
    if (value === 'anthropic') return 'anthropic'
    return 'openai'
  } catch {
    return 'openai'
  }
}
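
// Illustrative summary (not exhaustive): the SystemSettings keys this module
// reads, with the env-var fallback used when the database is unavailable.
//   'ai_provider'       → 'openai' | 'litellm' | 'anthropic'
//   'ai_model'          → model id (fallback: OPENAI_MODEL)
//   'openai_api_key'    → OpenAI key (fallback: OPENAI_API_KEY)
//   'anthropic_api_key' → Anthropic key (fallback: ANTHROPIC_API_KEY)
//   'openai_base_url'   → custom base URL (fallback: OPENAI_BASE_URL)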
/**
 * Check if a model ID indicates LiteLLM ChatGPT subscription routing.
 * Models like 'chatgpt/gpt-5.2' use the chatgpt/ prefix.
 * Used by buildCompletionParams (sync) to strip unsupported token limit fields.
 */
export function isLiteLLMChatGPTModel(model: string): boolean {
  return model.toLowerCase().startsWith('chatgpt/')
}
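
// Example (illustrative model ids):
//   isLiteLLMChatGPTModel('chatgpt/gpt-5.2') // true  → token limit fields stripped
//   isLiteLLMChatGPTModel('gpt-4o')          // false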
// ─── Model Type Detection ────────────────────────────────────────────────────
/**
 * Reasoning models that require different API parameters:
 * - Use max_completion_tokens instead of max_tokens
 * - Don't support response_format: json_object (must instruct JSON in prompt)
 * - Don't support temperature parameter
 * - Don't support system messages (use developer or user role instead)
 */
const REASONING_MODEL_PREFIXES = ['o1', 'o3', 'o4']

/**
 * Models that use max_completion_tokens instead of max_tokens.
 * This includes reasoning models AND newer GPT models (GPT-5+).
 */
const NEW_TOKEN_PARAM_PREFIXES = ['o1', 'o3', 'o4', 'gpt-5', 'gpt-6', 'gpt-7']

/**
 * Models that don't support custom temperature values.
 * These only accept the default temperature (1).
 */
const NO_TEMPERATURE_PREFIXES = ['o1', 'o3', 'o4', 'gpt-5', 'gpt-6', 'gpt-7']

/**
 * Check if a model is a reasoning model (o1, o3, o4 series).
 * These models have additional restrictions (no temperature, no json_object, etc.)
 */
export function isReasoningModel(model: string): boolean {
  const modelLower = model.toLowerCase()
  return REASONING_MODEL_PREFIXES.some(prefix =>
    modelLower.startsWith(prefix) ||
    modelLower.includes(`/${prefix}`) ||
    modelLower.includes(`-${prefix}`)
  )
}

/**
 * Check if a model requires max_completion_tokens instead of max_tokens.
 * This includes reasoning models AND newer GPT models (GPT-5+).
 */
export function usesNewTokenParam(model: string): boolean {
  const modelLower = model.toLowerCase()
  return NEW_TOKEN_PARAM_PREFIXES.some(prefix =>
    modelLower.startsWith(prefix) ||
    modelLower.includes(`/${prefix}`) ||
    modelLower.includes(`-${prefix}`)
  )
}

/**
 * Check if a model supports custom temperature values.
 * Newer models (o-series, GPT-5+) only accept default temperature (1).
 */
export function supportsTemperature(model: string): boolean {
  const modelLower = model.toLowerCase()
  return !NO_TEMPERATURE_PREFIXES.some(prefix =>
    modelLower.startsWith(prefix) ||
    modelLower.includes(`/${prefix}`) ||
    modelLower.includes(`-${prefix}`)
  )
}
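
// Classification examples (illustrative model ids, traced through the
// predicates above):
//   'o3-mini'     → reasoning, max_completion_tokens, no custom temperature
//   'gpt-5-nano'  → not reasoning, max_completion_tokens, no custom temperature
//   'gpt-4o-mini' → not reasoning, max_tokens, temperature supported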
/**
 * Check if a model requires higher token limits due to reasoning overhead.
 * GPT-5 nano especially needs more tokens as reasoning consumes output budget.
 */
export function needsHigherTokenLimit(model: string): boolean {
  const modelLower = model.toLowerCase()
  return modelLower.includes('nano') || modelLower.includes('gpt-5')
}

/**
 * Get minimum recommended max_tokens for a model.
 * Reasoning models need higher limits because internal reasoning consumes tokens.
 */
export function getMinTokenLimit(model: string, requestedLimit?: number): number | undefined {
  // For GPT-5 nano, reasoning uses a significant share of the token budget.
  // If the caller requests less than the 16000-token floor, bump it up;
  // with no requested limit, leave it unset so the model default applies.
  if (needsHigherTokenLimit(model)) {
    const minLimit = 16000 // Ensure enough headroom for reasoning
    if (!requestedLimit) return undefined // No limit = model default
    return Math.max(requestedLimit, minLimit)
  }
  return requestedLimit
}
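
// Worked examples:
//   getMinTokenLimit('gpt-5-nano', 4000) // → 16000 (bumped to the floor)
//   getMinTokenLimit('gpt-5-nano')       // → undefined (model default)
//   getMinTokenLimit('gpt-4o', 4000)     // → 4000 (passed through)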
// ─── Chat Completion Parameter Builder ───────────────────────────────────────
type MessageRole = 'system' | 'user' | 'assistant' | 'developer'

export interface ChatCompletionOptions {
  messages: Array<{ role: MessageRole; content: string }>
  maxTokens?: number
  temperature?: number
  jsonMode?: boolean
}

/**
 * Build chat completion parameters with correct settings for the model type.
 * Handles differences between standard models and reasoning models.
 */
export function buildCompletionParams(
  model: string,
  options: ChatCompletionOptions
): ChatCompletionCreateParamsNonStreaming {
  const isReasoning = isReasoningModel(model)

  // Convert messages for reasoning models (system -> developer)
  const messages = options.messages.map(msg => {
    if (isReasoning && msg.role === 'system') {
      return { role: 'developer' as const, content: msg.content }
    }
    return msg as { role: 'system' | 'user' | 'assistant' | 'developer'; content: string }
  })

  // For reasoning models requesting JSON, append JSON instruction to last user message
  if (isReasoning && options.jsonMode) {
    // Find last user message index (polyfill for findLastIndex)
    let lastUserIdx = -1
    for (let i = messages.length - 1; i >= 0; i--) {
      if (messages[i].role === 'user') {
        lastUserIdx = i
        break
      }
    }
    if (lastUserIdx !== -1) {
      messages[lastUserIdx] = {
        ...messages[lastUserIdx],
        content: messages[lastUserIdx].content + '\n\nIMPORTANT: Respond with valid JSON only, no other text.',
      }
    }
  }

  const params: ChatCompletionCreateParamsNonStreaming = {
    model,
    messages: messages as ChatCompletionCreateParamsNonStreaming['messages'],
  }

  // Token limit parameter differs between model types:
  // newer models (GPT-5+, o-series) use max_completion_tokens.
  // Also ensure sufficient tokens for models with reasoning overhead (GPT-5 nano).
  const effectiveMaxTokens = getMinTokenLimit(model, options.maxTokens)
  if (effectiveMaxTokens) {
    if (usesNewTokenParam(model)) {
      params.max_completion_tokens = effectiveMaxTokens
    } else {
      params.max_tokens = effectiveMaxTokens
    }
  }

  // Newer models (o-series, GPT-5+) don't support custom temperature
  if (supportsTemperature(model) && options.temperature !== undefined) {
    params.temperature = options.temperature
  }

  // Reasoning models don't support response_format: json_object
  if (!isReasoning && options.jsonMode) {
    params.response_format = { type: 'json_object' }
  }

  // LiteLLM ChatGPT subscription models reject token limit fields
  if (isLiteLLMChatGPTModel(model)) {
    delete params.max_tokens
    delete params.max_completion_tokens
  }

  return params
}
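
// Usage sketch (illustrative model id and prompt):
//   const params = buildCompletionParams('o3-mini', {
//     messages: [
//       { role: 'system', content: 'You are a classifier.' },
//       { role: 'user', content: 'Classify this document.' },
//     ],
//     maxTokens: 2000,
//     jsonMode: true,
//   })
//   // → the system message becomes a developer message, max_completion_tokens
//   //   is set to 2000, no temperature or response_format is sent, and a
//   //   JSON-only instruction is appended to the last user message instead.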
/**
 * Get OpenAI API key from SystemSettings
 */
async function getOpenAIApiKey(): Promise<string | null> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'openai_api_key' },
    })
    return setting?.value || process.env.OPENAI_API_KEY || null
  } catch {
    // Fall back to env var if database isn't available
    return process.env.OPENAI_API_KEY || null
  }
}

/**
 * Get Anthropic API key from SystemSettings
 */
async function getAnthropicApiKey(): Promise<string | null> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'anthropic_api_key' },
    })
    return setting?.value || process.env.ANTHROPIC_API_KEY || null
  } catch {
    return process.env.ANTHROPIC_API_KEY || null
  }
}

/**
 * Get custom base URL for OpenAI-compatible providers.
 * Supports OpenRouter, Together AI, Groq, local models, etc.
 * Set via Settings → AI or OPENAI_BASE_URL env var.
 */
async function getBaseURL(): Promise<string | undefined> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'openai_base_url' },
    })
    return setting?.value || process.env.OPENAI_BASE_URL || undefined
  } catch {
    return process.env.OPENAI_BASE_URL || undefined
  }
}

/**
 * Create OpenAI client instance.
 * Supports custom baseURL for OpenAI-compatible providers
 * (OpenRouter, Groq, Together AI, local models, etc.)
 */
async function createOpenAIClient(): Promise<OpenAI | null> {
  const apiKey = await getOpenAIApiKey()
  const provider = await getConfiguredProvider()
  // LiteLLM proxy may not require a real API key
  const effectiveApiKey = apiKey || (provider === 'litellm' ? 'sk-litellm' : null)
  if (!effectiveApiKey) {
    console.warn('OpenAI API key not configured')
    return null
  }
  const baseURL = await getBaseURL()
  if (baseURL) {
    console.log(`[OpenAI] Using custom base URL: ${baseURL} (provider: ${provider})`)
  }
  return new OpenAI({
    apiKey: effectiveApiKey,
    ...(baseURL ? { baseURL } : {}),
  })
}

/**
 * Check if a model is a Claude Opus model (supports extended thinking).
 */
function isClaudeOpusModel(model: string): boolean {
  return model.toLowerCase().includes('opus')
}
/**
 * Create an Anthropic adapter that wraps the Anthropic SDK behind the
 * same `.chat.completions.create()` surface as OpenAI. This allows all
 * AI service files to work with zero changes.
 */
async function createAnthropicAdapter(): Promise<AnthropicClientAdapter | null> {
  const apiKey = await getAnthropicApiKey()
  if (!apiKey) {
    console.warn('Anthropic API key not configured')
    return null
  }
  const baseURL = await getBaseURL()
  const anthropic = new Anthropic({
    apiKey,
    ...(baseURL ? { baseURL } : {}),
  })
  if (baseURL) {
    console.log(`[Anthropic] Using custom base URL: ${baseURL}`)
  }
  return {
    __isAnthropicAdapter: true,
    chat: {
      completions: {
        async create(params: ChatCompletionCreateParamsNonStreaming): Promise<OpenAI.Chat.Completions.ChatCompletion> {
          // Extract system messages → Anthropic's system parameter
          const systemMessages: string[] = []
          const userAssistantMessages: Anthropic.MessageParam[] = []
          for (const msg of params.messages) {
            const content = typeof msg.content === 'string' ? msg.content : ''
            if (msg.role === 'system' || msg.role === 'developer') {
              systemMessages.push(content)
            } else {
              userAssistantMessages.push({
                role: msg.role === 'assistant' ? 'assistant' : 'user',
                content,
              })
            }
          }
          // Ensure messages start with a user message (Anthropic requirement)
          if (userAssistantMessages.length === 0 || userAssistantMessages[0].role !== 'user') {
            userAssistantMessages.unshift({ role: 'user', content: 'Hello' })
          }
          // Determine max_tokens (required by Anthropic, default 16384)
          const maxTokens = params.max_tokens ?? params.max_completion_tokens ?? 16384
          // Build Anthropic request
          const anthropicParams: Anthropic.MessageCreateParamsNonStreaming = {
            model: params.model,
            max_tokens: maxTokens,
            messages: userAssistantMessages,
            ...(systemMessages.length > 0 ? { system: systemMessages.join('\n\n') } : {}),
          }
          // Add temperature if present (Anthropic supports 0-1)
          if (params.temperature !== undefined && params.temperature !== null) {
            anthropicParams.temperature = params.temperature
          }
          // Extended thinking for Opus models. The API requires
          // budget_tokens >= 1024 and budget_tokens < max_tokens, and rejects
          // custom temperature while thinking is enabled, so skip thinking
          // for requests too small to fit the minimum budget.
          if (isClaudeOpusModel(params.model) && maxTokens > 1024) {
            anthropicParams.thinking = {
              type: 'enabled',
              budget_tokens: Math.max(1024, Math.min(8192, maxTokens - 1)),
            }
            delete anthropicParams.temperature
          }
          // Call Anthropic API
          let response = await anthropic.messages.create(anthropicParams)
          // Extract text from response (skip thinking blocks)
          let responseText = response.content
            .filter((block): block is Anthropic.TextBlock => block.type === 'text')
            .map((block) => block.text)
            .join('')
          // JSON retry: if response_format was set but response isn't valid JSON
          const wantsJson = params.response_format && 'type' in params.response_format &&
            params.response_format.type === 'json_object'
          if (wantsJson && responseText) {
            try {
              JSON.parse(responseText)
            } catch {
              // Retry once with explicit JSON instruction
              const retryMessages = [...userAssistantMessages]
              const lastIdx = retryMessages.length - 1
              if (lastIdx >= 0 && retryMessages[lastIdx].role === 'user') {
                retryMessages[lastIdx] = {
                  ...retryMessages[lastIdx],
                  content: retryMessages[lastIdx].content + '\n\nIMPORTANT: You MUST respond with valid JSON only. No markdown, no extra text, just a JSON object or array.',
                }
              }
              const retryParams: Anthropic.MessageCreateParamsNonStreaming = {
                ...anthropicParams,
                messages: retryMessages,
              }
              response = await anthropic.messages.create(retryParams)
              responseText = response.content
                .filter((block): block is Anthropic.TextBlock => block.type === 'text')
                .map((block) => block.text)
                .join('')
            }
          }
          // Normalize response to OpenAI shape
          return {
            id: response.id,
            object: 'chat.completion' as const,
            created: Math.floor(Date.now() / 1000),
            model: response.model,
            choices: [
              {
                index: 0,
                message: {
                  role: 'assistant' as const,
                  content: responseText || null,
                  refusal: null,
                },
                finish_reason:
                  response.stop_reason === 'end_turn' || response.stop_reason === 'stop_sequence'
                    ? 'stop'
                    : response.stop_reason === 'max_tokens'
                      ? 'length'
                      : 'stop',
                logprobs: null,
              },
            ],
            usage: {
              prompt_tokens: response.usage.input_tokens,
              completion_tokens: response.usage.output_tokens,
              total_tokens: response.usage.input_tokens + response.usage.output_tokens,
              prompt_tokens_details: undefined as any,
              completion_tokens_details: undefined as any,
            },
          }
        },
      },
    },
  }
}
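
// Mapping sketch (OpenAI-shaped request → Anthropic request), summarizing the
// adapter above:
//   system/developer messages    → single `system` string (joined with blank lines)
//   max_tokens ?? max_completion_tokens ?? 16384 → max_tokens (required by Anthropic)
//   temperature                  → temperature (dropped when thinking is enabled)
//   response_format: json_object → parse check on the reply, one retry with an
//                                  explicit JSON-only instruction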
/**
 * Get the AI client singleton.
 * Returns an OpenAI client or an Anthropic adapter (both expose .chat.completions.create()).
 * Returns null if the API key is not configured.
 */
export async function getOpenAI(): Promise<AIClient | null> {
  if (globalForOpenAI.openaiInitialized) {
    return globalForOpenAI.openai || null
  }
  const provider = await getConfiguredProvider()
  const client = provider === 'anthropic'
    ? await createAnthropicAdapter()
    : await createOpenAIClient()
  if (process.env.NODE_ENV !== 'production') {
    globalForOpenAI.openai = client || undefined
    globalForOpenAI.openaiInitialized = true
  }
  return client
}
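
// Usage sketch (hypothetical caller; works unchanged for either provider):
//   const client = await getOpenAI()
//   if (!client) throw new Error('AI not configured')
//   const model = await getConfiguredModel()
//   const completion = await client.chat.completions.create(
//     buildCompletionParams(model, {
//       messages: [{ role: 'user', content: 'Summarize this document.' }],
//       maxTokens: 1024,
//     })
//   )
//   const text = completion.choices[0]?.message?.content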
/**
 * Reset the OpenAI client singleton (e.g., after settings change).
 * Next call to getOpenAI() will create a fresh client.
 */
export function resetOpenAIClient(): void {
  globalForOpenAI.openai = undefined
  globalForOpenAI.openaiInitialized = false
}

/**
 * Check if OpenAI is configured and available
 */
export async function isOpenAIConfigured(): Promise<boolean> {
  const provider = await getConfiguredProvider()
  if (provider === 'litellm') {
    const baseURL = await getBaseURL()
    return !!baseURL
  }
  if (provider === 'anthropic') {
    const apiKey = await getAnthropicApiKey()
    return !!apiKey
  }
  const apiKey = await getOpenAIApiKey()
  return !!apiKey
}
/**
 * List available models from OpenAI API
 */
export async function listAvailableModels(): Promise<{
  success: boolean
  models?: string[]
  error?: string
  manualEntry?: boolean
}> {
  try {
    const provider = await getConfiguredProvider()
    // LiteLLM proxy for ChatGPT subscription doesn't support models.list()
    if (provider === 'litellm') {
      return {
        success: true,
        models: [],
        manualEntry: true,
      }
    }
    // Anthropic: return hardcoded Claude model list
    if (provider === 'anthropic') {
      const apiKey = await getAnthropicApiKey()
      if (!apiKey) {
        return { success: false, error: 'Anthropic API key not configured' }
      }
      return {
        success: true,
        models: [...ANTHROPIC_CLAUDE_MODELS],
      }
    }
    const client = await getOpenAI()
    if (!client) {
      return {
        success: false,
        error: 'OpenAI API key not configured',
      }
    }
    const response = await (client as OpenAI).models.list()
    const chatModels = response.data
      .filter((m) => m.id.includes('gpt') || m.id.includes('o1') || m.id.includes('o3') || m.id.includes('o4'))
      .map((m) => m.id)
      .sort()
    return {
      success: true,
      models: chatModels,
    }
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Unknown error',
    }
  }
}
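
// Result shapes by provider (illustrative; the manualEntry flag presumably
// tells the settings UI to offer free-text model entry instead of a dropdown):
//   litellm   → { success: true, models: [], manualEntry: true }
//   anthropic → { success: true, models: [...ANTHROPIC_CLAUDE_MODELS] }
//   openai    → { success: true, models: ['gpt-4o', 'gpt-4o-mini', ...] } // filtered + sorted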
/**
 * Validate that a specific model is available
 */
export async function validateModel(modelId: string): Promise<{
  valid: boolean
  error?: string
}> {
  try {
    const client = await getOpenAI()
    if (!client) {
      return {
        valid: false,
        error: 'AI API key not configured',
      }
    }
    const provider = await getConfiguredProvider()
    // For Anthropic, use minimal max_tokens
    const params = buildCompletionParams(modelId, {
      messages: [{ role: 'user', content: 'test' }],
      maxTokens: provider === 'anthropic' ? 16 : 1,
    })
    await client.chat.completions.create(params)
    return { valid: true }
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error'
    // Check for specific model errors
    if (message.includes('does not exist') || message.includes('model_not_found')) {
      return {
        valid: false,
        error: `Model "${modelId}" is not available with your API key`,
      }
    }
    return {
      valid: false,
      error: message,
    }
  }
}
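
// Usage sketch (hypothetical settings-save handler; `showError` and
// `saveSetting` are placeholders, not functions in this module):
//   const check = await validateModel('gpt-4o-mini')
//   if (!check.valid) showError(check.error)
//   else await saveSetting('ai_model', 'gpt-4o-mini')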
/**
 * Test OpenAI connection with the configured model
 */
export async function testOpenAIConnection(): Promise<{
  success: boolean
  error?: string
  model?: string
  modelTested?: string
}> {
  try {
    const client = await getOpenAI()
    const provider = await getConfiguredProvider()
    if (!client) {
      const label = provider === 'anthropic' ? 'Anthropic' : 'OpenAI'
      return {
        success: false,
        error: `${label} API key not configured`,
      }
    }
    // Get the configured model
    const configuredModel = await getConfiguredModel()
    // Test with the configured model using correct parameters
    const params = buildCompletionParams(configuredModel, {
      messages: [{ role: 'user', content: 'Hello' }],
      maxTokens: provider === 'anthropic' ? 16 : 5,
    })
    const response = await client.chat.completions.create(params)
    return {
      success: true,
      model: response.model,
      modelTested: configuredModel,
    }
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error'
    const configuredModel = await getConfiguredModel()
    // Check for model-specific errors
    if (message.includes('does not exist') || message.includes('model_not_found') || message.includes('not_found_error')) {
      return {
        success: false,
        error: `Model "${configuredModel}" is not available. Check Settings → AI to select a valid model.`,
        modelTested: configuredModel,
      }
    }
    return {
      success: false,
      error: message,
      modelTested: configuredModel,
    }
  }
}
// Default models for different use cases
export const AI_MODELS = {
  ASSIGNMENT: 'gpt-4o', // Best for complex reasoning
  QUICK: 'gpt-4o-mini', // Faster, cheaper for simple tasks
} as const

/**
 * Get the admin-configured AI model from SystemSettings.
 * Falls back to the provided default if not configured.
 */
export async function getConfiguredModel(fallback: string = AI_MODELS.ASSIGNMENT): Promise<string> {
  try {
    const setting = await prisma.systemSettings.findUnique({
      where: { key: 'ai_model' },
    })
    return setting?.value || process.env.OPENAI_MODEL || fallback
  } catch {
    return process.env.OPENAI_MODEL || fallback
  }
}
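
// Resolution order (sketch): SystemSettings 'ai_model' → OPENAI_MODEL env var →
// caller-supplied fallback (default AI_MODELS.ASSIGNMENT). For example, when
// neither the setting nor the env var is set:
//   await getConfiguredModel()                // → 'gpt-4o'
//   await getConfiguredModel(AI_MODELS.QUICK) // → 'gpt-4o-mini'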