Reduce AI costs: switch tagging to gpt-4o-mini, add custom base URL support
Some checks failed
Build and Push Docker Image / build (push) Has been cancelled

- Change AI tagging to use AI_MODELS.QUICK (gpt-4o-mini) instead of gpt-4o for
  10-15x cost reduction on classification tasks
- Add openai_base_url system setting for OpenAI-compatible providers
  (OpenRouter, Groq, Together AI, local models)
- Reset OpenAI client singleton when API key, base URL, or model changes
- Add base URL field to AI settings form with provider examples

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt
2026-02-16 15:34:59 +01:00
parent f12c29103c
commit 014bb15890
5 changed files with 74 additions and 4 deletions

View File

@@ -201,6 +201,12 @@ export const settingsRouter = router({
clearStorageProviderCache()
}
// Reset OpenAI client singleton if the API key, base URL, or AI model changed
if (input.settings.some((s) => s.key === 'openai_api_key' || s.key === 'openai_base_url' || s.key === 'ai_model')) {
const { resetOpenAIClient } = await import('@/lib/openai')
resetOpenAIClient()
}
// Audit log
await logAudit({
prisma: ctx.prisma,

View File

@@ -16,7 +16,7 @@
*/
import { prisma } from '@/lib/prisma'
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import {
@@ -178,7 +178,8 @@ async function getAISuggestions(
return { suggestions: [], tokensUsed: 0 }
}
const model = await getConfiguredModel()
// Use QUICK model — tag classification is simple, doesn't need expensive reasoning
const model = await getConfiguredModel(AI_MODELS.QUICK)
// Build compact tag list for prompt
const tagList = availableTags.map((t) => ({
@@ -294,7 +295,8 @@ async function getAISuggestionsBatch(
return { suggestionsMap: new Map(), tokensUsed: 0 }
}
const model = await getConfiguredModel()
// Use QUICK model — tag classification is simple, doesn't need expensive reasoning
const model = await getConfiguredModel(AI_MODELS.QUICK)
const suggestionsMap = new Map<string, TagSuggestion[]>()
// Build compact tag list (sent once for entire batch)