Reduce AI costs: switch tagging to gpt-4o-mini, add custom base URL support
Some checks failed
Build and Push Docker Image / build (push) Has been cancelled

- Change AI tagging to use AI_MODELS.QUICK (gpt-4o-mini) instead of gpt-4o for
  10-15x cost reduction on classification tasks
- Add openai_base_url system setting for OpenAI-compatible providers
  (OpenRouter, Groq, Together AI, local models)
- Reset OpenAI client singleton when API key, base URL, or model changes
- Add base URL field to AI settings form with provider examples

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt
2026-02-16 15:34:59 +01:00
parent f12c29103c
commit 014bb15890
5 changed files with 74 additions and 4 deletions

View File

@@ -201,6 +201,12 @@ export const settingsRouter = router({
clearStorageProviderCache()
}
// Reset OpenAI client singleton if the API key, base URL, or AI model changed
if (input.settings.some((s) => s.key === 'openai_api_key' || s.key === 'openai_base_url' || s.key === 'ai_model')) {
const { resetOpenAIClient } = await import('@/lib/openai')
resetOpenAIClient()
}
// Audit log
await logAudit({
prisma: ctx.prisma,

View File

@@ -16,7 +16,7 @@
*/
import { prisma } from '@/lib/prisma'
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import {
@@ -178,7 +178,8 @@ async function getAISuggestions(
return { suggestions: [], tokensUsed: 0 }
}
const model = await getConfiguredModel()
// Use QUICK model — tag classification is simple, doesn't need expensive reasoning
const model = await getConfiguredModel(AI_MODELS.QUICK)
// Build compact tag list for prompt
const tagList = availableTags.map((t) => ({
@@ -294,7 +295,8 @@ async function getAISuggestionsBatch(
return { suggestionsMap: new Map(), tokensUsed: 0 }
}
const model = await getConfiguredModel()
// Use QUICK model — tag classification is simple, doesn't need expensive reasoning
const model = await getConfiguredModel(AI_MODELS.QUICK)
const suggestionsMap = new Map<string, TagSuggestion[]>()
// Build compact tag list (sent once for entire batch)