Compare commits

...

4 Commits

Author SHA1 Message Date
Matt
014bb15890 Reduce AI costs: switch tagging to gpt-4o-mini, add custom base URL support
Some checks failed
Build and Push Docker Image / build (push) Has been cancelled
- Change AI tagging to use AI_MODELS.QUICK (gpt-4o-mini) instead of gpt-4o for
  10-15x cost reduction on classification tasks
- Add openai_base_url system setting for OpenAI-compatible providers
  (OpenRouter, Groq, Together AI, local models)
- Reset OpenAI client singleton when API key, base URL, or model changes
- Add base URL field to AI settings form with provider examples

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 15:34:59 +01:00
Matt
f12c29103c Fix project detail crash: replace dynamic hooks with single query
The project detail page called useQuery inside .map() to fetch file
requirements per round, violating React's rules of hooks. When
competitionRounds changed from [] to [round1, round2], the hook count
changed, causing React to crash with "Cannot read properties of
undefined (reading 'length')".

Fix: Add listRequirementsByRounds endpoint that accepts multiple
roundIds in one query, replacing the dynamic hook pattern with a
single stable useQuery call.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 15:30:44 +01:00
Matt
65a22e6f19 Optimize all AI functions for efficiency and speed
- AI Tagging: batch 10 projects per API call with 3 concurrent batches (~10x faster)
  - New `tagProjectsBatch()` with `getAISuggestionsBatch()` for multi-project prompts
  - Single DB query for all projects, single anonymization pass
  - Compact JSON in prompts (no pretty-print) saves tokens
- AI Shortlist: run STARTUP and BUSINESS_CONCEPT categories in parallel (2x faster)
- AI Filtering: increase default parallel batches from 1 to 3 (3x faster)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 14:02:38 +01:00
Matt
989db4dc14 Allow AI tagging dialog to close during processing, show background progress
- Remove blocking guard on dialog close when tagging is in progress
- Change Cancel button to "Run in Background" during processing
- Add amber border + spinner + progress % on AI Tags button when job runs in background
- Job already runs server-side and sends in-app notification on completion

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 13:58:03 +01:00
11 changed files with 488 additions and 64 deletions

View File

@@ -105,14 +105,13 @@ function ProjectDetailContent({ projectId }: { projectId: string }) {
// Extract all rounds from the competition // Extract all rounds from the competition
const competitionRounds = competition?.rounds || [] const competitionRounds = competition?.rounds || []
// Fetch requirements for each round // Fetch requirements for all rounds in a single query (avoids dynamic hook violation)
const requirementQueries = competitionRounds.map((round: { id: string; name: string }) => const roundIds = competitionRounds.map((r: { id: string }) => r.id)
trpc.file.listRequirements.useQuery({ roundId: round.id }) const { data: allRequirements = [] } = trpc.file.listRequirementsByRounds.useQuery(
{ roundIds },
{ enabled: roundIds.length > 0 }
) )
// Combine requirements from all rounds
const allRequirements = requirementQueries.flatMap((q: { data?: unknown[] }) => q.data || [])
const utils = trpc.useUtils() const utils = trpc.useUtils()
if (isLoading) { if (isLoading) {
@@ -592,7 +591,7 @@ function ProjectDetailContent({ projectId }: { projectId: string }) {
</p> </p>
)} )}
<div className="flex items-center gap-2 text-xs text-muted-foreground mt-0.5"> <div className="flex items-center gap-2 text-xs text-muted-foreground mt-0.5">
{req.acceptedMimeTypes.length > 0 && ( {req.acceptedMimeTypes?.length > 0 && (
<span> <span>
{req.acceptedMimeTypes.map((mime: string) => { {req.acceptedMimeTypes.map((mime: string) => {
if (mime === 'application/pdf') return 'PDF' if (mime === 'application/pdf') return 'PDF'

View File

@@ -366,8 +366,9 @@ export default function ProjectsPage() {
} }
const handleCloseTaggingDialog = () => { const handleCloseTaggingDialog = () => {
if (!taggingInProgress) {
setAiTagDialogOpen(false) setAiTagDialogOpen(false)
// Only reset job state if not in progress (preserve polling for background jobs)
if (!taggingInProgress) {
setActiveTaggingJobId(null) setActiveTaggingJobId(null)
setSelectedRoundForTagging('') setSelectedRoundForTagging('')
setSelectedProgramForTagging('') setSelectedProgramForTagging('')
@@ -618,9 +619,22 @@ export default function ProjectsPage() {
</p> </p>
</div> </div>
<div className="flex flex-wrap gap-2"> <div className="flex flex-wrap gap-2">
<Button variant="outline" onClick={() => setAiTagDialogOpen(true)}> <Button
variant="outline"
onClick={() => setAiTagDialogOpen(true)}
className={taggingInProgress ? 'border-amber-400 bg-amber-50 dark:bg-amber-950/20' : ''}
>
{taggingInProgress ? (
<Loader2 className="mr-2 h-4 w-4 animate-spin text-amber-600" />
) : (
<Bot className="mr-2 h-4 w-4" /> <Bot className="mr-2 h-4 w-4" />
)}
AI Tags AI Tags
{taggingInProgress && (
<span className="ml-1.5 text-[10px] text-amber-600 font-medium">
{taggingProgressPercent}%
</span>
)}
</Button> </Button>
<Button variant="outline" asChild> <Button variant="outline" asChild>
<Link href="/admin/projects/pool"> <Link href="/admin/projects/pool">
@@ -1833,9 +1847,8 @@ export default function ProjectsPage() {
<Button <Button
variant="outline" variant="outline"
onClick={handleCloseTaggingDialog} onClick={handleCloseTaggingDialog}
disabled={taggingInProgress}
> >
Cancel {taggingInProgress ? 'Run in Background' : 'Cancel'}
</Button> </Button>
<Button <Button
onClick={handleStartTagging} onClick={handleStartTagging}

View File

@@ -36,6 +36,7 @@ const formSchema = z.object({
ai_model: z.string(), ai_model: z.string(),
ai_send_descriptions: z.boolean(), ai_send_descriptions: z.boolean(),
openai_api_key: z.string().optional(), openai_api_key: z.string().optional(),
openai_base_url: z.string().optional(),
}) })
type FormValues = z.infer<typeof formSchema> type FormValues = z.infer<typeof formSchema>
@@ -47,6 +48,7 @@ interface AISettingsFormProps {
ai_model?: string ai_model?: string
ai_send_descriptions?: string ai_send_descriptions?: string
openai_api_key?: string openai_api_key?: string
openai_base_url?: string
} }
} }
@@ -61,6 +63,7 @@ export function AISettingsForm({ settings }: AISettingsFormProps) {
ai_model: settings.ai_model || 'gpt-4o', ai_model: settings.ai_model || 'gpt-4o',
ai_send_descriptions: settings.ai_send_descriptions === 'true', ai_send_descriptions: settings.ai_send_descriptions === 'true',
openai_api_key: '', openai_api_key: '',
openai_base_url: settings.openai_base_url || '',
}, },
}) })
@@ -113,6 +116,9 @@ export function AISettingsForm({ settings }: AISettingsFormProps) {
settingsToUpdate.push({ key: 'openai_api_key', value: data.openai_api_key }) settingsToUpdate.push({ key: 'openai_api_key', value: data.openai_api_key })
} }
// Save base URL (empty string clears it)
settingsToUpdate.push({ key: 'openai_base_url', value: data.openai_base_url?.trim() || '' })
updateSettings.mutate({ settings: settingsToUpdate }) updateSettings.mutate({ settings: settingsToUpdate })
} }
@@ -208,6 +214,27 @@ export function AISettingsForm({ settings }: AISettingsFormProps) {
)} )}
/> />
<FormField
control={form.control}
name="openai_base_url"
render={({ field }) => (
<FormItem>
<FormLabel>API Base URL (Optional)</FormLabel>
<FormControl>
<Input
placeholder="https://api.openai.com/v1"
{...field}
/>
</FormControl>
<FormDescription>
Custom base URL for OpenAI-compatible providers. Leave blank for OpenAI.
Use <code className="text-xs bg-muted px-1 rounded">https://openrouter.ai/api/v1</code> for OpenRouter (access Claude, Gemini, Llama, etc.)
</FormDescription>
<FormMessage />
</FormItem>
)}
/>
<FormField <FormField
control={form.control} control={form.control}
name="ai_model" name="ai_model"

View File

@@ -84,6 +84,7 @@ export function SettingsContent({ initialSettings, isSuperAdmin = true }: Settin
'ai_model', 'ai_model',
'ai_send_descriptions', 'ai_send_descriptions',
'openai_api_key', 'openai_api_key',
'openai_base_url',
]) ])
const brandingSettings = getSettingsByKeys([ const brandingSettings = getSettingsByKeys([

View File

@@ -187,7 +187,25 @@ async function getOpenAIApiKey(): Promise<string | null> {
} }
/** /**
* Create OpenAI client instance * Get custom base URL for OpenAI-compatible providers.
* Supports OpenRouter, Together AI, Groq, local models, etc.
* Set via Settings → AI or OPENAI_BASE_URL env var.
*/
async function getBaseURL(): Promise<string | undefined> {
  // Environment fallback; an empty or unset variable is reported as undefined.
  const fromEnvironment = (): string | undefined => process.env.OPENAI_BASE_URL || undefined

  try {
    const storedSetting = await prisma.systemSettings.findUnique({
      where: { key: 'openai_base_url' },
    })

    // A non-empty stored value takes precedence over the environment.
    if (storedSetting?.value) {
      return storedSetting.value
    }
    return fromEnvironment()
  } catch {
    // The settings lookup failed — fall back to the environment only.
    return fromEnvironment()
  }
}
/**
* Create OpenAI client instance.
* Supports custom baseURL for OpenAI-compatible providers
* (OpenRouter, Groq, Together AI, local models, etc.)
*/ */
async function createOpenAIClient(): Promise<OpenAI | null> { async function createOpenAIClient(): Promise<OpenAI | null> {
const apiKey = await getOpenAIApiKey() const apiKey = await getOpenAIApiKey()
@@ -197,8 +215,15 @@ async function createOpenAIClient(): Promise<OpenAI | null> {
return null return null
} }
const baseURL = await getBaseURL()
if (baseURL) {
console.log(`[OpenAI] Using custom base URL: ${baseURL}`)
}
return new OpenAI({ return new OpenAI({
apiKey, apiKey,
...(baseURL ? { baseURL } : {}),
}) })
} }
@@ -221,6 +246,15 @@ export async function getOpenAI(): Promise<OpenAI | null> {
return client return client
} }
/**
 * Drop the cached OpenAI client.
 *
 * Clears both the cached instance and the "initialized" flag stored on
 * `globalForOpenAI`. Intended to be called after a settings change.
 */
export function resetOpenAIClient(): void {
  globalForOpenAI.openaiInitialized = false
  globalForOpenAI.openai = undefined
}
/** /**
* Check if OpenAI is configured and available * Check if OpenAI is configured and available
*/ */

View File

@@ -818,6 +818,20 @@ export const fileRouter = router({
}) })
}), }),
/**
 * Fetch the file requirements of several rounds with one query.
 *
 * Accepts up to 50 round IDs and returns every matching requirement ordered
 * by ascending `sortOrder`. An empty ID list returns an empty array without
 * touching the database.
 */
listRequirementsByRounds: protectedProcedure
  .input(z.object({ roundIds: z.array(z.string()).max(50) }))
  .query(async ({ ctx, input: { roundIds } }) => {
    return roundIds.length === 0
      ? []
      : ctx.prisma.fileRequirement.findMany({
          where: { roundId: { in: roundIds } },
          orderBy: { sortOrder: 'asc' },
        })
  }),
/** /**
* Create a file requirement for a stage (admin only) * Create a file requirement for a stage (admin only)
*/ */

View File

@@ -201,6 +201,12 @@ export const settingsRouter = router({
clearStorageProviderCache() clearStorageProviderCache()
} }
// Reset OpenAI client if API key, base URL, or model changed
if (input.settings.some((s) => s.key === 'openai_api_key' || s.key === 'openai_base_url' || s.key === 'ai_model')) {
const { resetOpenAIClient } = await import('@/lib/openai')
resetOpenAIClient()
}
// Audit log // Audit log
await logAudit({ await logAudit({
prisma: ctx.prisma, prisma: ctx.prisma,

View File

@@ -5,6 +5,7 @@ import { prisma } from '@/lib/prisma'
import { logAudit } from '../utils/audit' import { logAudit } from '../utils/audit'
import { import {
tagProject, tagProject,
tagProjectsBatch,
getTagSuggestions, getTagSuggestions,
addProjectTag, addProjectTag,
removeProjectTag, removeProjectTag,
@@ -17,7 +18,7 @@ import {
NotificationTypes, NotificationTypes,
} from '../services/in-app-notification' } from '../services/in-app-notification'
// Background job runner for tagging // Background job runner for tagging — uses batched API calls for efficiency
async function runTaggingJob(jobId: string, userId: string) { async function runTaggingJob(jobId: string, userId: string) {
const job = await prisma.taggingJob.findUnique({ const job = await prisma.taggingJob.findUnique({
where: { id: jobId }, where: { id: jobId },
@@ -28,7 +29,7 @@ async function runTaggingJob(jobId: string, userId: string) {
return return
} }
console.log(`[AI Tagging Job] Starting job ${jobId}...`) console.log(`[AI Tagging Job] Starting job ${jobId} (batched mode)...`)
// Mark as running // Mark as running
await prisma.taggingJob.update({ await prisma.taggingJob.update({
@@ -56,7 +57,7 @@ async function runTaggingJob(jobId: string, userId: string) {
const allProjects = await prisma.project.findMany({ const allProjects = await prisma.project.findMany({
where: whereClause, where: whereClause,
select: { id: true, title: true, tags: true }, select: { id: true, title: true, tags: true, projectTags: { select: { tagId: true } } },
}) })
const untaggedProjects = allProjects.filter(p => p.tags.length === 0) const untaggedProjects = allProjects.filter(p => p.tags.length === 0)
@@ -83,48 +84,33 @@ async function runTaggingJob(jobId: string, userId: string) {
return return
} }
let taggedCount = 0
let failedCount = 0
const errors: string[] = []
const startTime = Date.now() const startTime = Date.now()
for (let i = 0; i < untaggedProjects.length; i++) { // Use batched tagging — processes 10 projects per API call, 3 concurrent calls
const project = untaggedProjects[i] const { results, totalTokens } = await tagProjectsBatch(
console.log(`[AI Tagging Job] Processing ${i + 1}/${untaggedProjects.length}: "${project.title.substring(0, 40)}..."`) untaggedProjects,
userId,
try { async (processed, total) => {
const result = await tagProject(project.id, userId) // Update job progress on each batch completion
taggedCount++ const taggedSoFar = results?.length ?? processed
console.log(`[AI Tagging Job] ✓ Tagged with ${result.applied.length} tags`)
} catch (error) {
failedCount++
const errorMsg = error instanceof Error ? error.message : 'Unknown error'
errors.push(`${project.title}: ${errorMsg}`)
console.error(`[AI Tagging Job] ✗ Failed: ${errorMsg}`)
}
// Update progress
await prisma.taggingJob.update({ await prisma.taggingJob.update({
where: { id: jobId }, where: { id: jobId },
data: { data: {
processedCount: i + 1, processedCount: processed,
taggedCount, taggedCount: taggedSoFar,
failedCount,
errorsJson: errors.length > 0 ? errors.slice(0, 20) : undefined, // Keep last 20 errors
}, },
}) })
// Log progress every 10 projects
if ((i + 1) % 10 === 0) {
const elapsed = ((Date.now() - startTime) / 1000).toFixed(0) const elapsed = ((Date.now() - startTime) / 1000).toFixed(0)
const avgTime = (Date.now() - startTime) / (i + 1) / 1000 console.log(`[AI Tagging Job] Progress: ${processed}/${total} (${elapsed}s elapsed)`)
const remaining = avgTime * (untaggedProjects.length - i - 1)
console.log(`[AI Tagging Job] Progress: ${i + 1}/${untaggedProjects.length} (${elapsed}s elapsed, ~${remaining.toFixed(0)}s remaining)`)
}
} }
)
const taggedCount = results.filter(r => r.applied.length > 0).length
const failedCount = untaggedProjects.length - results.length
const totalTime = ((Date.now() - startTime) / 1000).toFixed(1) const totalTime = ((Date.now() - startTime) / 1000).toFixed(1)
console.log(`[AI Tagging Job] Complete: ${taggedCount} tagged, ${failedCount} failed in ${totalTime}s`) console.log(`[AI Tagging Job] Complete: ${taggedCount} tagged, ${failedCount} failed in ${totalTime}s (${totalTokens} tokens)`)
// Mark as completed // Mark as completed
await prisma.taggingJob.update({ await prisma.taggingJob.update({
@@ -132,7 +118,9 @@ async function runTaggingJob(jobId: string, userId: string) {
data: { data: {
status: 'COMPLETED', status: 'COMPLETED',
completedAt: new Date(), completedAt: new Date(),
errorsJson: errors.length > 0 ? errors : undefined, processedCount: results.length,
taggedCount,
failedCount,
}, },
}) })
@@ -144,7 +132,7 @@ async function runTaggingJob(jobId: string, userId: string) {
linkUrl: '/admin/projects', linkUrl: '/admin/projects',
linkLabel: 'View Projects', linkLabel: 'View Projects',
priority: 'normal', priority: 'normal',
metadata: { jobId, taggedCount, failedCount, skippedCount }, metadata: { jobId, taggedCount, failedCount, skippedCount, totalTokens },
}) })
} catch (error) { } catch (error) {

View File

@@ -142,7 +142,7 @@ interface FilteringRuleInput {
const DEFAULT_BATCH_SIZE = 20 const DEFAULT_BATCH_SIZE = 20
const MAX_BATCH_SIZE = 50 const MAX_BATCH_SIZE = 50
const MIN_BATCH_SIZE = 1 const MIN_BATCH_SIZE = 1
const DEFAULT_PARALLEL_BATCHES = 1 const DEFAULT_PARALLEL_BATCHES = 3
const MAX_PARALLEL_BATCHES = 10 const MAX_PARALLEL_BATCHES = 10
// Structured system prompt for AI screening // Structured system prompt for AI screening

View File

@@ -344,8 +344,8 @@ export async function generateShortlist(
let totalTokens = 0 let totalTokens = 0
const allErrors: string[] = [] const allErrors: string[] = []
// Run each category independently // Run categories in parallel for efficiency
for (const cat of categories) { const categoryPromises = categories.map(async (cat) => {
const catTopN = cat === 'STARTUP' const catTopN = cat === 'STARTUP'
? (startupTopN ?? topN) ? (startupTopN ?? topN)
: (conceptTopN ?? topN) : (conceptTopN ?? topN)
@@ -357,6 +357,12 @@ export async function generateShortlist(
prisma, prisma,
) )
return { cat, result }
})
const categoryResults = await Promise.all(categoryPromises)
for (const { cat, result } of categoryResults) {
if (cat === 'STARTUP') { if (cat === 'STARTUP') {
allRecommendations.STARTUP = result.recommendations allRecommendations.STARTUP = result.recommendations
} else { } else {

View File

@@ -5,7 +5,7 @@
* *
* Features: * Features:
* - Single project tagging (on-submit or manual) * - Single project tagging (on-submit or manual)
* - Batch tagging for rounds * - Batch tagging with concurrent processing (10 projects/batch, 3 concurrent)
* - Confidence scores for each tag * - Confidence scores for each tag
* - Additive only - never removes existing tags * - Additive only - never removes existing tags
* *
@@ -16,7 +16,7 @@
*/ */
import { prisma } from '@/lib/prisma' import { prisma } from '@/lib/prisma'
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai' import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage' import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors' import { classifyAIError, createParseError, logAIError } from './ai-errors'
import { import {
@@ -53,8 +53,10 @@ interface AvailableTag {
const CONFIDENCE_THRESHOLD = 0.5 const CONFIDENCE_THRESHOLD = 0.5
const DEFAULT_MAX_TAGS = 5 const DEFAULT_MAX_TAGS = 5
// Size of each slice of anonymized projects sent in a single request by tagProjectsBatch().
const BATCH_SIZE = 10 // Projects per API call
// How many of those requests tagProjectsBatch() starts together before awaiting them all.
const BATCH_CONCURRENCY = 3 // Concurrent API calls
// System prompt optimized for tag suggestion // System prompt optimized for single-project tag suggestion
const TAG_SUGGESTION_SYSTEM_PROMPT = `You are an expert at categorizing ocean conservation and sustainability projects. const TAG_SUGGESTION_SYSTEM_PROMPT = `You are an expert at categorizing ocean conservation and sustainability projects.
Analyze the project and suggest the most relevant expertise tags from the provided list. Analyze the project and suggest the most relevant expertise tags from the provided list.
@@ -78,6 +80,36 @@ Rules:
- Maximum 7 suggestions per project - Maximum 7 suggestions per project
- Be conservative - only suggest tags that truly apply` - Be conservative - only suggest tags that truly apply`
// System prompt optimized for batch tagging (multiple projects in one call).
// Sent as the `system` message by getAISuggestionsBatch(). The response format
// described here (a top-level `projects` array whose entries carry `project_id`
// and `suggestions`) is what that function parses, so keep the two in sync.
const BATCH_TAG_SYSTEM_PROMPT = `You are an expert at categorizing ocean conservation and sustainability projects.
Analyze EACH project and suggest the most relevant expertise tags from the provided list.
Consider each project's focus areas, technology, methodology, and domain.
Return JSON with this format:
{
"projects": [
{
"project_id": "PROJECT_001",
"suggestions": [
{
"tag_name": "exact tag name from list",
"confidence": 0.0-1.0,
"reasoning": "brief explanation"
}
]
}
]
}
Rules:
- Only suggest tags from the provided list (exact names)
- Order by relevance (most relevant first)
- Confidence should reflect how well the tag matches
- Maximum 7 suggestions per project
- Be conservative - only suggest tags that truly apply
- Return results for ALL projects provided`
// ─── Helper Functions ──────────────────────────────────────────────────────── // ─── Helper Functions ────────────────────────────────────────────────────────
/** /**
@@ -132,7 +164,8 @@ export async function getAvailableTags(): Promise<AvailableTag[]> {
// ─── AI Tagging Core ───────────────────────────────────────────────────────── // ─── AI Tagging Core ─────────────────────────────────────────────────────────
/** /**
* Call OpenAI to get tag suggestions for a project * Call OpenAI to get tag suggestions for a single project
* Used for on-demand single-project tagging
*/ */
async function getAISuggestions( async function getAISuggestions(
anonymizedProject: AnonymizedProjectForAI, anonymizedProject: AnonymizedProjectForAI,
@@ -145,9 +178,10 @@ async function getAISuggestions(
return { suggestions: [], tokensUsed: 0 } return { suggestions: [], tokensUsed: 0 }
} }
const model = await getConfiguredModel() // Use QUICK model — tag classification is simple, doesn't need expensive reasoning
const model = await getConfiguredModel(AI_MODELS.QUICK)
// Build tag list for prompt // Build compact tag list for prompt
const tagList = availableTags.map((t) => ({ const tagList = availableTags.map((t) => ({
name: t.name, name: t.name,
category: t.category, category: t.category,
@@ -155,10 +189,10 @@ async function getAISuggestions(
})) }))
const userPrompt = `PROJECT: const userPrompt = `PROJECT:
${JSON.stringify(anonymizedProject, null, 2)} ${JSON.stringify(anonymizedProject)}
AVAILABLE TAGS: AVAILABLE TAGS:
${JSON.stringify(tagList, null, 2)} ${JSON.stringify(tagList)}
Suggest relevant tags for this project.` Suggest relevant tags for this project.`
@@ -246,6 +280,161 @@ Suggest relevant tags for this project.`
} }
} }
/**
 * Call OpenAI to get tag suggestions for a batch of projects in one API call.
 *
 * The model's JSON is treated as untrusted: entries without a string
 * `project_id`, suggestions without a string `tag_name`, and tag names that
 * are not in `availableTags` are skipped instead of throwing. Confidence is
 * clamped to [0, 1]; a missing or non-numeric confidence becomes 0.
 *
 * A response that is not valid JSON is retried up to MAX_PARSE_RETRIES times.
 * Token usage is summed over every attempt and logged once, on success or on
 * failure.
 *
 * @param anonymizedProjects Anonymized projects to classify in this request
 * @param availableTags Tags the model may choose from (matched case-insensitively by name)
 * @param userId User recorded in the AI usage log
 * @returns Map of the `project_id` echoed by the model -> suggestions, plus total tokens used
 * @throws Rethrows any API or parse error after logging it
 */
async function getAISuggestionsBatch(
  anonymizedProjects: AnonymizedProjectForAI[],
  availableTags: AvailableTag[],
  userId?: string
): Promise<{ suggestionsMap: Map<string, TagSuggestion[]>; tokensUsed: number }> {
  const openai = await getOpenAI()
  if (!openai) {
    console.warn('[AI Tagging] OpenAI not configured')
    return { suggestionsMap: new Map(), tokensUsed: 0 }
  }

  // Use QUICK model — tag classification is simple, doesn't need expensive reasoning
  const model = await getConfiguredModel(AI_MODELS.QUICK)
  const suggestionsMap = new Map<string, TagSuggestion[]>()

  // Build compact tag list (sent once for entire batch)
  const tagList = availableTags.map((t) => ({
    name: t.name,
    category: t.category,
    description: t.description,
  }))

  // Case-insensitive name lookup. The first tag wins on a name clash, which
  // matches what a linear find() over availableTags would return.
  const tagsByLowerName = new Map<string, AvailableTag>()
  for (const tag of availableTags) {
    const key = tag.name.toLowerCase()
    if (!tagsByLowerName.has(key)) {
      tagsByLowerName.set(key, tag)
    }
  }

  const userPrompt = `PROJECTS (${anonymizedProjects.length}):
${JSON.stringify(anonymizedProjects)}
AVAILABLE TAGS:
${JSON.stringify(tagList)}
Suggest relevant tags for each project.`

  const MAX_PARSE_RETRIES = 2
  const maxTokens = Math.min(4000, anonymizedProjects.length * 500)

  // Accumulated across every attempt so retries and failures are accounted for.
  let promptTokens = 0
  let completionTokens = 0
  let totalTokens = 0

  try {
    let projectEntries: unknown[] = []

    for (let attempt = 0; ; attempt++) {
      const params = buildCompletionParams(model, {
        messages: [
          { role: 'system', content: BATCH_TAG_SYSTEM_PROMPT },
          {
            role: 'user',
            content:
              attempt === 0
                ? userPrompt
                : userPrompt + '\n\nIMPORTANT: Please ensure valid JSON output.',
          },
        ],
        jsonMode: true,
        temperature: 0.1,
        maxTokens,
      })

      const response = await openai.chat.completions.create(params)
      const usage = extractTokenUsage(response)
      promptTokens += usage.promptTokens
      completionTokens += usage.completionTokens
      totalTokens += usage.totalTokens

      try {
        const content = response.choices[0]?.message?.content
        if (!content) throw new Error('Empty response from AI')

        const raw: unknown = JSON.parse(content)
        // Accept either { projects: [...] } or a bare array; anything else is empty.
        const candidate: unknown = Array.isArray(raw)
          ? raw
          : typeof raw === 'object' && raw !== null
            ? (raw as { projects?: unknown }).projects
            : undefined
        projectEntries = Array.isArray(candidate) ? candidate : []
        break
      } catch (parseError) {
        // Only malformed JSON is worth retrying; other failures propagate.
        if (parseError instanceof SyntaxError && attempt < MAX_PARSE_RETRIES) {
          console.warn(
            `[AI Tagging Batch] JSON parse failed, retrying (${attempt + 1}/${MAX_PARSE_RETRIES})`
          )
          continue
        }
        throw parseError
      }
    }

    // Log usage for the entire batch
    await logAIUsage({
      userId,
      action: 'PROJECT_TAGGING',
      entityType: 'Project',
      model,
      promptTokens,
      completionTokens,
      totalTokens,
      batchSize: anonymizedProjects.length,
      itemsProcessed: projectEntries.length,
      status: 'SUCCESS',
    })

    // Map results back to TagSuggestion format, skipping malformed entries
    for (const entry of projectEntries) {
      if (typeof entry !== 'object' || entry === null) continue
      const { project_id: projectId, suggestions: rawSuggestions } = entry as {
        project_id?: unknown
        suggestions?: unknown
      }
      if (typeof projectId !== 'string') continue

      const suggestions: TagSuggestion[] = []
      for (const item of Array.isArray(rawSuggestions) ? rawSuggestions : []) {
        if (typeof item !== 'object' || item === null) continue
        const { tag_name: tagName, confidence, reasoning } = item as {
          tag_name?: unknown
          confidence?: unknown
          reasoning?: unknown
        }
        if (typeof tagName !== 'string') continue

        const tag = tagsByLowerName.get(tagName.toLowerCase())
        if (!tag) continue

        const numericConfidence = Number(confidence)
        suggestions.push({
          tagId: tag.id,
          tagName: tag.name,
          confidence: Number.isNaN(numericConfidence)
            ? 0
            : Math.max(0, Math.min(1, numericConfidence)),
          reasoning: typeof reasoning === 'string' ? reasoning : '',
        })
      }
      suggestionsMap.set(projectId, suggestions)
    }

    return { suggestionsMap, tokensUsed: totalTokens }
  } catch (error) {
    if (error instanceof SyntaxError) {
      const parseError = createParseError(error.message)
      logAIError('Tagging', 'getAISuggestionsBatch', parseError)
    }

    const classified = classifyAIError(error)
    logAIError('Tagging', 'getAISuggestionsBatch', classified)

    // Record the real model and whatever tokens were spent before the failure.
    await logAIUsage({
      userId,
      action: 'PROJECT_TAGGING',
      entityType: 'Project',
      model,
      promptTokens,
      completionTokens,
      totalTokens,
      batchSize: anonymizedProjects.length,
      itemsProcessed: 0,
      status: 'ERROR',
      errorMessage: error instanceof Error ? error.message : 'Unknown error',
    })

    throw error
  }
}
// ─── Public API ────────────────────────────────────────────────────────────── // ─── Public API ──────────────────────────────────────────────────────────────
/** /**
@@ -355,6 +544,153 @@ export async function tagProject(
} }
} }
/**
 * Tag a batch of projects using batched API calls with concurrency.
 * Much more efficient than tagging one-by-one for bulk operations.
 *
 * Projects are loaded in one query, anonymized in one pass, split into slices
 * of BATCH_SIZE and sent BATCH_CONCURRENCY requests at a time. A failed
 * request is logged and contributes no results; the remaining requests still
 * run. Each project yields at most one result even if the model repeats its ID.
 *
 * @param projects Array of { id, title, projectTags } to tag (only `id` is read here;
 *   full project data is re-fetched)
 * @param userId The user initiating the tagging
 * @param onProgress Called after each concurrent chunk with the number of projects
 *   attempted so far (including ones whose request failed or that the model omitted)
 *   and `projects.length`. An error thrown by the callback is logged and ignored so
 *   that progress reporting cannot abort a run that has already written tags.
 * @returns One TaggingResult per project the model returned suggestions for, plus
 *   the total tokens used across all requests
 * @throws If the anonymized data fails the PII validation check
 */
export async function tagProjectsBatch(
  projects: Array<{
    id: string
    title: string
    projectTags: Array<{ tagId: string }>
  }>,
  userId: string,
  onProgress?: (processed: number, total: number) => Promise<void>
): Promise<{ results: TaggingResult[]; totalTokens: number }> {
  const settings = await getTaggingSettings()
  if (!settings.enabled) {
    return { results: [], totalTokens: 0 }
  }

  const availableTags = await getAvailableTags()
  if (availableTags.length === 0) {
    return { results: [], totalTokens: 0 }
  }

  // Fetch full project data for all projects at once (single DB query)
  const fullProjects = await prisma.project.findMany({
    where: { id: { in: projects.map((p) => p.id) } },
    include: {
      projectTags: true,
      files: { select: { fileType: true } },
      _count: { select: { teamMembers: true, files: true } },
    },
  })
  const projectMap = new Map(fullProjects.map((p) => [p.id, p]))

  // Anonymize all projects at once
  // NOTE(review): the 'FILTERING' context is reused for tagging — confirm this is intended.
  const projectsWithRelations = fullProjects.map(toProjectWithRelations)
  const { anonymized, mappings } = anonymizeProjectsForAI(projectsWithRelations, 'FILTERING')

  if (!validateAnonymizedProjects(anonymized)) {
    throw new Error('GDPR compliance check failed: PII detected in anonymized data')
  }

  // Build mapping from anonymous ID to real project
  const anonToRealMap = new Map<string, string>()
  for (const mapping of mappings) {
    anonToRealMap.set(mapping.anonymousId, mapping.realId)
  }

  // Split into batches
  const batches: AnonymizedProjectForAI[][] = []
  for (let i = 0; i < anonymized.length; i += BATCH_SIZE) {
    batches.push(anonymized.slice(i, i + BATCH_SIZE))
  }

  const allResults: TaggingResult[] = []
  // Real project IDs that already produced a result; guards against the model
  // echoing the same project_id more than once.
  const handledProjectIds = new Set<string>()
  let totalTokens = 0
  let processedCount = 0

  // Process batches with concurrency
  for (let i = 0; i < batches.length; i += BATCH_CONCURRENCY) {
    const concurrentBatches = batches.slice(i, i + BATCH_CONCURRENCY)

    const batchResults = await Promise.all(
      concurrentBatches.map(async (batch) => {
        try {
          return await getAISuggestionsBatch(batch, availableTags, userId)
        } catch (error) {
          // One failed request must not sink the others in this chunk.
          console.error('[AI Tagging Batch] Batch failed:', error)
          return { suggestionsMap: new Map<string, TagSuggestion[]>(), tokensUsed: 0 }
        }
      })
    )

    // Process results from all concurrent batches
    for (const { suggestionsMap, tokensUsed } of batchResults) {
      totalTokens += tokensUsed

      for (const [anonId, suggestions] of suggestionsMap) {
        const realId = anonToRealMap.get(anonId)
        if (!realId || handledProjectIds.has(realId)) continue

        const project = projectMap.get(realId)
        if (!project) continue
        handledProjectIds.add(realId)

        // Filter by confidence
        const validSuggestions = suggestions.filter(
          (s) => s.confidence >= settings.confidenceThreshold
        )

        // Get existing tags
        const existingTagIds = new Set(project.projectTags.map((pt) => pt.tagId))
        const currentTagCount = project.projectTags.length
        const remainingSlots = Math.max(0, settings.maxTags - currentTagCount)

        const newSuggestions = validSuggestions
          .filter((s) => !existingTagIds.has(s.tagId))
          .slice(0, remainingSlots)

        // Apply tags
        const applied: TagSuggestion[] = []
        for (const suggestion of newSuggestions) {
          try {
            await prisma.projectTag.create({
              data: {
                projectId: realId,
                tagId: suggestion.tagId,
                confidence: suggestion.confidence,
                source: 'AI',
              },
            })
            applied.push(suggestion)
          } catch (error) {
            // Duplicates (Prisma unique-constraint code P2002) are expected and
            // skipped quietly; anything else is still skipped but made visible.
            const code =
              typeof error === 'object' && error !== null
                ? (error as { code?: unknown }).code
                : undefined
            if (code !== 'P2002') {
              console.warn(
                `[AI Tagging Batch] Failed to apply tag ${suggestion.tagId} to project ${realId}:`,
                error
              )
            }
          }
        }

        allResults.push({
          projectId: realId,
          suggestions,
          applied,
          tokensUsed: 0, // Token tracking is per-batch, not per-project
        })
      }
    }

    // Count every project attempted in this chunk, so progress still advances
    // when a request fails or the model leaves a project out of its answer.
    processedCount += concurrentBatches.reduce((sum, batch) => sum + batch.length, 0)

    // Report progress after each concurrent chunk
    if (onProgress) {
      try {
        await onProgress(processedCount, projects.length)
      } catch (error) {
        console.warn('[AI Tagging Batch] Progress callback failed:', error)
      }
    }
  }

  return { results: allResults, totalTokens }
}
/** /**
* Get tag suggestions for a project without applying them * Get tag suggestions for a project without applying them
* Useful for preview/review before applying * Useful for preview/review before applying