Optimize AI system with batching, token tracking, and GDPR compliance

- Add AIUsageLog model for persistent token/cost tracking
- Implement batched processing for all AI services:
  - Assignment: 15 projects/batch
  - Filtering: 20 projects/batch
  - Award eligibility: 20 projects/batch
  - Mentor matching: 15 projects/batch
- Create unified error classification (ai-errors.ts)
- Enhance anonymization with comprehensive project data
- Add AI usage dashboard to Settings page
- Add usage stats endpoints to settings router
- Create AI system documentation (5 files)
- Create GDPR compliance documentation (2 files)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-03 11:58:12 +01:00
parent a72e815d3a
commit 928b1c65dc
19 changed files with 4103 additions and 601 deletions

View File

@@ -3,17 +3,41 @@
*
* Uses GPT to analyze juror expertise and project requirements
* to generate optimal assignment suggestions.
*
* Optimization:
* - Batched processing (15 projects per batch)
* - Description truncation (300 chars)
* - Token tracking and cost logging
*
* GDPR Compliance:
* - All data anonymized before AI processing
* - IDs replaced with sequential identifiers
* - No personal information sent to OpenAI
*/
import { getOpenAI, getConfiguredModel } from '@/lib/openai'
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import {
anonymizeForAI,
deanonymizeResults,
validateAnonymization,
DESCRIPTION_LIMITS,
truncateAndSanitize,
type AnonymizationResult,
} from './anonymization'
// Types for AI assignment
// ─── Constants ───────────────────────────────────────────────────────────────
// Projects sent to the model per request. Keeps each prompt small enough for
// the 4000-token completion budget used by the batch processor.
const ASSIGNMENT_BATCH_SIZE = 15
// Optimized system prompt
// Compact, token-optimized system prompt. The model must reply with a JSON
// object whose "assignments" array uses the snake_case shape described below;
// the parser clamps both scores into [0, 1] after parsing.
const ASSIGNMENT_SYSTEM_PROMPT = `Match jurors to projects by expertise. Return JSON assignments.
Each: {juror_id, project_id, confidence_score: 0-1, expertise_match_score: 0-1, reasoning: str (1-2 sentences)}
Distribute workload fairly. Avoid assigning jurors at capacity.`
// ─── Types ───────────────────────────────────────────────────────────────────
export interface AIAssignmentSuggestion {
jurorId: string
projectId: string
@@ -61,153 +85,144 @@ interface AssignmentConstraints {
}>
}
/**
* System prompt for AI assignment (verbose, pre-optimization wording).
* NOTE(review): a second, compact ASSIGNMENT_SYSTEM_PROMPT is declared earlier
* in this view — one of the two appears to be diff residue; confirm that only
* a single declaration survives in the actual file.
*/
const ASSIGNMENT_SYSTEM_PROMPT = `You are an expert at matching jury members to projects based on expertise alignment.
Your task is to suggest optimal juror-project assignments that:
1. Match juror expertise tags with project tags and content
2. Distribute workload fairly among jurors
3. Ensure each project gets the required number of reviews
4. Avoid assigning jurors who are already at their limit
For each suggestion, provide:
- A confidence score (0-1) based on how well the juror's expertise matches the project
- An expertise match score (0-1) based purely on tag/content alignment
- A brief reasoning explaining why this is a good match
Return your response as a JSON array of assignments.`
// ─── AI Processing ───────────────────────────────────────────────────────────
/**
* Generate AI-powered assignment suggestions
* Process a batch of projects for assignment suggestions
*/
export async function generateAIAssignments(
jurors: JurorForAssignment[],
projects: ProjectForAssignment[],
constraints: AssignmentConstraints
): Promise<AIAssignmentResult> {
// Anonymize data before sending to AI
const anonymizedData = anonymizeForAI(jurors, projects)
async function processAssignmentBatch(
openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
model: string,
anonymizedData: AnonymizationResult,
batchProjects: typeof anonymizedData.projects,
batchMappings: typeof anonymizedData.projectMappings,
constraints: AssignmentConstraints,
userId?: string,
entityId?: string
): Promise<{
suggestions: AIAssignmentSuggestion[]
tokensUsed: number
}> {
const suggestions: AIAssignmentSuggestion[] = []
let tokensUsed = 0
// Validate anonymization
if (!validateAnonymization(anonymizedData)) {
console.error('Anonymization validation failed, falling back to algorithm')
return generateFallbackAssignments(jurors, projects, constraints)
}
// Build prompt with batch-specific data
const userPrompt = buildBatchPrompt(
anonymizedData.jurors,
batchProjects,
constraints,
anonymizedData.jurorMappings,
batchMappings
)
try {
const openai = await getOpenAI()
const params = buildCompletionParams(model, {
messages: [
{ role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
{ role: 'user', content: userPrompt },
],
jsonMode: true,
temperature: 0.3,
maxTokens: 4000,
})
if (!openai) {
console.log('OpenAI not configured, using fallback algorithm')
return generateFallbackAssignments(jurors, projects, constraints)
const response = await openai.chat.completions.create(params)
const usage = extractTokenUsage(response)
tokensUsed = usage.totalTokens
// Log batch usage
await logAIUsage({
userId,
action: 'ASSIGNMENT',
entityType: 'Round',
entityId,
model,
promptTokens: usage.promptTokens,
completionTokens: usage.completionTokens,
totalTokens: usage.totalTokens,
batchSize: batchProjects.length,
itemsProcessed: batchProjects.length,
status: 'SUCCESS',
})
const content = response.choices[0]?.message?.content
if (!content) {
throw new Error('No response from AI')
}
const suggestions = await callAIForAssignments(
openai,
anonymizedData,
constraints
)
const parsed = JSON.parse(content) as {
assignments: Array<{
juror_id: string
project_id: string
confidence_score: number
expertise_match_score: number
reasoning: string
}>
}
// De-anonymize results
const deanonymizedSuggestions = deanonymizeResults(
suggestions.map((s) => ({
...s,
jurorId: s.jurorId,
projectId: s.projectId,
// De-anonymize and add to suggestions
const deanonymized = deanonymizeResults(
(parsed.assignments || []).map((a) => ({
jurorId: a.juror_id,
projectId: a.project_id,
confidenceScore: Math.min(1, Math.max(0, a.confidence_score)),
expertiseMatchScore: Math.min(1, Math.max(0, a.expertise_match_score)),
reasoning: a.reasoning,
})),
anonymizedData.jurorMappings,
anonymizedData.projectMappings
).map((s) => ({
jurorId: s.realJurorId,
projectId: s.realProjectId,
confidenceScore: s.confidenceScore,
reasoning: s.reasoning,
expertiseMatchScore: s.expertiseMatchScore,
}))
batchMappings
)
return {
success: true,
suggestions: deanonymizedSuggestions,
fallbackUsed: false,
for (const item of deanonymized) {
suggestions.push({
jurorId: item.realJurorId,
projectId: item.realProjectId,
confidenceScore: item.confidenceScore,
reasoning: item.reasoning,
expertiseMatchScore: item.expertiseMatchScore,
})
}
} catch (error) {
console.error('AI assignment failed, using fallback:', error)
return generateFallbackAssignments(jurors, projects, constraints)
if (error instanceof SyntaxError) {
const parseError = createParseError(error.message)
logAIError('Assignment', 'batch processing', parseError)
await logAIUsage({
userId,
action: 'ASSIGNMENT',
entityType: 'Round',
entityId,
model,
promptTokens: 0,
completionTokens: 0,
totalTokens: tokensUsed,
batchSize: batchProjects.length,
itemsProcessed: 0,
status: 'ERROR',
errorMessage: parseError.message,
})
} else {
throw error
}
}
return { suggestions, tokensUsed }
}
/**
 * Call the OpenAI API for assignment suggestions.
 *
 * Builds the user prompt from the anonymized data, requests a JSON-mode chat
 * completion, and maps the model's snake_case reply into
 * AIAssignmentSuggestion objects with both scores clamped into [0, 1].
 *
 * @throws Error when the client is unavailable or the model returns no content
 */
async function callAIForAssignments(
  openai: Awaited<ReturnType<typeof getOpenAI>>,
  anonymizedData: AnonymizationResult,
  constraints: AssignmentConstraints
): Promise<AIAssignmentSuggestion[]> {
  if (!openai) {
    throw new Error('OpenAI client not available')
  }

  const prompt = buildAssignmentPrompt(anonymizedData, constraints)
  const model = await getConfiguredModel()

  const completion = await openai.chat.completions.create({
    model,
    messages: [
      { role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
      { role: 'user', content: prompt },
    ],
    response_format: { type: 'json_object' },
    temperature: 0.3, // low temperature keeps assignments consistent across runs
    max_tokens: 4000,
  })

  const raw = completion.choices[0]?.message?.content
  if (!raw) {
    throw new Error('No response from AI')
  }

  // The model answers with {"assignments": [...]} using snake_case keys.
  interface RawAssignment {
    juror_id: string
    project_id: string
    confidence_score: number
    expertise_match_score: number
    reasoning: string
  }
  const parsed = JSON.parse(raw) as { assignments: RawAssignment[] }

  const clamp01 = (value: number): number => Math.min(1, Math.max(0, value))
  const assignments = parsed.assignments ?? []
  return assignments.map((entry) => ({
    jurorId: entry.juror_id,
    projectId: entry.project_id,
    confidenceScore: clamp01(entry.confidence_score),
    expertiseMatchScore: clamp01(entry.expertise_match_score),
    reasoning: entry.reasoning,
  }))
}
/**
* Build the prompt for AI assignment
*/
function buildAssignmentPrompt(
data: AnonymizationResult,
constraints: AssignmentConstraints
function buildBatchPrompt(
jurors: AnonymizationResult['jurors'],
projects: AnonymizationResult['projects'],
constraints: AssignmentConstraints,
jurorMappings: AnonymizationResult['jurorMappings'],
projectMappings: AnonymizationResult['projectMappings']
): string {
const { jurors, projects } = data
// Map existing assignments to anonymous IDs
const jurorIdMap = new Map(
data.jurorMappings.map((m) => [m.realId, m.anonymousId])
)
const projectIdMap = new Map(
data.projectMappings.map((m) => [m.realId, m.anonymousId])
)
const jurorIdMap = new Map(jurorMappings.map((m) => [m.realId, m.anonymousId]))
const projectIdMap = new Map(projectMappings.map((m) => [m.realId, m.anonymousId]))
const anonymousExisting = constraints.existingAssignments
.map((a) => ({
@@ -216,29 +231,110 @@ function buildAssignmentPrompt(
}))
.filter((a) => a.jurorId && a.projectId)
return `## Jurors Available
${JSON.stringify(jurors, null, 2)}
## Projects to Assign
${JSON.stringify(projects, null, 2)}
## Constraints
- Each project needs ${constraints.requiredReviewsPerProject} reviews
- Maximum assignments per juror: ${constraints.maxAssignmentsPerJuror || 'No limit'}
- Existing assignments to avoid duplicating:
${JSON.stringify(anonymousExisting, null, 2)}
## Instructions
Generate optimal juror-project assignments. Return a JSON object with an "assignments" array where each assignment has:
- juror_id: The anonymous juror ID
- project_id: The anonymous project ID
- confidence_score: 0-1 confidence in this match
- expertise_match_score: 0-1 expertise alignment score
- reasoning: Brief explanation (1-2 sentences)
Focus on matching expertise tags with project tags and descriptions. Distribute assignments fairly.`
return `JURORS: ${JSON.stringify(jurors)}
PROJECTS: ${JSON.stringify(projects)}
CONSTRAINTS: ${constraints.requiredReviewsPerProject} reviews/project, max ${constraints.maxAssignmentsPerJuror || 'unlimited'}/juror
EXISTING: ${JSON.stringify(anonymousExisting)}
Return JSON: {"assignments": [...]}`
}
/**
 * Generate AI-powered assignment suggestions with batching.
 *
 * Pipeline: truncate project descriptions → anonymize (GDPR) → validate the
 * anonymization → send projects to the model in batches of
 * ASSIGNMENT_BATCH_SIZE → collect de-anonymized suggestions across batches.
 * Falls back to the algorithmic assigner whenever OpenAI is unconfigured,
 * anonymization validation fails, or any batch throws.
 *
 * @param jurors      jurors eligible for assignment
 * @param projects    projects that need reviews
 * @param constraints review counts, per-juror limits, existing assignments
 * @param userId      optional actor id recorded in the usage log
 * @param entityId    optional round id recorded in the usage log
 */
export async function generateAIAssignments(
  jurors: JurorForAssignment[],
  projects: ProjectForAssignment[],
  constraints: AssignmentConstraints,
  userId?: string,
  entityId?: string
): Promise<AIAssignmentResult> {
  // Shorten descriptions up front so the anonymized payload stays compact.
  const shortened = projects.map((project) => ({
    ...project,
    description: truncateAndSanitize(project.description, DESCRIPTION_LIMITS.ASSIGNMENT),
  }))

  // GDPR: strip identifying data before anything is sent to OpenAI.
  const anonymized = anonymizeForAI(jurors, shortened)
  if (!validateAnonymization(anonymized)) {
    console.error('[AI Assignment] Anonymization validation failed, falling back to algorithm')
    return generateFallbackAssignments(jurors, projects, constraints)
  }

  try {
    const openai = await getOpenAI()
    if (!openai) {
      console.log('[AI Assignment] OpenAI not configured, using fallback algorithm')
      return generateFallbackAssignments(jurors, projects, constraints)
    }

    const model = await getConfiguredModel()
    console.log(`[AI Assignment] Using model: ${model} for ${projects.length} projects in batches of ${ASSIGNMENT_BATCH_SIZE}`)

    const collected: AIAssignmentSuggestion[] = []
    let tokenTotal = 0
    const totalBatches = Math.ceil(anonymized.projects.length / ASSIGNMENT_BATCH_SIZE)

    // Assumes anonymized.projects and anonymized.projectMappings are
    // index-aligned, so the same slice selects matching entries of both —
    // TODO confirm against anonymizeForAI.
    for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
      const start = batchIndex * ASSIGNMENT_BATCH_SIZE
      const end = start + ASSIGNMENT_BATCH_SIZE
      console.log(`[AI Assignment] Processing batch ${batchIndex + 1}/${totalBatches}`)

      const batchResult = await processAssignmentBatch(
        openai,
        model,
        anonymized,
        anonymized.projects.slice(start, end),
        anonymized.projectMappings.slice(start, end),
        constraints,
        userId,
        entityId
      )
      collected.push(...batchResult.suggestions)
      tokenTotal += batchResult.tokensUsed
    }

    console.log(`[AI Assignment] Completed. Total suggestions: ${collected.length}, Total tokens: ${tokenTotal}`)
    return {
      success: true,
      suggestions: collected,
      tokensUsed: tokenTotal,
      fallbackUsed: false,
    }
  } catch (error) {
    const classified = classifyAIError(error)
    logAIError('Assignment', 'generateAIAssignments', classified)

    // Record the failed attempt so the usage dashboard reflects errors too.
    await logAIUsage({
      userId,
      action: 'ASSIGNMENT',
      entityType: 'Round',
      entityId,
      model: 'unknown', // the failure may have occurred before model lookup
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
      batchSize: projects.length,
      itemsProcessed: 0,
      status: 'ERROR',
      errorMessage: classified.message,
    })
    console.error('[AI Assignment] AI assignment failed, using fallback:', classified.message)
    return generateFallbackAssignments(jurors, projects, constraints)
  }
}
// ─── Fallback Algorithm ──────────────────────────────────────────────────────
/**
* Fallback algorithm-based assignment when AI is unavailable
*/