Optimize AI system with batching, token tracking, and GDPR compliance

- Add AIUsageLog model for persistent token/cost tracking
- Implement batched processing for all AI services:
  - Assignment: 15 projects/batch
  - Filtering: 20 projects/batch
  - Award eligibility: 20 projects/batch
  - Mentor matching: 15 projects/batch
- Create unified error classification (ai-errors.ts)
- Enhance anonymization with comprehensive project data
- Add AI usage dashboard to Settings page
- Add usage stats endpoints to settings router
- Create AI system documentation (5 files)
- Create GDPR compliance documentation (2 files)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-03 11:58:12 +01:00
parent a72e815d3a
commit 928b1c65dc
19 changed files with 4103 additions and 601 deletions

View File

@@ -1,5 +1,33 @@
/**
* AI-Powered Mentor Matching Service
*
* Matches mentors to projects based on expertise alignment.
*
* Optimization:
* - Batched processing (15 projects per batch)
* - Token tracking and cost logging
* - Fallback to algorithmic matching
*
* GDPR Compliance:
* - All data anonymized before AI processing
* - No personal information sent to OpenAI
*/
import { PrismaClient, OceanIssue, CompetitionCategory } from '@prisma/client'
import { getOpenAI, getConfiguredModel } from '@/lib/openai'
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
// ─── Constants ───────────────────────────────────────────────────────────────
const MENTOR_BATCH_SIZE = 15
// Optimized system prompt
const MENTOR_MATCHING_SYSTEM_PROMPT = `Match mentors to projects by expertise. Return JSON.
Format for each project: {"matches": [{project_id, mentor_matches: [{mentor_index, confidence_score: 0-1, expertise_match_score: 0-1, reasoning: str}]}]}
Rank by suitability. Consider expertise alignment and availability.`
// ─── Types ───────────────────────────────────────────────────────────────────
interface ProjectInfo {
id: string
@@ -26,17 +54,162 @@ interface MentorMatch {
reasoning: string
}
// ─── Batched AI Matching ─────────────────────────────────────────────────────
/**
* Get AI-suggested mentor matches for a project
* Process a batch of projects for mentor matching
*/
export async function getAIMentorSuggestions(
/**
 * Process one batch of projects for AI mentor matching.
 *
 * GDPR: only pseudonymous project IDs (P1, P2, ...), categories, tags and
 * truncated descriptions are sent to OpenAI; mentors are referenced solely
 * by array index. Real IDs stay local and are re-attached afterwards.
 *
 * @param openai   - Initialized OpenAI client.
 * @param model    - Chat model identifier to use.
 * @param projects - Projects in this batch (caller slices to MENTOR_BATCH_SIZE).
 * @param mentors  - Candidate mentors, referenced by index in the prompt.
 * @param limit    - Maximum mentors to rank per project.
 * @param userId   - Optional user ID recorded in the usage log.
 * @returns Map of real project ID -> mentor matches, plus tokens consumed.
 *          On a JSON parse failure every project in the batch maps to an
 *          empty array so the caller can fall back to algorithmic matching.
 * @throws Re-throws non-parse errors (rate limit, network, ...) so the
 *         caller can classify and handle them.
 */
async function processMatchingBatch(
  openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
  model: string,
  projects: ProjectInfo[],
  mentors: MentorInfo[],
  limit: number,
  userId?: string
): Promise<{
  results: Map<string, MentorMatch[]>
  tokensUsed: number
}> {
  const results = new Map<string, MentorMatch[]>()
  let tokensUsed = 0

  // Anonymize project data before it leaves the system
  const anonymizedProjects = projects.map((p, index) => ({
    project_id: `P${index + 1}`,
    real_id: p.id,
    description: p.description?.slice(0, 350) || 'No description',
    category: p.competitionCategory,
    oceanIssue: p.oceanIssue,
    tags: p.tags,
  }))

  // Anonymize mentor data: index + expertise + availability only
  const anonymizedMentors = mentors.map((m, index) => ({
    index,
    expertise: m.expertiseTags,
    availability: m.maxAssignments
      ? `${m.currentAssignments}/${m.maxAssignments}`
      : 'unlimited',
  }))

  const userPrompt = `PROJECTS:
${anonymizedProjects.map(p => `${p.project_id}: Category=${p.category || 'N/A'}, Issue=${p.oceanIssue || 'N/A'}, Tags=[${p.tags.join(', ')}], Desc=${p.description.slice(0, 200)}`).join('\n')}
MENTORS:
${anonymizedMentors.map(m => `${m.index}: Expertise=[${m.expertise.join(', ')}], Availability=${m.availability}`).join('\n')}
For each project, rank top ${limit} mentors.`

  try {
    const params = buildCompletionParams(model, {
      messages: [
        { role: 'system', content: MENTOR_MATCHING_SYSTEM_PROMPT },
        { role: 'user', content: userPrompt },
      ],
      jsonMode: true,
      temperature: 0.3,
      maxTokens: 4000,
    })

    const response = await openai.chat.completions.create(params)
    const usage = extractTokenUsage(response)
    tokensUsed = usage.totalTokens

    const content = response.choices[0]?.message?.content
    if (!content) {
      throw new Error('No response from AI')
    }

    const parsed = JSON.parse(content) as {
      matches: Array<{
        project_id: string
        mentor_matches: Array<{
          mentor_index: number
          confidence_score: number
          expertise_match_score: number
          reasoning: string
        }>
      }>
    }

    // Map results back to real IDs; clamp scores into [0, 1] and drop
    // out-of-range mentor indices the model may hallucinate
    for (const projectMatch of parsed.matches || []) {
      const project = anonymizedProjects.find(p => p.project_id === projectMatch.project_id)
      if (!project) continue

      const mentorMatches: MentorMatch[] = []
      for (const match of projectMatch.mentor_matches || []) {
        if (match.mentor_index >= 0 && match.mentor_index < mentors.length) {
          mentorMatches.push({
            mentorId: mentors[match.mentor_index].id,
            confidenceScore: Math.min(1, Math.max(0, match.confidence_score)),
            expertiseMatchScore: Math.min(1, Math.max(0, match.expertise_match_score)),
            reasoning: match.reasoning,
          })
        }
      }
      results.set(project.real_id, mentorMatches)
    }

    // Log SUCCESS only after the response parsed and mapped cleanly.
    // (Previously SUCCESS was logged before parsing, so a null-content or
    // unparseable response produced both a SUCCESS and an ERROR row for
    // the same call, inflating usage stats.)
    await logAIUsage({
      userId,
      action: 'MENTOR_MATCHING',
      entityType: 'Project',
      model,
      promptTokens: usage.promptTokens,
      completionTokens: usage.completionTokens,
      totalTokens: usage.totalTokens,
      batchSize: projects.length,
      itemsProcessed: projects.length,
      status: 'SUCCESS',
    })
  } catch (error) {
    if (error instanceof SyntaxError) {
      // Model returned malformed JSON: record the failure (token count is
      // still known) and return empty results so the caller falls back to
      // the algorithmic matcher for this batch.
      const parseError = createParseError(error.message)
      logAIError('MentorMatching', 'batch processing', parseError)
      await logAIUsage({
        userId,
        action: 'MENTOR_MATCHING',
        entityType: 'Project',
        model,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: tokensUsed,
        batchSize: projects.length,
        itemsProcessed: 0,
        status: 'ERROR',
        errorMessage: parseError.message,
      })
      for (const project of projects) {
        results.set(project.id, [])
      }
    } else {
      // Non-parse errors (rate limit, auth, network) are handled upstream
      throw error
    }
  }

  return { results, tokensUsed }
}
/**
* Get AI-suggested mentor matches for multiple projects (batched)
*/
export async function getAIMentorSuggestionsBatch(
prisma: PrismaClient,
projectId: string,
limit: number = 5
): Promise<MentorMatch[]> {
// Get project details
const project = await prisma.project.findUniqueOrThrow({
where: { id: projectId },
projectIds: string[],
limit: number = 5,
userId?: string
): Promise<Map<string, MentorMatch[]>> {
const allResults = new Map<string, MentorMatch[]>()
// Get projects
const projects = await prisma.project.findMany({
where: { id: { in: projectIds } },
select: {
id: true,
title: true,
@@ -47,14 +220,16 @@ export async function getAIMentorSuggestions(
},
})
// Get available mentors (users with expertise tags)
// In a full implementation, you'd have a MENTOR role
// For now, we use users with expertiseTags and consider them potential mentors
if (projects.length === 0) {
return allResults
}
// Get available mentors
const mentors = await prisma.user.findMany({
where: {
OR: [
{ expertiseTags: { isEmpty: false } },
{ role: 'JURY_MEMBER' }, // Jury members can also be mentors
{ role: 'JURY_MEMBER' },
],
status: 'ACTIVE',
},
@@ -86,118 +261,111 @@ export async function getAIMentorSuggestions(
}))
if (availableMentors.length === 0) {
return []
return allResults
}
// Try AI matching if API key is configured
if (process.env.OPENAI_API_KEY) {
try {
return await getAIMatches(project, availableMentors, limit)
} catch (error) {
console.error('AI mentor matching failed, falling back to algorithm:', error)
// Try AI matching
try {
const openai = await getOpenAI()
if (!openai) {
console.log('[Mentor Matching] OpenAI not configured, using algorithm')
return getAlgorithmicMatchesBatch(projects, availableMentors, limit)
}
}
// Fallback to algorithmic matching
return getAlgorithmicMatches(project, availableMentors, limit)
const model = await getConfiguredModel()
console.log(`[Mentor Matching] Using model: ${model} for ${projects.length} projects in batches of ${MENTOR_BATCH_SIZE}`)
let totalTokens = 0
// Process in batches
for (let i = 0; i < projects.length; i += MENTOR_BATCH_SIZE) {
const batchProjects = projects.slice(i, i + MENTOR_BATCH_SIZE)
console.log(`[Mentor Matching] Processing batch ${Math.floor(i / MENTOR_BATCH_SIZE) + 1}/${Math.ceil(projects.length / MENTOR_BATCH_SIZE)}`)
const { results, tokensUsed } = await processMatchingBatch(
openai,
model,
batchProjects,
availableMentors,
limit,
userId
)
totalTokens += tokensUsed
// Merge results
for (const [projectId, matches] of results) {
allResults.set(projectId, matches)
}
}
console.log(`[Mentor Matching] Completed. Total tokens: ${totalTokens}`)
// Fill in any missing projects with algorithmic fallback
for (const project of projects) {
if (!allResults.has(project.id) || allResults.get(project.id)?.length === 0) {
const fallbackMatches = getAlgorithmicMatches(project, availableMentors, limit)
allResults.set(project.id, fallbackMatches)
}
}
return allResults
} catch (error) {
const classified = classifyAIError(error)
logAIError('MentorMatching', 'getAIMentorSuggestionsBatch', classified)
// Log failed attempt
await logAIUsage({
userId,
action: 'MENTOR_MATCHING',
entityType: 'Project',
model: 'unknown',
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
batchSize: projects.length,
itemsProcessed: 0,
status: 'ERROR',
errorMessage: classified.message,
})
console.error('[Mentor Matching] AI failed, using algorithm:', classified.message)
return getAlgorithmicMatchesBatch(projects, availableMentors, limit)
}
}
/**
* Use OpenAI to match mentors to projects
* Get AI-suggested mentor matches for a single project
*/
async function getAIMatches(
project: ProjectInfo,
/**
 * Get AI-suggested mentor matches for a single project.
 *
 * Thin convenience wrapper around the batched variant: runs the batch
 * pipeline with exactly one project and unwraps its entry from the
 * resulting map.
 *
 * @param prisma    - Prisma client used to load project and mentor data.
 * @param projectId - ID of the project to match mentors for.
 * @param limit     - Maximum number of mentors to rank (default 5).
 * @param userId    - Optional user ID recorded in the AI usage log.
 * @returns Ranked mentor matches; an empty array when no result exists.
 */
export async function getAIMentorSuggestions(
  prisma: PrismaClient,
  projectId: string,
  limit: number = 5,
  userId?: string
): Promise<MentorMatch[]> {
  const batchResults = await getAIMentorSuggestionsBatch(prisma, [projectId], limit, userId)
  const matches = batchResults.get(projectId)
  return matches ?? []
}
// ─── Algorithmic Fallback ────────────────────────────────────────────────────
/**
* Algorithmic fallback for multiple projects
*/
function getAlgorithmicMatchesBatch(
projects: ProjectInfo[],
mentors: MentorInfo[],
limit: number
): Promise<MentorMatch[]> {
// Anonymize data before sending to AI
const anonymizedProject = {
description: project.description?.slice(0, 500) || 'No description',
category: project.competitionCategory,
oceanIssue: project.oceanIssue,
tags: project.tags,
): Map<string, MentorMatch[]> {
const results = new Map<string, MentorMatch[]>()
for (const project of projects) {
results.set(project.id, getAlgorithmicMatches(project, mentors, limit))
}
const anonymizedMentors = mentors.map((m, index) => ({
index,
expertise: m.expertiseTags,
availability: m.maxAssignments
? `${m.currentAssignments}/${m.maxAssignments}`
: 'unlimited',
}))
const prompt = `You are matching mentors to an ocean protection project.
PROJECT:
- Category: ${anonymizedProject.category || 'Not specified'}
- Ocean Issue: ${anonymizedProject.oceanIssue || 'Not specified'}
- Tags: ${anonymizedProject.tags.join(', ') || 'None'}
- Description: ${anonymizedProject.description}
AVAILABLE MENTORS:
${anonymizedMentors.map((m) => `${m.index}: Expertise: [${m.expertise.join(', ')}], Availability: ${m.availability}`).join('\n')}
Rank the top ${limit} mentors by suitability. For each, provide:
1. Mentor index (0-based)
2. Confidence score (0-1)
3. Expertise match score (0-1)
4. Brief reasoning (1-2 sentences)
Respond in JSON format:
{
"matches": [
{
"mentorIndex": 0,
"confidenceScore": 0.85,
"expertiseMatchScore": 0.9,
"reasoning": "Strong expertise alignment..."
}
]
}`
const openai = await getOpenAI()
if (!openai) {
throw new Error('OpenAI client not available')
}
const model = await getConfiguredModel()
const response = await openai.chat.completions.create({
model,
messages: [
{
role: 'system',
content: 'You are an expert at matching mentors to projects based on expertise alignment. Always respond with valid JSON.',
},
{ role: 'user', content: prompt },
],
response_format: { type: 'json_object' },
temperature: 0.3,
max_tokens: 1000,
})
const content = response.choices[0]?.message?.content
if (!content) {
throw new Error('No response from AI')
}
const parsed = JSON.parse(content) as {
matches: Array<{
mentorIndex: number
confidenceScore: number
expertiseMatchScore: number
reasoning: string
}>
}
return parsed.matches
.filter((m) => m.mentorIndex >= 0 && m.mentorIndex < mentors.length)
.map((m) => ({
mentorId: mentors[m.mentorIndex].id,
confidenceScore: m.confidenceScore,
expertiseMatchScore: m.expertiseMatchScore,
reasoning: m.reasoning,
}))
return results
}
/**
@@ -226,7 +394,6 @@ function getAlgorithmicMatches(
})
if (project.description) {
// Extract key words from description
const words = project.description.toLowerCase().split(/\s+/)
words.forEach((word) => {
if (word.length > 4) projectKeywords.add(word.replace(/[^a-z]/g, ''))
@@ -267,7 +434,7 @@ function getAlgorithmicMatches(
mentorId: mentor.id,
confidenceScore: Math.round(confidenceScore * 100) / 100,
expertiseMatchScore: Math.round(expertiseMatchScore * 100) / 100,
reasoning: `Matched ${matchCount} keyword(s) with mentor expertise. Availability: ${availabilityScore > 0.5 ? 'Good' : 'Limited'}.`,
reasoning: `Matched ${matchCount} keyword(s). Availability: ${availabilityScore > 0.5 ? 'Good' : 'Limited'}.`,
}
})