Optimize AI system with batching, token tracking, and GDPR compliance

- Add AIUsageLog model for persistent token/cost tracking
- Implement batched processing for all AI services:
  - Assignment: 15 projects/batch
  - Filtering: 20 projects/batch
  - Award eligibility: 20 projects/batch
  - Mentor matching: 15 projects/batch
- Create unified error classification (ai-errors.ts)
- Enhance anonymization with comprehensive project data
- Add AI usage dashboard to Settings page
- Add usage stats endpoints to settings router
- Create AI system documentation (5 files)
- Create GDPR compliance documentation (2 files)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 11:58:12 +01:00
parent a72e815d3a
commit 928b1c65dc
19 changed files with 4103 additions and 601 deletions
--- a/src/server/services/ai-award-eligibility.ts
+++ b/src/server/services/ai-award-eligibility.ts
@@ -4,9 +4,33 @@
 * Determines project eligibility for special awards using:
 * - Deterministic field matching (tags, country, category)
 * - AI interpretation of plain-language criteria
+ *
+ * GDPR Compliance:
+ * - All project data is anonymized before AI processing
+ * - IDs replaced with sequential identifiers
+ * - No personal information sent to OpenAI
 */

-import { getOpenAI, getConfiguredModel } from '@/lib/openai'
+import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
+import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
+import { classifyAIError, createParseError, logAIError } from './ai-errors'
+import {
+  anonymizeProjectsForAI,
+  validateAnonymizedProjects,
+  type ProjectWithRelations,
+  type AnonymizedProjectForAI,
+  type ProjectAIMapping,
+} from './anonymization'
+import type { SubmissionSource } from '@prisma/client'
+
+// ─── Constants ───────────────────────────────────────────────────────────────
+
+// Number of anonymized projects (and their ID mappings) sliced off per AI request.
+const BATCH_SIZE = 20
+
+// Optimized system prompt
+// Requests a JSON object with an `evaluations` array; each entry's `project_id`
+// is matched against the anonymized IDs when results are mapped back to real projects.
+const AI_ELIGIBILITY_SYSTEM_PROMPT = `Award eligibility evaluator. Evaluate projects against criteria, return JSON.
+Format: {"evaluations": [{project_id, eligible: bool, confidence: 0-1, reasoning: str}]}
+Be objective. Base evaluation only on provided data. No personal identifiers in reasoning.`

 // ─── Types ──────────────────────────────────────────────────────────────────

@@ -33,6 +57,16 @@ interface ProjectForEligibility {
  geographicZone?: string | null
  tags: string[]
  oceanIssue?: string | null
+  institution?: string | null
+  foundedAt?: Date | null
+  wantsMentorship?: boolean
+  submissionSource?: SubmissionSource
+  submittedAt?: Date | null
+  _count?: {
+    teamMembers?: number
+    files?: number
+  }
+  files?: Array<{ fileType: string | null }>
 }

 // ─── Auto Tag Rules ─────────────────────────────────────────────────────────
@@ -97,32 +131,162 @@ function getFieldValue(

 // ─── AI Criteria Interpretation ─────────────────────────────────────────────

-const AI_ELIGIBILITY_SYSTEM_PROMPT = `You are a special award eligibility evaluator. Given a list of projects and award criteria, determine which projects are eligible.
-
-Return a JSON object with this structure:
-{
-  "evaluations": [
-    {
-      "project_id": "string",
-      "eligible": boolean,
-      "confidence": number (0-1),
-      "reasoning": "string"
-    }
-  ]
+/**
+ * Adapt a ProjectForEligibility to the ProjectWithRelations shape that is
+ * handed to the anonymizer.
+ *
+ * Optional inputs receive explicit fallbacks: no mentorship wish, a 'MANUAL'
+ * submission source, zero team-member/file counts and an empty file list.
+ */
+function toProjectWithRelations(project: ProjectForEligibility): ProjectWithRelations {
+  const {
+    id,
+    title,
+    description,
+    country,
+    geographicZone,
+    institution,
+    tags,
+    foundedAt,
+    submittedAt,
+  } = project
+
+  // Resolve the optional relations up front so the literal below stays flat.
+  const teamMembers = project._count?.teamMembers ?? 0
+  const fileCount = project._count?.files ?? 0
+  const files = (project.files ?? []).map(({ fileType }) => ({ fileType: fileType as any }))
+
+  return {
+    id,
+    title,
+    description,
+    competitionCategory: project.competitionCategory as any,
+    oceanIssue: project.oceanIssue as any,
+    country,
+    geographicZone,
+    institution,
+    tags,
+    foundedAt,
+    wantsMentorship: project.wantsMentorship ?? false,
+    submissionSource: project.submissionSource ?? 'MANUAL',
+    submittedAt,
+    _count: {
+      teamMembers,
+      files: fileCount,
+    },
+    files,
+  }
 }

-Be fair, objective, and base your evaluation only on the provided information. Do not include personal identifiers in reasoning.`
+/**
+ * Evaluate one batch of anonymized projects against the award criteria.
+ *
+ * Sends a single chat-completion request, records exactly one AI usage entry
+ * for that request (so token totals are never counted twice), and maps the
+ * model's evaluations back to real project IDs through `mappings`.
+ *
+ * Every project in the batch ends up with a result:
+ * - evaluated projects carry the model's verdict (confidence clamped to 0-1);
+ * - projects the model left out, and the whole batch when the response is not
+ *   valid JSON, are returned as ineligible with confidence 0 so they can be
+ *   reviewed manually.
+ *
+ * @param openai - Client returned by `getOpenAI`
+ * @param model - Model name used for the request and recorded in the usage log
+ * @param criteriaText - Plain-language award criteria inserted into the prompt
+ * @param anonymized - Anonymized projects serialized into the prompt
+ * @param mappings - Anonymous-ID / real-ID pairs belonging to this batch
+ * @param userId - Optional user recorded in the usage log
+ * @param entityId - Optional award ID recorded in the usage log
+ * @returns The batch results and the total tokens reported for the request
+ * @throws Propagates request and logging failures, and throws an Error when
+ *         the response carries no content
+ */
+async function processEligibilityBatch(
+  openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
+  model: string,
+  criteriaText: string,
+  anonymized: AnonymizedProjectForAI[],
+  mappings: ProjectAIMapping[],
+  userId?: string,
+  entityId?: string
+): Promise<{
+  results: EligibilityResult[]
+  tokensUsed: number
+}> {
+  const results: EligibilityResult[] = []
+
+  // Conservative placeholder for projects that could not be evaluated.
+  const flagForReview = (
+    projectId: EligibilityResult['projectId'],
+    reasoning: string
+  ): EligibilityResult => ({
+    projectId,
+    eligible: false,
+    confidence: 0,
+    reasoning,
+    method: 'AI',
+  })
+
+  const userPrompt = `CRITERIA: ${criteriaText}
+PROJECTS: ${JSON.stringify(anonymized)}
+Evaluate eligibility for each project.`
+
+  const params = buildCompletionParams(model, {
+    messages: [
+      { role: 'system', content: AI_ELIGIBILITY_SYSTEM_PROMPT },
+      { role: 'user', content: userPrompt },
+    ],
+    jsonMode: true,
+    temperature: 0.3,
+    maxTokens: 4000,
+  })
+
+  const response = await openai.chat.completions.create(params)
+  const usage = extractTokenUsage(response)
+  const tokensUsed = usage.totalTokens
+
+  // One usage entry per request: the tokens were spent whether or not the
+  // answer turns out to be usable, so only status/itemsProcessed vary.
+  const recordUsage = (
+    status: 'SUCCESS' | 'ERROR',
+    itemsProcessed: number,
+    errorMessage?: string
+  ) =>
+    logAIUsage({
+      userId,
+      action: 'AWARD_ELIGIBILITY',
+      entityType: 'Award',
+      entityId,
+      model,
+      promptTokens: usage.promptTokens,
+      completionTokens: usage.completionTokens,
+      totalTokens: usage.totalTokens,
+      batchSize: anonymized.length,
+      itemsProcessed,
+      status,
+      ...(errorMessage === undefined ? {} : { errorMessage }),
+    })
+
+  const content = response.choices[0]?.message?.content
+  if (!content) {
+    const message = 'Empty response from AI'
+    await recordUsage('ERROR', 0, message)
+    throw new Error(message)
+  }
+
+  let parsed: unknown
+  try {
+    parsed = JSON.parse(content)
+  } catch (error) {
+    if (!(error instanceof SyntaxError)) {
+      throw error
+    }
+
+    const parseError = createParseError(error.message)
+    logAIError('AwardEligibility', 'batch processing', parseError)
+    await recordUsage('ERROR', 0, parseError.message)
+
+    // Flag all for manual review
+    for (const mapping of mappings) {
+      results.push(
+        flagForReview(mapping.realId, 'AI response parse error — requires manual review')
+      )
+    }
+    return { results, tokensUsed }
+  }
+
+  // The model output is untrusted: tolerate a missing or non-array `evaluations`.
+  const rawEvaluations =
+    typeof parsed === 'object' && parsed !== null
+      ? (parsed as { evaluations?: unknown }).evaluations
+      : undefined
+  const evaluations: unknown[] = Array.isArray(rawEvaluations) ? rawEvaluations : []
+
+  const realIdByAnonymousId = new Map(mappings.map((m) => [m.anonymousId, m.realId]))
+  const evaluatedIds = new Set<string>()
+
+  // Map results back to real IDs
+  for (const raw of evaluations) {
+    if (typeof raw !== 'object' || raw === null) continue
+    const evaluation = raw as {
+      project_id?: unknown
+      eligible?: unknown
+      confidence?: unknown
+      reasoning?: unknown
+    }
+
+    const anonymousId = evaluation.project_id
+    if (typeof anonymousId !== 'string' || evaluatedIds.has(anonymousId)) continue
+
+    const realId = realIdByAnonymousId.get(anonymousId)
+    if (realId === undefined) continue
+    evaluatedIds.add(anonymousId)
+
+    const confidence =
+      typeof evaluation.confidence === 'number' && Number.isFinite(evaluation.confidence)
+        ? Math.min(1, Math.max(0, evaluation.confidence))
+        : 0
+
+    results.push({
+      projectId: realId,
+      eligible: evaluation.eligible === true,
+      confidence,
+      reasoning: typeof evaluation.reasoning === 'string' ? evaluation.reasoning : '',
+      method: 'AI',
+    })
+  }
+
+  // Projects the model skipped must not vanish from the result set.
+  for (const mapping of mappings) {
+    if (!evaluatedIds.has(mapping.anonymousId)) {
+      results.push(
+        flagForReview(mapping.realId, 'AI returned no evaluation — requires manual review')
+      )
+    }
+  }
+
+  await recordUsage('SUCCESS', evaluatedIds.size)
+
+  return { results, tokensUsed }
+}

 export async function aiInterpretCriteria(
  criteriaText: string,
-  projects: ProjectForEligibility[]
+  projects: ProjectForEligibility[],
+  userId?: string,
+  awardId?: string
 ): Promise<EligibilityResult[]> {
  const results: EligibilityResult[] = []

  try {
    const openai = await getOpenAI()
    if (!openai) {
-      // No OpenAI — mark all as needing manual review
+      console.warn('[AI Eligibility] OpenAI not configured')
      return projects.map((p) => ({
        projectId: p.id,
        eligible: false,
@@ -133,91 +297,69 @@ export async function aiInterpretCriteria(
    }

    const model = await getConfiguredModel()
+    console.log(`[AI Eligibility] Using model: ${model} for ${projects.length} projects`)

-    // Anonymize and batch
-    const anonymized = projects.map((p, i) => ({
-      project_id: `P${i + 1}`,
-      real_id: p.id,
-      title: p.title,
-      description: p.description?.slice(0, 500) || '',
-      category: p.competitionCategory || 'Unknown',
-      ocean_issue: p.oceanIssue || 'Unknown',
-      country: p.country || 'Unknown',
-      region: p.geographicZone || 'Unknown',
-      tags: p.tags.join(', '),
-    }))
+    // Convert and anonymize projects
+    const projectsWithRelations = projects.map(toProjectWithRelations)
+    const { anonymized, mappings } = anonymizeProjectsForAI(projectsWithRelations, 'ELIGIBILITY')

-    const batchSize = 20
-    for (let i = 0; i < anonymized.length; i += batchSize) {
-      const batch = anonymized.slice(i, i + batchSize)
-
-      const userPrompt = `Award criteria: ${criteriaText}
-
-Projects to evaluate:
-${JSON.stringify(
-  batch.map(({ real_id, ...rest }) => rest),
-  null,
-  2
-)}
-
-Evaluate each project against the award criteria.`
-
-      const response = await openai.chat.completions.create({
-        model,
-        messages: [
-          { role: 'system', content: AI_ELIGIBILITY_SYSTEM_PROMPT },
-          { role: 'user', content: userPrompt },
-        ],
-        response_format: { type: 'json_object' },
-        temperature: 0.3,
-        max_tokens: 4000,
-      })
-
-      const content = response.choices[0]?.message?.content
-      if (content) {
-        try {
-          const parsed = JSON.parse(content) as {
-            evaluations: Array<{
-              project_id: string
-              eligible: boolean
-              confidence: number
-              reasoning: string
-            }>
-          }
-
-          for (const eval_ of parsed.evaluations) {
-            const anon = batch.find((b) => b.project_id === eval_.project_id)
-            if (anon) {
-              results.push({
-                projectId: anon.real_id,
-                eligible: eval_.eligible,
-                confidence: eval_.confidence,
-                reasoning: eval_.reasoning,
-                method: 'AI',
-              })
-            }
-          }
-        } catch {
-          // Parse error — mark batch for manual review
-          for (const item of batch) {
-            results.push({
-              projectId: item.real_id,
-              eligible: false,
-              confidence: 0,
-              reasoning: 'AI response parse error — requires manual review',
-              method: 'AI',
-            })
-          }
-        }
-      }
+    // Validate anonymization
+    if (!validateAnonymizedProjects(anonymized)) {
+      console.error('[AI Eligibility] Anonymization validation failed')
+      throw new Error('GDPR compliance check failed: PII detected in anonymized data')
    }
-  } catch {
-    // OpenAI error — mark all for manual review
+
+    let totalTokens = 0
+
+    // Process in batches
+    for (let i = 0; i < anonymized.length; i += BATCH_SIZE) {
+      const batchAnon = anonymized.slice(i, i + BATCH_SIZE)
+      const batchMappings = mappings.slice(i, i + BATCH_SIZE)
+
+      console.log(`[AI Eligibility] Processing batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(anonymized.length / BATCH_SIZE)}`)
+
+      const { results: batchResults, tokensUsed } = await processEligibilityBatch(
+        openai,
+        model,
+        criteriaText,
+        batchAnon,
+        batchMappings,
+        userId,
+        awardId
+      )
+
+      results.push(...batchResults)
+      totalTokens += tokensUsed
+    }
+
+    console.log(`[AI Eligibility] Completed. Total tokens: ${totalTokens}`)
+
+  } catch (error) {
+    const classified = classifyAIError(error)
+    logAIError('AwardEligibility', 'aiInterpretCriteria', classified)
+
+    // Log failed attempt
+    await logAIUsage({
+      userId,
+      action: 'AWARD_ELIGIBILITY',
+      entityType: 'Award',
+      entityId: awardId,
+      model: 'unknown',
+      promptTokens: 0,
+      completionTokens: 0,
+      totalTokens: 0,
+      batchSize: projects.length,
+      itemsProcessed: 0,
+      status: 'ERROR',
+      errorMessage: classified.message,
+    })
+
+    // Return all as needing manual review
    return projects.map((p) => ({
      projectId: p.id,
      eligible: false,
      confidence: 0,
-      reasoning: 'AI error — requires manual eligibility review',
+      reasoning: `AI error: ${classified.message}`,
      method: 'AI' as const,
    }))
  }