Optimize AI system with batching, token tracking, and GDPR compliance

- Add AIUsageLog model for persistent token/cost tracking
- Implement batched processing for all AI services:
  - Assignment: 15 projects/batch
  - Filtering: 20 projects/batch
  - Award eligibility: 20 projects/batch
  - Mentor matching: 15 projects/batch
- Create unified error classification (ai-errors.ts)
- Enhance anonymization with comprehensive project data
- Add AI usage dashboard to Settings page
- Add usage stats endpoints to settings router
- Create AI system documentation (5 files)
- Create GDPR compliance documentation (2 files)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-03 11:58:12 +01:00
parent a72e815d3a
commit 928b1c65dc
19 changed files with 4103 additions and 601 deletions

View File

@@ -4,9 +4,33 @@
* Determines project eligibility for special awards using:
* - Deterministic field matching (tags, country, category)
* - AI interpretation of plain-language criteria
*
* GDPR Compliance:
* - All project data is anonymized before AI processing
* - IDs replaced with sequential identifiers
* - No personal information sent to OpenAI
*/
import { getOpenAI, getConfiguredModel } from '@/lib/openai'
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import {
anonymizeProjectsForAI,
validateAnonymizedProjects,
type ProjectWithRelations,
type AnonymizedProjectForAI,
type ProjectAIMapping,
} from './anonymization'
import type { SubmissionSource } from '@prisma/client'
// ─── Constants ───────────────────────────────────────────────────────────────
// Number of anonymized projects sent to the model in a single request.
const BATCH_SIZE = 20
// Compact system prompt: asks the model for a JSON object holding an `evaluations`
// array (project_id, eligible, confidence, reasoning) and forbids personal
// identifiers in the reasoning text.
const AI_ELIGIBILITY_SYSTEM_PROMPT = `Award eligibility evaluator. Evaluate projects against criteria, return JSON.
Format: {"evaluations": [{project_id, eligible: bool, confidence: 0-1, reasoning: str}]}
Be objective. Base evaluation only on provided data. No personal identifiers in reasoning.`
// ─── Types ──────────────────────────────────────────────────────────────────
@@ -33,6 +57,16 @@ interface ProjectForEligibility {
geographicZone?: string | null
tags: string[]
oceanIssue?: string | null
institution?: string | null
foundedAt?: Date | null
wantsMentorship?: boolean
submissionSource?: SubmissionSource
submittedAt?: Date | null
_count?: {
teamMembers?: number
files?: number
}
files?: Array<{ fileType: string | null }>
}
// ─── Auto Tag Rules ─────────────────────────────────────────────────────────
@@ -97,32 +131,162 @@ function getFieldValue(
// ─── AI Criteria Interpretation ─────────────────────────────────────────────
const AI_ELIGIBILITY_SYSTEM_PROMPT = `You are a special award eligibility evaluator. Given a list of projects and award criteria, determine which projects are eligible.
Return a JSON object with this structure:
{
"evaluations": [
{
"project_id": "string",
"eligible": boolean,
"confidence": number (0-1),
"reasoning": "string"
}
]
/**
 * Adapt an eligibility project record to the ProjectWithRelations shape that
 * the anonymization helpers accept, substituting defaults for optional fields.
 */
function toProjectWithRelations(project: ProjectForEligibility): ProjectWithRelations {
  const {
    id,
    title,
    description,
    country,
    geographicZone,
    institution,
    tags,
    foundedAt,
    submittedAt,
    _count: counts,
    files: attachments,
  } = project

  // Optional flags fall back to their neutral values
  const wantsMentorship = project.wantsMentorship ?? false
  const submissionSource = project.submissionSource ?? 'MANUAL'

  // Missing relation counts are reported as zero
  const teamMembers = counts?.teamMembers ?? 0
  const fileCount = counts?.files ?? 0

  // Only the file type is carried over; an absent list becomes an empty one
  const files =
    attachments == null
      ? []
      : attachments.map((attachment) => ({ fileType: attachment.fileType as any }))

  return {
    id,
    title,
    description,
    competitionCategory: project.competitionCategory as any,
    oceanIssue: project.oceanIssue as any,
    country,
    geographicZone,
    institution,
    tags,
    foundedAt,
    wantsMentorship,
    submissionSource,
    submittedAt,
    _count: {
      teamMembers,
      files: fileCount,
    },
    files,
  }
}
Be fair, objective, and base your evaluation only on the provided information. Do not include personal identifiers in reasoning.`
/**
 * Parse and validate the model's JSON reply for an eligibility batch.
 *
 * Entries that are not objects or lack a string `project_id` are skipped.
 * `eligible` is true only for a literal `true`, `confidence` is clamped to
 * [0, 1] (0 when not a finite number) and `reasoning` defaults to ''.
 *
 * @throws SyntaxError when the content is not valid JSON or is not a JSON
 *   object, so callers can handle both cases through one parse-error path.
 */
function parseEligibilityEvaluations(
  content: string
): Array<{ project_id: string; eligible: boolean; confidence: number; reasoning: string }> {
  const parsed: unknown = JSON.parse(content)
  if (typeof parsed !== 'object' || parsed === null) {
    throw new SyntaxError('AI response is not a JSON object')
  }

  const rawEvaluations = (parsed as { evaluations?: unknown }).evaluations
  if (!Array.isArray(rawEvaluations)) {
    return []
  }

  const evaluations: Array<{
    project_id: string
    eligible: boolean
    confidence: number
    reasoning: string
  }> = []
  for (const entry of rawEvaluations as unknown[]) {
    if (typeof entry !== 'object' || entry === null) continue
    const { project_id, eligible, confidence, reasoning } = entry as Record<string, unknown>
    if (typeof project_id !== 'string') continue
    evaluations.push({
      project_id,
      eligible: eligible === true,
      confidence:
        typeof confidence === 'number' && Number.isFinite(confidence)
          ? Math.min(1, Math.max(0, confidence))
          : 0,
      reasoning: typeof reasoning === 'string' ? reasoning : '',
    })
  }
  return evaluations
}

/**
 * Evaluate one batch of anonymized projects against the award criteria.
 *
 * Sends a single chat-completion request, maps the returned evaluations back
 * to real project IDs via `mappings`, and writes exactly one usage-log row for
 * the request (SUCCESS, or ERROR with the real token counts).
 *
 * Every project in `mappings` gets a result: projects the model did not
 * evaluate, and all projects when the reply cannot be parsed, are returned as
 * ineligible with confidence 0 and a "requires manual review" reasoning.
 *
 * @param openai - Configured OpenAI client
 * @param model - Model name passed to the completion request and the usage log
 * @param criteriaText - Plain-language award criteria
 * @param anonymized - Anonymized projects for this batch (sent to the model)
 * @param mappings - Anonymous-ID to real-ID mappings for the same batch
 * @param userId - Optional user recorded in the usage log
 * @param entityId - Optional award ID recorded in the usage log
 * @returns The per-project results and the total tokens consumed by the request
 * @throws Rethrows any error that is not a SyntaxError (request failures and
 *   an empty model response)
 */
async function processEligibilityBatch(
  openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
  model: string,
  criteriaText: string,
  anonymized: AnonymizedProjectForAI[],
  mappings: ProjectAIMapping[],
  userId?: string,
  entityId?: string
): Promise<{
  results: EligibilityResult[]
  tokensUsed: number
}> {
  const userPrompt = `CRITERIA: ${criteriaText}
PROJECTS: ${JSON.stringify(anonymized)}
Evaluate eligibility for each project.`

  // Stays undefined until the API call has returned, so failures before that
  // point are logged with zero tokens.
  let usage: ReturnType<typeof extractTokenUsage> | undefined

  // Single place that writes the usage row, so each request is logged once
  // with consistent token counts regardless of outcome.
  const recordUsage = (
    status: 'SUCCESS' | 'ERROR',
    itemsProcessed: number,
    errorMessage?: string
  ) =>
    logAIUsage({
      userId,
      action: 'AWARD_ELIGIBILITY',
      entityType: 'Award',
      entityId,
      model,
      promptTokens: usage?.promptTokens ?? 0,
      completionTokens: usage?.completionTokens ?? 0,
      totalTokens: usage?.totalTokens ?? 0,
      batchSize: anonymized.length,
      itemsProcessed,
      status,
      ...(errorMessage === undefined ? {} : { errorMessage }),
    })

  try {
    const params = buildCompletionParams(model, {
      messages: [
        { role: 'system', content: AI_ELIGIBILITY_SYSTEM_PROMPT },
        { role: 'user', content: userPrompt },
      ],
      jsonMode: true,
      temperature: 0.3,
      maxTokens: 4000,
    })

    const response = await openai.chat.completions.create(params)
    const tokens = extractTokenUsage(response)
    usage = tokens

    const content = response.choices[0]?.message?.content
    if (!content) {
      // The request consumed tokens, so record them before surfacing the failure
      await recordUsage('ERROR', 0, 'Empty response from AI')
      throw new Error('Empty response from AI')
    }

    const evaluations = parseEligibilityEvaluations(content)

    // Anonymous ID -> real ID for projects that still need a result
    const pending = new Map<string, string>()
    for (const mapping of mappings) {
      pending.set(mapping.anonymousId, mapping.realId)
    }

    const results: EligibilityResult[] = []
    for (const evaluation of evaluations) {
      const realId = pending.get(evaluation.project_id)
      // Unknown IDs and repeated evaluations of the same project are ignored
      if (realId === undefined) continue
      pending.delete(evaluation.project_id)
      results.push({
        projectId: realId,
        eligible: evaluation.eligible,
        confidence: evaluation.confidence,
        reasoning: evaluation.reasoning,
        method: 'AI',
      })
    }
    const evaluatedCount = results.length

    // Projects the model skipped must not vanish from the output
    for (const realId of pending.values()) {
      results.push({
        projectId: realId,
        eligible: false,
        confidence: 0,
        reasoning: 'AI returned no evaluation — requires manual review',
        method: 'AI',
      })
    }

    await recordUsage('SUCCESS', evaluatedCount)
    return { results, tokensUsed: tokens.totalTokens }
  } catch (error) {
    if (!(error instanceof SyntaxError)) {
      throw error
    }

    const parseError = createParseError(error.message)
    logAIError('AwardEligibility', 'batch processing', parseError)
    await recordUsage('ERROR', 0, parseError.message)

    // Flag all for manual review
    return {
      results: mappings.map(
        (mapping): EligibilityResult => ({
          projectId: mapping.realId,
          eligible: false,
          confidence: 0,
          reasoning: 'AI response parse error — requires manual review',
          method: 'AI',
        })
      ),
      tokensUsed: usage?.totalTokens ?? 0,
    }
  }
}
export async function aiInterpretCriteria(
criteriaText: string,
projects: ProjectForEligibility[]
projects: ProjectForEligibility[],
userId?: string,
awardId?: string
): Promise<EligibilityResult[]> {
const results: EligibilityResult[] = []
try {
const openai = await getOpenAI()
if (!openai) {
// No OpenAI — mark all as needing manual review
console.warn('[AI Eligibility] OpenAI not configured')
return projects.map((p) => ({
projectId: p.id,
eligible: false,
@@ -133,91 +297,69 @@ export async function aiInterpretCriteria(
}
const model = await getConfiguredModel()
console.log(`[AI Eligibility] Using model: ${model} for ${projects.length} projects`)
// Anonymize and batch
const anonymized = projects.map((p, i) => ({
project_id: `P${i + 1}`,
real_id: p.id,
title: p.title,
description: p.description?.slice(0, 500) || '',
category: p.competitionCategory || 'Unknown',
ocean_issue: p.oceanIssue || 'Unknown',
country: p.country || 'Unknown',
region: p.geographicZone || 'Unknown',
tags: p.tags.join(', '),
}))
// Convert and anonymize projects
const projectsWithRelations = projects.map(toProjectWithRelations)
const { anonymized, mappings } = anonymizeProjectsForAI(projectsWithRelations, 'ELIGIBILITY')
const batchSize = 20
for (let i = 0; i < anonymized.length; i += batchSize) {
const batch = anonymized.slice(i, i + batchSize)
const userPrompt = `Award criteria: ${criteriaText}
Projects to evaluate:
${JSON.stringify(
batch.map(({ real_id, ...rest }) => rest),
null,
2
)}
Evaluate each project against the award criteria.`
const response = await openai.chat.completions.create({
model,
messages: [
{ role: 'system', content: AI_ELIGIBILITY_SYSTEM_PROMPT },
{ role: 'user', content: userPrompt },
],
response_format: { type: 'json_object' },
temperature: 0.3,
max_tokens: 4000,
})
const content = response.choices[0]?.message?.content
if (content) {
try {
const parsed = JSON.parse(content) as {
evaluations: Array<{
project_id: string
eligible: boolean
confidence: number
reasoning: string
}>
}
for (const eval_ of parsed.evaluations) {
const anon = batch.find((b) => b.project_id === eval_.project_id)
if (anon) {
results.push({
projectId: anon.real_id,
eligible: eval_.eligible,
confidence: eval_.confidence,
reasoning: eval_.reasoning,
method: 'AI',
})
}
}
} catch {
// Parse error — mark batch for manual review
for (const item of batch) {
results.push({
projectId: item.real_id,
eligible: false,
confidence: 0,
reasoning: 'AI response parse error — requires manual review',
method: 'AI',
})
}
}
}
// Validate anonymization
if (!validateAnonymizedProjects(anonymized)) {
console.error('[AI Eligibility] Anonymization validation failed')
throw new Error('GDPR compliance check failed: PII detected in anonymized data')
}
} catch {
// OpenAI error — mark all for manual review
let totalTokens = 0
// Process in batches
for (let i = 0; i < anonymized.length; i += BATCH_SIZE) {
const batchAnon = anonymized.slice(i, i + BATCH_SIZE)
const batchMappings = mappings.slice(i, i + BATCH_SIZE)
console.log(`[AI Eligibility] Processing batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(anonymized.length / BATCH_SIZE)}`)
const { results: batchResults, tokensUsed } = await processEligibilityBatch(
openai,
model,
criteriaText,
batchAnon,
batchMappings,
userId,
awardId
)
results.push(...batchResults)
totalTokens += tokensUsed
}
console.log(`[AI Eligibility] Completed. Total tokens: ${totalTokens}`)
} catch (error) {
const classified = classifyAIError(error)
logAIError('AwardEligibility', 'aiInterpretCriteria', classified)
// Log failed attempt
await logAIUsage({
userId,
action: 'AWARD_ELIGIBILITY',
entityType: 'Award',
entityId: awardId,
model: 'unknown',
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
batchSize: projects.length,
itemsProcessed: 0,
status: 'ERROR',
errorMessage: classified.message,
})
// Return all as needing manual review
return projects.map((p) => ({
projectId: p.id,
eligible: false,
confidence: 0,
reasoning: 'AI error — requires manual eligibility review',
reasoning: `AI error: ${classified.message}`,
method: 'AI' as const,
}))
}