Optimize AI system with batching, token tracking, and GDPR compliance

- Add AIUsageLog model for persistent token/cost tracking
- Implement batched processing for all AI services:
  - Assignment: 15 projects/batch
  - Filtering: 20 projects/batch
  - Award eligibility: 20 projects/batch
  - Mentor matching: 15 projects/batch
- Create unified error classification (ai-errors.ts)
- Enhance anonymization with comprehensive project data
- Add AI usage dashboard to Settings page
- Add usage stats endpoints to settings router
- Create AI system documentation (5 files)
- Create GDPR compliance documentation (2 files)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-03 11:58:12 +01:00
parent a72e815d3a
commit 928b1c65dc
19 changed files with 4103 additions and 601 deletions

View File

@@ -3,17 +3,41 @@
*
* Uses GPT to analyze juror expertise and project requirements
* to generate optimal assignment suggestions.
*
* Optimization:
* - Batched processing (15 projects per batch)
* - Description truncation (300 chars)
* - Token tracking and cost logging
*
* GDPR Compliance:
* - All data anonymized before AI processing
* - IDs replaced with sequential identifiers
* - No personal information sent to OpenAI
*/
import { getOpenAI, getConfiguredModel } from '@/lib/openai'
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import {
anonymizeForAI,
deanonymizeResults,
validateAnonymization,
DESCRIPTION_LIMITS,
truncateAndSanitize,
type AnonymizationResult,
} from './anonymization'
// Types for AI assignment
// ─── Constants ───────────────────────────────────────────────────────────────
// Projects sent to the model per request. Keeps each prompt small enough for
// the 4000-token completion budget used by the batch processor.
const ASSIGNMENT_BATCH_SIZE = 15
// Optimized system prompt
// Compact, token-optimized system prompt. The model must reply with a JSON
// object whose "assignments" array uses the snake_case shape described below;
// the parser clamps both scores into [0, 1] after parsing.
const ASSIGNMENT_SYSTEM_PROMPT = `Match jurors to projects by expertise. Return JSON assignments.
Each: {juror_id, project_id, confidence_score: 0-1, expertise_match_score: 0-1, reasoning: str (1-2 sentences)}
Distribute workload fairly. Avoid assigning jurors at capacity.`
// ─── Types ───────────────────────────────────────────────────────────────────
export interface AIAssignmentSuggestion {
jurorId: string
projectId: string
@@ -61,153 +85,144 @@ interface AssignmentConstraints {
}>
}
/**
* System prompt for AI assignment (verbose, pre-optimization wording).
* NOTE(review): a second, compact ASSIGNMENT_SYSTEM_PROMPT is declared earlier
* in this view — one of the two appears to be diff residue; confirm that only
* a single declaration survives in the actual file.
*/
const ASSIGNMENT_SYSTEM_PROMPT = `You are an expert at matching jury members to projects based on expertise alignment.
Your task is to suggest optimal juror-project assignments that:
1. Match juror expertise tags with project tags and content
2. Distribute workload fairly among jurors
3. Ensure each project gets the required number of reviews
4. Avoid assigning jurors who are already at their limit
For each suggestion, provide:
- A confidence score (0-1) based on how well the juror's expertise matches the project
- An expertise match score (0-1) based purely on tag/content alignment
- A brief reasoning explaining why this is a good match
Return your response as a JSON array of assignments.`
// ─── AI Processing ───────────────────────────────────────────────────────────
/**
* Generate AI-powered assignment suggestions
* Process a batch of projects for assignment suggestions
*/
export async function generateAIAssignments(
jurors: JurorForAssignment[],
projects: ProjectForAssignment[],
constraints: AssignmentConstraints
): Promise<AIAssignmentResult> {
// Anonymize data before sending to AI
const anonymizedData = anonymizeForAI(jurors, projects)
async function processAssignmentBatch(
openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
model: string,
anonymizedData: AnonymizationResult,
batchProjects: typeof anonymizedData.projects,
batchMappings: typeof anonymizedData.projectMappings,
constraints: AssignmentConstraints,
userId?: string,
entityId?: string
): Promise<{
suggestions: AIAssignmentSuggestion[]
tokensUsed: number
}> {
const suggestions: AIAssignmentSuggestion[] = []
let tokensUsed = 0
// Validate anonymization
if (!validateAnonymization(anonymizedData)) {
console.error('Anonymization validation failed, falling back to algorithm')
return generateFallbackAssignments(jurors, projects, constraints)
}
// Build prompt with batch-specific data
const userPrompt = buildBatchPrompt(
anonymizedData.jurors,
batchProjects,
constraints,
anonymizedData.jurorMappings,
batchMappings
)
try {
const openai = await getOpenAI()
const params = buildCompletionParams(model, {
messages: [
{ role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
{ role: 'user', content: userPrompt },
],
jsonMode: true,
temperature: 0.3,
maxTokens: 4000,
})
if (!openai) {
console.log('OpenAI not configured, using fallback algorithm')
return generateFallbackAssignments(jurors, projects, constraints)
const response = await openai.chat.completions.create(params)
const usage = extractTokenUsage(response)
tokensUsed = usage.totalTokens
// Log batch usage
await logAIUsage({
userId,
action: 'ASSIGNMENT',
entityType: 'Round',
entityId,
model,
promptTokens: usage.promptTokens,
completionTokens: usage.completionTokens,
totalTokens: usage.totalTokens,
batchSize: batchProjects.length,
itemsProcessed: batchProjects.length,
status: 'SUCCESS',
})
const content = response.choices[0]?.message?.content
if (!content) {
throw new Error('No response from AI')
}
const suggestions = await callAIForAssignments(
openai,
anonymizedData,
constraints
)
const parsed = JSON.parse(content) as {
assignments: Array<{
juror_id: string
project_id: string
confidence_score: number
expertise_match_score: number
reasoning: string
}>
}
// De-anonymize results
const deanonymizedSuggestions = deanonymizeResults(
suggestions.map((s) => ({
...s,
jurorId: s.jurorId,
projectId: s.projectId,
// De-anonymize and add to suggestions
const deanonymized = deanonymizeResults(
(parsed.assignments || []).map((a) => ({
jurorId: a.juror_id,
projectId: a.project_id,
confidenceScore: Math.min(1, Math.max(0, a.confidence_score)),
expertiseMatchScore: Math.min(1, Math.max(0, a.expertise_match_score)),
reasoning: a.reasoning,
})),
anonymizedData.jurorMappings,
anonymizedData.projectMappings
).map((s) => ({
jurorId: s.realJurorId,
projectId: s.realProjectId,
confidenceScore: s.confidenceScore,
reasoning: s.reasoning,
expertiseMatchScore: s.expertiseMatchScore,
}))
batchMappings
)
return {
success: true,
suggestions: deanonymizedSuggestions,
fallbackUsed: false,
for (const item of deanonymized) {
suggestions.push({
jurorId: item.realJurorId,
projectId: item.realProjectId,
confidenceScore: item.confidenceScore,
reasoning: item.reasoning,
expertiseMatchScore: item.expertiseMatchScore,
})
}
} catch (error) {
console.error('AI assignment failed, using fallback:', error)
return generateFallbackAssignments(jurors, projects, constraints)
if (error instanceof SyntaxError) {
const parseError = createParseError(error.message)
logAIError('Assignment', 'batch processing', parseError)
await logAIUsage({
userId,
action: 'ASSIGNMENT',
entityType: 'Round',
entityId,
model,
promptTokens: 0,
completionTokens: 0,
totalTokens: tokensUsed,
batchSize: batchProjects.length,
itemsProcessed: 0,
status: 'ERROR',
errorMessage: parseError.message,
})
} else {
throw error
}
}
return { suggestions, tokensUsed }
}
/**
 * Call the OpenAI API for assignment suggestions.
 *
 * Builds the user prompt from the anonymized data, requests a JSON-mode chat
 * completion, and maps the model's snake_case reply into
 * AIAssignmentSuggestion objects with both scores clamped into [0, 1].
 *
 * @throws Error when the client is unavailable or the model returns no content
 */
async function callAIForAssignments(
  openai: Awaited<ReturnType<typeof getOpenAI>>,
  anonymizedData: AnonymizationResult,
  constraints: AssignmentConstraints
): Promise<AIAssignmentSuggestion[]> {
  if (!openai) {
    throw new Error('OpenAI client not available')
  }

  const prompt = buildAssignmentPrompt(anonymizedData, constraints)
  const model = await getConfiguredModel()

  const completion = await openai.chat.completions.create({
    model,
    messages: [
      { role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
      { role: 'user', content: prompt },
    ],
    response_format: { type: 'json_object' },
    temperature: 0.3, // low temperature keeps assignments consistent across runs
    max_tokens: 4000,
  })

  const raw = completion.choices[0]?.message?.content
  if (!raw) {
    throw new Error('No response from AI')
  }

  // The model answers with {"assignments": [...]} using snake_case keys.
  interface RawAssignment {
    juror_id: string
    project_id: string
    confidence_score: number
    expertise_match_score: number
    reasoning: string
  }
  const parsed = JSON.parse(raw) as { assignments: RawAssignment[] }

  const clamp01 = (value: number): number => Math.min(1, Math.max(0, value))
  const assignments = parsed.assignments ?? []
  return assignments.map((entry) => ({
    jurorId: entry.juror_id,
    projectId: entry.project_id,
    confidenceScore: clamp01(entry.confidence_score),
    expertiseMatchScore: clamp01(entry.expertise_match_score),
    reasoning: entry.reasoning,
  }))
}
/**
* Build the prompt for AI assignment
*/
function buildAssignmentPrompt(
data: AnonymizationResult,
constraints: AssignmentConstraints
function buildBatchPrompt(
jurors: AnonymizationResult['jurors'],
projects: AnonymizationResult['projects'],
constraints: AssignmentConstraints,
jurorMappings: AnonymizationResult['jurorMappings'],
projectMappings: AnonymizationResult['projectMappings']
): string {
const { jurors, projects } = data
// Map existing assignments to anonymous IDs
const jurorIdMap = new Map(
data.jurorMappings.map((m) => [m.realId, m.anonymousId])
)
const projectIdMap = new Map(
data.projectMappings.map((m) => [m.realId, m.anonymousId])
)
const jurorIdMap = new Map(jurorMappings.map((m) => [m.realId, m.anonymousId]))
const projectIdMap = new Map(projectMappings.map((m) => [m.realId, m.anonymousId]))
const anonymousExisting = constraints.existingAssignments
.map((a) => ({
@@ -216,29 +231,110 @@ function buildAssignmentPrompt(
}))
.filter((a) => a.jurorId && a.projectId)
return `## Jurors Available
${JSON.stringify(jurors, null, 2)}
## Projects to Assign
${JSON.stringify(projects, null, 2)}
## Constraints
- Each project needs ${constraints.requiredReviewsPerProject} reviews
- Maximum assignments per juror: ${constraints.maxAssignmentsPerJuror || 'No limit'}
- Existing assignments to avoid duplicating:
${JSON.stringify(anonymousExisting, null, 2)}
## Instructions
Generate optimal juror-project assignments. Return a JSON object with an "assignments" array where each assignment has:
- juror_id: The anonymous juror ID
- project_id: The anonymous project ID
- confidence_score: 0-1 confidence in this match
- expertise_match_score: 0-1 expertise alignment score
- reasoning: Brief explanation (1-2 sentences)
Focus on matching expertise tags with project tags and descriptions. Distribute assignments fairly.`
return `JURORS: ${JSON.stringify(jurors)}
PROJECTS: ${JSON.stringify(projects)}
CONSTRAINTS: ${constraints.requiredReviewsPerProject} reviews/project, max ${constraints.maxAssignmentsPerJuror || 'unlimited'}/juror
EXISTING: ${JSON.stringify(anonymousExisting)}
Return JSON: {"assignments": [...]}`
}
/**
 * Generate AI-powered assignment suggestions with batching.
 *
 * Pipeline: truncate project descriptions → anonymize (GDPR) → validate the
 * anonymization → send projects to the model in batches of
 * ASSIGNMENT_BATCH_SIZE → collect de-anonymized suggestions across batches.
 * Falls back to the algorithmic assigner whenever OpenAI is unconfigured,
 * anonymization validation fails, or any batch throws.
 *
 * @param jurors      jurors eligible for assignment
 * @param projects    projects that need reviews
 * @param constraints review counts, per-juror limits, existing assignments
 * @param userId      optional actor id recorded in the usage log
 * @param entityId    optional round id recorded in the usage log
 */
export async function generateAIAssignments(
  jurors: JurorForAssignment[],
  projects: ProjectForAssignment[],
  constraints: AssignmentConstraints,
  userId?: string,
  entityId?: string
): Promise<AIAssignmentResult> {
  // Shorten descriptions up front so the anonymized payload stays compact.
  const shortened = projects.map((project) => ({
    ...project,
    description: truncateAndSanitize(project.description, DESCRIPTION_LIMITS.ASSIGNMENT),
  }))

  // GDPR: strip identifying data before anything is sent to OpenAI.
  const anonymized = anonymizeForAI(jurors, shortened)
  if (!validateAnonymization(anonymized)) {
    console.error('[AI Assignment] Anonymization validation failed, falling back to algorithm')
    return generateFallbackAssignments(jurors, projects, constraints)
  }

  try {
    const openai = await getOpenAI()
    if (!openai) {
      console.log('[AI Assignment] OpenAI not configured, using fallback algorithm')
      return generateFallbackAssignments(jurors, projects, constraints)
    }

    const model = await getConfiguredModel()
    console.log(`[AI Assignment] Using model: ${model} for ${projects.length} projects in batches of ${ASSIGNMENT_BATCH_SIZE}`)

    const collected: AIAssignmentSuggestion[] = []
    let tokenTotal = 0
    const totalBatches = Math.ceil(anonymized.projects.length / ASSIGNMENT_BATCH_SIZE)

    // Assumes anonymized.projects and anonymized.projectMappings are
    // index-aligned, so the same slice selects matching entries of both —
    // TODO confirm against anonymizeForAI.
    for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
      const start = batchIndex * ASSIGNMENT_BATCH_SIZE
      const end = start + ASSIGNMENT_BATCH_SIZE
      console.log(`[AI Assignment] Processing batch ${batchIndex + 1}/${totalBatches}`)

      const batchResult = await processAssignmentBatch(
        openai,
        model,
        anonymized,
        anonymized.projects.slice(start, end),
        anonymized.projectMappings.slice(start, end),
        constraints,
        userId,
        entityId
      )
      collected.push(...batchResult.suggestions)
      tokenTotal += batchResult.tokensUsed
    }

    console.log(`[AI Assignment] Completed. Total suggestions: ${collected.length}, Total tokens: ${tokenTotal}`)
    return {
      success: true,
      suggestions: collected,
      tokensUsed: tokenTotal,
      fallbackUsed: false,
    }
  } catch (error) {
    const classified = classifyAIError(error)
    logAIError('Assignment', 'generateAIAssignments', classified)

    // Record the failed attempt so the usage dashboard reflects errors too.
    await logAIUsage({
      userId,
      action: 'ASSIGNMENT',
      entityType: 'Round',
      entityId,
      model: 'unknown', // the failure may have occurred before model lookup
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
      batchSize: projects.length,
      itemsProcessed: 0,
      status: 'ERROR',
      errorMessage: classified.message,
    })
    console.error('[AI Assignment] AI assignment failed, using fallback:', classified.message)
    return generateFallbackAssignments(jurors, projects, constraints)
  }
}
// ─── Fallback Algorithm ──────────────────────────────────────────────────────
/**
* Fallback algorithm-based assignment when AI is unavailable
*/