// Source: MOPC-Portal/src/server/services/ai-assignment.ts
// (675 lines, 22 KiB, TypeScript — captured from repository web view)
/**
* AI-Powered Assignment Service
*
* Uses GPT to analyze juror expertise and project requirements
* to generate optimal assignment suggestions.
*
* Optimization:
* - Batched processing (15 projects per batch)
* - Description truncation (300 chars)
* - Token tracking and cost logging
*
* GDPR Compliance:
* - All data anonymized before AI processing
* - IDs replaced with sequential identifiers
* - No personal information sent to OpenAI
*/
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import {
anonymizeForAI,
deanonymizeResults,
validateAnonymization,
DESCRIPTION_LIMITS,
truncateAndSanitize,
type AnonymizationResult,
} from './anonymization'
// ─── Constants ───────────────────────────────────────────────────────────────
// Projects sent to the model per request; keeps each prompt small enough that
// the response fits within the 4000-token completion budget used below.
const ASSIGNMENT_BATCH_SIZE = 15
// Structured system prompt for assignment
// NOTE: runtime string sent verbatim to the model — do not reword casually.
// The 50/30/20 criterion weights intentionally mirror the scoring weights
// used by the deterministic fallback algorithm in this file.
const ASSIGNMENT_SYSTEM_PROMPT = `You are an expert jury assignment optimizer for an ocean conservation competition.
## Your Role
Match jurors to projects based on expertise alignment, workload balance, and coverage requirements.
## Matching Criteria (Weighted)
- Expertise Match (50%): How well juror tags/expertise align with project topics. Project tags include a confidence score (0-1) weight higher-confidence tags more heavily as they are more reliably assigned. A tag with confidence 0.9 is a strong signal; one with 0.5 is uncertain.
- Workload Balance (30%): Distribute assignments evenly; prefer jurors below capacity
- Minimum Target (20%): Prioritize jurors who haven't reached their minimum assignment count
## Output Format
Return a JSON object:
{
"assignments": [
{
"juror_id": "JUROR_001",
"project_id": "PROJECT_001",
"confidence_score": 0.0-1.0,
"expertise_match_score": 0.0-1.0,
"reasoning": "1-2 sentence justification"
}
]
}
## Guidelines
- Each project should receive the required number of reviews
- Do not assign jurors who are at or above their capacity
- Favor geographic and disciplinary diversity in assignments
- confidence_score reflects overall assignment quality; expertise_match_score reflects tag overlap only
- A strong match: shared expertise tags + available capacity + under minimum target
- An acceptable match: related domain + available capacity
- A poor match: no expertise overlap, only assigned for coverage`
// ─── Types ───────────────────────────────────────────────────────────────────
/** One juror→project pairing suggested by the AI (or the fallback algorithm). */
export interface AIAssignmentSuggestion {
jurorId: string
projectId: string
confidenceScore: number // 0-1
reasoning: string
expertiseMatchScore: number // 0-1
}
/** Aggregate result of a full assignment-generation run. */
export interface AIAssignmentResult {
success: boolean
suggestions: AIAssignmentSuggestion[]
error?: string
// Total tokens consumed across all batches; unset when the fallback ran
tokensUsed?: number
// True when the deterministic fallback algorithm produced the suggestions
fallbackUsed?: boolean
}
/** Juror shape consumed by this service (subset of the DB record). */
interface JurorForAssignment {
id: string
name?: string | null
email: string
expertiseTags: string[]
// Personal cap on assignments; null/undefined falls back to the global max
maxAssignments?: number | null
_count?: {
assignments: number
}
}
/** Project shape consumed by this service (subset of the DB record). */
interface ProjectForAssignment {
id: string
title: string
description?: string | null
tags: string[]
// Optional per-tag confidence (0-1) used to weight expertise matching
tagConfidences?: Array<{ name: string; confidence: number }>
teamName?: string | null
_count?: {
assignments: number
}
}
/** Constraints governing how assignments may be generated. */
interface AssignmentConstraints {
requiredReviewsPerProject: number
minAssignmentsPerJuror?: number
maxAssignmentsPerJuror?: number
jurorLimits?: Record<string, number> // userId -> personal max assignments
existingAssignments: Array<{
jurorId: string
projectId: string
}>
}
/** Invoked after each completed batch to report generation progress. */
export interface AssignmentProgressCallback {
(progress: {
currentBatch: number
totalBatches: number
processedCount: number
totalProjects: number
}): Promise<void>
}
// ─── AI Processing ───────────────────────────────────────────────────────────
/**
 * Process one batch of anonymized projects through the chat-completion API.
 *
 * Sends the batch prompt, records token usage, parses the model's JSON
 * response (re-requesting up to MAX_PARSE_RETRIES times on malformed JSON),
 * then de-anonymizes the returned assignments back to real IDs.
 *
 * On a terminal JSON parse failure the failure is logged and an empty
 * suggestion list is returned (best-effort per batch); all other errors are
 * rethrown for the caller to handle.
 *
 * @param openai         Configured OpenAI client (non-null).
 * @param model          Chat model name to use for the completion.
 * @param anonymizedData Full anonymization result (jurors + all mappings).
 * @param batchProjects  Anonymized projects for this batch only.
 * @param batchMappings  Project ID mappings matching batchProjects.
 * @param constraints    Assignment constraints embedded into the prompt.
 * @param userId         Optional user ID for AI-usage logging.
 * @param entityId       Optional round ID for AI-usage logging.
 * @returns Batch suggestions plus total tokens used (including retries).
 */
async function processAssignmentBatch(
  openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
  model: string,
  anonymizedData: AnonymizationResult,
  batchProjects: typeof anonymizedData.projects,
  batchMappings: typeof anonymizedData.projectMappings,
  constraints: AssignmentConstraints,
  userId?: string,
  entityId?: string
): Promise<{
  suggestions: AIAssignmentSuggestion[]
  tokensUsed: number
}> {
  const suggestions: AIAssignmentSuggestion[] = []
  let tokensUsed = 0

  // Clamp a model-provided score into [0, 1]. FIX: the model may omit or
  // mistype a field; Math.max(0, undefined) is NaN, which previously
  // propagated into stored scores. Non-finite values now collapse to 0.
  const clamp01 = (value: unknown): number => {
    const n = Number(value)
    return Number.isFinite(n) ? Math.min(1, Math.max(0, n)) : 0
  }

  // Build prompt with batch-specific data
  const userPrompt = buildBatchPrompt(
    anonymizedData.jurors,
    batchProjects,
    constraints,
    anonymizedData.jurorMappings,
    batchMappings
  )

  const MAX_PARSE_RETRIES = 2
  let parseAttempts = 0
  let response: Awaited<ReturnType<typeof openai.chat.completions.create>>

  try {
    const params = buildCompletionParams(model, {
      messages: [
        { role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
        { role: 'user', content: userPrompt },
      ],
      jsonMode: true,
      temperature: 0.1, // low temperature for stable, reproducible matching
      maxTokens: 4000,
    })

    try {
      response = await openai.chat.completions.create(params)
    } catch (apiError) {
      // Provide clearer error for model-related issues
      const errorMsg = apiError instanceof Error ? apiError.message : String(apiError)
      if (errorMsg.includes('model') || errorMsg.includes('does not exist')) {
        throw new Error(`Invalid AI model "${model}". Please check the model name in Settings > AI Configuration.`)
      }
      throw apiError
    }

    const usage = extractTokenUsage(response)
    tokensUsed = usage.totalTokens

    // Log batch usage
    await logAIUsage({
      userId,
      action: 'ASSIGNMENT',
      entityType: 'Round',
      entityId,
      model,
      promptTokens: usage.promptTokens,
      completionTokens: usage.completionTokens,
      totalTokens: usage.totalTokens,
      batchSize: batchProjects.length,
      itemsProcessed: batchProjects.length,
      status: 'SUCCESS',
    })

    // Parse with retry logic: on SyntaxError, re-request with a JSON hint
    let parsed: {
      assignments: Array<{
        juror_id: string
        project_id: string
        confidence_score: number
        expertise_match_score: number
        reasoning: string
      }>
    }
    while (true) {
      try {
        const content = response.choices[0]?.message?.content
        if (!content) {
          // Distinguish the common empty-response causes for actionable errors
          const finishReason = response.choices[0]?.finish_reason
          if (finishReason === 'content_filter') {
            throw new Error('AI response was filtered. Try a different model or simplify the project descriptions.')
          }
          if (!response.choices || response.choices.length === 0) {
            throw new Error(`No response from model "${model}". This model may not exist or may not be available. Please verify the model name.`)
          }
          throw new Error(`Empty response from AI model "${model}". The model may not support this type of request.`)
        }
        parsed = JSON.parse(content)
        break
      } catch (parseError) {
        if (parseError instanceof SyntaxError && parseAttempts < MAX_PARSE_RETRIES) {
          parseAttempts++
          console.warn(`[AI Assignment] JSON parse failed, retrying (${parseAttempts}/${MAX_PARSE_RETRIES})`)
          // Retry the API call with hint
          const retryParams = buildCompletionParams(model, {
            messages: [
              { role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
              { role: 'user', content: userPrompt + '\n\nIMPORTANT: Please ensure valid JSON output.' },
            ],
            jsonMode: true,
            temperature: 0.1,
            maxTokens: 4000,
          })
          response = await openai.chat.completions.create(retryParams)
          const retryUsage = extractTokenUsage(response)
          tokensUsed += retryUsage.totalTokens
          continue
        }
        throw parseError
      }
    }

    // De-anonymize and add to suggestions
    const deanonymized = deanonymizeResults(
      (parsed.assignments || []).map((a) => ({
        jurorId: a.juror_id,
        projectId: a.project_id,
        confidenceScore: clamp01(a.confidence_score),
        expertiseMatchScore: clamp01(a.expertise_match_score),
        // Guard against a missing/non-string reasoning field from the model
        reasoning: typeof a.reasoning === 'string' ? a.reasoning : '',
      })),
      anonymizedData.jurorMappings,
      batchMappings
    )
    for (const item of deanonymized) {
      suggestions.push({
        jurorId: item.realJurorId,
        projectId: item.realProjectId,
        confidenceScore: item.confidenceScore,
        reasoning: item.reasoning,
        expertiseMatchScore: item.expertiseMatchScore,
      })
    }
  } catch (error) {
    if (error instanceof SyntaxError) {
      // Malformed JSON after all retries: log and return empty (best-effort)
      const parseError = createParseError(error.message)
      logAIError('Assignment', 'batch processing', parseError)
      await logAIUsage({
        userId,
        action: 'ASSIGNMENT',
        entityType: 'Round',
        entityId,
        model,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: tokensUsed,
        batchSize: batchProjects.length,
        itemsProcessed: 0,
        status: 'ERROR',
        errorMessage: parseError.message,
      })
    } else {
      throw error
    }
  }
  return { suggestions, tokensUsed }
}
/**
* Build prompt for a batch of projects
*/
function buildBatchPrompt(
jurors: AnonymizationResult['jurors'],
projects: AnonymizationResult['projects'],
constraints: AssignmentConstraints,
jurorMappings: AnonymizationResult['jurorMappings'],
projectMappings: AnonymizationResult['projectMappings']
): string {
// Map existing assignments to anonymous IDs
const jurorIdMap = new Map(jurorMappings.map((m) => [m.realId, m.anonymousId]))
const projectIdMap = new Map(projectMappings.map((m) => [m.realId, m.anonymousId]))
const anonymousExisting = constraints.existingAssignments
.map((a) => ({
jurorId: jurorIdMap.get(a.jurorId),
projectId: projectIdMap.get(a.projectId),
}))
.filter((a) => a.jurorId && a.projectId)
// Build per-juror limits mapped to anonymous IDs
let jurorLimitsStr = ''
if (constraints.jurorLimits && Object.keys(constraints.jurorLimits).length > 0) {
const anonymousLimits: Record<string, number> = {}
for (const [realId, limit] of Object.entries(constraints.jurorLimits)) {
const anonId = jurorIdMap.get(realId)
if (anonId) {
anonymousLimits[anonId] = limit
}
}
if (Object.keys(anonymousLimits).length > 0) {
jurorLimitsStr = `\nJUROR_LIMITS: ${JSON.stringify(anonymousLimits)} (per-juror max assignments, override global max)`
}
}
return `JURORS: ${JSON.stringify(jurors)}
PROJECTS: ${JSON.stringify(projects)}
CONSTRAINTS: ${constraints.requiredReviewsPerProject} reviews/project, max ${constraints.maxAssignmentsPerJuror || 'unlimited'}/juror${jurorLimitsStr}
EXISTING: ${JSON.stringify(anonymousExisting)}
Return JSON: {"assignments": [...]}`
}
/**
 * Generate AI-powered assignment suggestions with batching.
 *
 * Pipeline: truncate project descriptions -> anonymize jurors/projects ->
 * send projects to the model in batches of ASSIGNMENT_BATCH_SIZE ->
 * accumulate de-anonymized suggestions across batches.
 *
 * Falls back to the deterministic algorithm (generateFallbackAssignments)
 * when anonymization validation fails, OpenAI is not configured, or any
 * batch throws a non-recoverable error.
 *
 * @param jurors      Jurors eligible for assignment (real IDs).
 * @param projects    Projects needing reviews (real IDs).
 * @param constraints Review counts, per-juror limits, existing assignments.
 * @param userId      Optional user ID for AI-usage logging.
 * @param entityId    Optional round ID for AI-usage logging.
 * @param onProgress  Optional callback awaited after each completed batch.
 * @returns Suggestions plus token totals; `fallbackUsed` marks the algorithm path.
 */
export async function generateAIAssignments(
jurors: JurorForAssignment[],
projects: ProjectForAssignment[],
constraints: AssignmentConstraints,
userId?: string,
entityId?: string,
onProgress?: AssignmentProgressCallback
): Promise<AIAssignmentResult> {
// Truncate descriptions before anonymization
const truncatedProjects = projects.map((p) => ({
...p,
description: truncateAndSanitize(p.description, DESCRIPTION_LIMITS.ASSIGNMENT),
}))
// Anonymize data before sending to AI (GDPR: no PII leaves the server)
const anonymizedData = anonymizeForAI(jurors, truncatedProjects)
// Validate anonymization
if (!validateAnonymization(anonymizedData)) {
console.error('[AI Assignment] Anonymization validation failed, falling back to algorithm')
return generateFallbackAssignments(jurors, projects, constraints)
}
try {
const openai = await getOpenAI()
if (!openai) {
console.log('[AI Assignment] OpenAI not configured, using fallback algorithm')
return generateFallbackAssignments(jurors, projects, constraints)
}
const model = await getConfiguredModel()
console.log(`[AI Assignment] Using model: ${model} for ${projects.length} projects in batches of ${ASSIGNMENT_BATCH_SIZE}`)
const allSuggestions: AIAssignmentSuggestion[] = []
let totalTokens = 0
// Process projects in batches
// NOTE(review): projectMappings is sliced with the same offsets as projects,
// which assumes anonymizeForAI returns projectMappings in input order —
// confirm against anonymization.ts
const totalBatches = Math.ceil(anonymizedData.projects.length / ASSIGNMENT_BATCH_SIZE)
for (let i = 0; i < anonymizedData.projects.length; i += ASSIGNMENT_BATCH_SIZE) {
const batchProjects = anonymizedData.projects.slice(i, i + ASSIGNMENT_BATCH_SIZE)
const batchMappings = anonymizedData.projectMappings.slice(i, i + ASSIGNMENT_BATCH_SIZE)
const currentBatch = Math.floor(i / ASSIGNMENT_BATCH_SIZE) + 1
console.log(`[AI Assignment] Processing batch ${currentBatch}/${totalBatches}`)
const { suggestions, tokensUsed } = await processAssignmentBatch(
openai,
model,
anonymizedData,
batchProjects,
batchMappings,
constraints,
userId,
entityId
)
allSuggestions.push(...suggestions)
totalTokens += tokensUsed
// Report progress after each batch
if (onProgress) {
// Clamp to total so the final batch never over-reports
const processedCount = Math.min((currentBatch) * ASSIGNMENT_BATCH_SIZE, projects.length)
await onProgress({
currentBatch,
totalBatches,
processedCount,
totalProjects: projects.length,
})
}
}
console.log(`[AI Assignment] Completed. Total suggestions: ${allSuggestions.length}, Total tokens: ${totalTokens}`)
return {
success: true,
suggestions: allSuggestions,
tokensUsed: totalTokens,
fallbackUsed: false,
}
} catch (error) {
const classified = classifyAIError(error)
logAIError('Assignment', 'generateAIAssignments', classified)
// Log failed attempt
// (model name may not have been resolved before the failure, hence 'unknown')
await logAIUsage({
userId,
action: 'ASSIGNMENT',
entityType: 'Round',
entityId,
model: 'unknown',
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
batchSize: projects.length,
itemsProcessed: 0,
status: 'ERROR',
errorMessage: classified.message,
})
console.error('[AI Assignment] AI assignment failed, using fallback:', classified.message)
return generateFallbackAssignments(jurors, projects, constraints)
}
}
// ─── Fallback Algorithm ──────────────────────────────────────────────────────
/**
 * Fallback algorithm-based assignment when AI is unavailable.
 *
 * Deterministic greedy matcher mirroring the AI prompt's weighting:
 * 50% expertise overlap, 30% load balance, 20% under-minimum bonus.
 * Projects with the fewest existing assignments are filled first.
 *
 * @param jurors      Candidate jurors with expertise tags and current counts.
 * @param projects    Projects with tags and current assignment counts.
 * @param constraints Review requirements, limits, and existing assignments.
 * @returns Always-successful result with `fallbackUsed: true`.
 */
export function generateFallbackAssignments(
  jurors: JurorForAssignment[],
  projects: ProjectForAssignment[],
  constraints: AssignmentConstraints
): AIAssignmentResult {
  const suggestions: AIAssignmentSuggestion[] = []
  const existingSet = new Set(
    constraints.existingAssignments.map((a) => `${a.jurorId}:${a.projectId}`)
  )

  // Effective per-juror cap: personal override (constraints.jurorLimits)
  // takes precedence over the juror's own max, then the global max.
  // FIX: constraints.jurorLimits was previously ignored by the fallback
  // path, even though the AI path maps it into the prompt as an override.
  const effectiveMax = (juror: JurorForAssignment): number =>
    constraints.jurorLimits?.[juror.id] ??
    juror.maxAssignments ??
    constraints.maxAssignmentsPerJuror ??
    Infinity

  // Track assignments per juror and project
  const jurorAssignments = new Map<string, number>()
  const projectAssignments = new Map<string, number>()

  // Initialize counts from existing assignments
  for (const assignment of constraints.existingAssignments) {
    jurorAssignments.set(
      assignment.jurorId,
      (jurorAssignments.get(assignment.jurorId) || 0) + 1
    )
    projectAssignments.set(
      assignment.projectId,
      (projectAssignments.get(assignment.projectId) || 0) + 1
    )
  }

  // Fold in counts stored on the records themselves; take the max so that
  // overlap between _count and existingAssignments is not double-counted
  for (const juror of jurors) {
    const current = juror._count?.assignments || 0
    jurorAssignments.set(
      juror.id,
      Math.max(jurorAssignments.get(juror.id) || 0, current)
    )
  }
  for (const project of projects) {
    const current = project._count?.assignments || 0
    projectAssignments.set(
      project.id,
      Math.max(projectAssignments.get(project.id) || 0, current)
    )
  }

  // Sort projects by need (fewest assignments first)
  const sortedProjects = [...projects].sort((a, b) => {
    const aCount = projectAssignments.get(a.id) || 0
    const bCount = projectAssignments.get(b.id) || 0
    return aCount - bCount
  })

  // For each project, find best matching jurors
  for (const project of sortedProjects) {
    const currentProjectAssignments = projectAssignments.get(project.id) || 0
    const neededReviews = Math.max(
      0,
      constraints.requiredReviewsPerProject - currentProjectAssignments
    )
    if (neededReviews === 0) continue

    // Score all available jurors
    const scoredJurors = jurors
      .filter((juror) => {
        // Skip jurors already assigned to this project
        if (existingSet.has(`${juror.id}:${project.id}`)) return false
        // Skip jurors at or above their effective capacity
        const currentAssignments = jurorAssignments.get(juror.id) || 0
        if (currentAssignments >= effectiveMax(juror)) return false
        return true
      })
      .map((juror) => {
        const currentLoad = jurorAssignments.get(juror.id) || 0
        // Use 20 as a nominal capacity for load scoring when unbounded
        const cap = effectiveMax(juror)
        const maxLoad = Number.isFinite(cap) ? cap : 20
        const minTarget = constraints.minAssignmentsPerJuror ?? 5
        return {
          juror,
          score: calculateExpertiseScore(juror.expertiseTags, project.tags, project.tagConfidences),
          loadScore: calculateLoadScore(currentLoad, maxLoad),
          underMinBonus: calculateUnderMinBonus(currentLoad, minTarget),
        }
      })
      .sort((a, b) => {
        // Combined score: 50% expertise, 30% load balancing, 20% under-min bonus
        const aTotal = a.score * 0.5 + a.loadScore * 0.3 + a.underMinBonus * 0.2
        const bTotal = b.score * 0.5 + b.loadScore * 0.3 + b.underMinBonus * 0.2
        return bTotal - aTotal
      })

    // Assign top jurors
    for (let i = 0; i < Math.min(neededReviews, scoredJurors.length); i++) {
      const { juror, score } = scoredJurors[i]
      suggestions.push({
        jurorId: juror.id,
        projectId: project.id,
        confidenceScore: score,
        expertiseMatchScore: score,
        reasoning: generateFallbackReasoning(
          juror.expertiseTags,
          project.tags,
          score
        ),
      })
      // Update tracking so later projects see this juror's new load
      existingSet.add(`${juror.id}:${project.id}`)
      jurorAssignments.set(juror.id, (jurorAssignments.get(juror.id) || 0) + 1)
      projectAssignments.set(
        project.id,
        (projectAssignments.get(project.id) || 0) + 1
      )
    }
  }

  return {
    success: true,
    suggestions,
    fallbackUsed: true,
  }
}
/**
 * Score how well a juror's expertise tags cover a project's tags (0-1).
 *
 * With tag-confidence data, matches are weighted by confidence; otherwise a
 * flat overlap ratio over the project's tags is used. Either way the ratio
 * contributes up to 0.8 and any match at all adds a 0.2 base bonus. Returns
 * a neutral 0.5 when either side has no tags (or all confidences are zero).
 */
function calculateExpertiseScore(
  jurorTags: string[],
  projectTags: string[],
  tagConfidences?: Array<{ name: string; confidence: number }>
): number {
  // Nothing to compare -> neutral score
  if (jurorTags.length === 0 || projectTags.length === 0) return 0.5

  const expertise = new Set(jurorTags.map((tag) => tag.toLowerCase()))

  if (tagConfidences && tagConfidences.length > 0) {
    // Confidence-weighted overlap: high-confidence tags dominate the ratio
    let matched = 0
    let total = 0
    for (const { name, confidence } of tagConfidences) {
      total += confidence
      if (expertise.has(name.toLowerCase())) matched += confidence
    }
    if (total === 0) return 0.5
    const bonus = matched > 0 ? 0.2 : 0
    return Math.min(1, (matched / total) * 0.8 + bonus)
  }

  // Flat, unweighted overlap over the project's tag list
  let overlap = 0
  for (const tag of projectTags) {
    if (expertise.has(tag.toLowerCase())) overlap++
  }
  const bonus = overlap > 0 ? 0.2 : 0
  return Math.min(1, (overlap / projectTags.length) * 0.8 + bonus)
}
/**
 * Load-balancing score in [0, 1]: 1 means fully idle, 0 means at or over
 * capacity. A zero capacity always scores 0.
 */
function calculateLoadScore(currentLoad: number, maxLoad: number): number {
  if (maxLoad === 0) return 0
  return Math.max(0, 1 - currentLoad / maxLoad)
}
/**
 * Bonus in [0, 1] for jurors still under their minimum target: 1.0 at zero
 * load, shrinking linearly to 0 as the load reaches the minimum.
 */
function calculateUnderMinBonus(currentLoad: number, minTarget: number): number {
  if (currentLoad >= minTarget) return 0
  const remaining = minTarget - currentLoad
  return remaining / minTarget
}
/**
 * Human-readable justification for a fallback assignment, based on
 * case-insensitive tag overlap and the computed match score.
 */
function generateFallbackReasoning(
  jurorTags: string[],
  projectTags: string[],
  score: number
): string {
  const expertise = new Set(jurorTags.map((tag) => tag.toLowerCase()))
  const shared = projectTags.filter((tag) => expertise.has(tag.toLowerCase()))

  if (shared.length > 0) {
    return `Expertise match: ${shared.join(', ')}. Match score: ${(score * 100).toFixed(0)}%.`
  }
  if (score >= 0.5) {
    return `Assigned for workload balance. No direct expertise match but available capacity.`
  }
  return `Assigned to ensure project coverage.`
}