Files
MOPC-Portal/src/server/services/ai-assignment.ts

689 lines
23 KiB
TypeScript
Raw Normal View History

/**
* AI-Powered Assignment Service
*
* Uses GPT to analyze juror expertise and project requirements
* to generate optimal assignment suggestions.
*
* Optimization:
* - Batched processing (15 projects per batch)
* - Description truncation (300 chars)
* - Token tracking and cost logging
*
* GDPR Compliance:
* - All data anonymized before AI processing
* - IDs replaced with sequential identifiers
* - No personal information sent to OpenAI
*/
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import {
anonymizeForAI,
deanonymizeResults,
validateAnonymization,
DESCRIPTION_LIMITS,
truncateAndSanitize,
type AnonymizationResult,
} from './anonymization'
// ─── Constants ───────────────────────────────────────────────────────────────

// Number of projects sent to the model per request (bounds prompt size/cost).
const ASSIGNMENT_BATCH_SIZE = 15

// Structured system prompt for assignment.
// NOTE: several em-dashes were lost in an earlier copy/paste ("projects use ALL",
// "reviews ensure full coverage"), leaving run-on sentences; restored below.
const ASSIGNMENT_SYSTEM_PROMPT = `You are an expert jury assignment optimizer for an ocean conservation competition.
## Your Role
Match jurors to projects based on expertise alignment, workload balance, geographic diversity, and coverage requirements. You have access to rich data about both jurors and projects — use ALL available information to make optimal assignments.
## Available Data
- **Jurors**: expertiseTags (areas of expertise), bio (background description with deeper domain knowledge), country, currentAssignmentCount, maxAssignments
- **Projects**: title, description (detailed project overview), tags (with confidence 0-1), category (e.g. STARTUP, BUSINESS_CONCEPT), oceanIssue (focus area like CORAL_REEFS, POLLUTION), country, institution, teamSize, fileTypes (submitted document types)
## Matching Criteria (Weighted)
- Expertise & Domain Match (50%): How well juror tags, bio, and background align with project topics, category, ocean issue, and description. Use bio text to identify deeper domain expertise beyond explicit tags — e.g., a bio mentioning "20 years of coral research" matches coral-related projects even without explicit tags. Weight higher-confidence tags more heavily.
- Workload Balance (30%): Distribute assignments as evenly as possible; strongly prefer jurors below capacity. Never let one juror get significantly more assignments than another.
- Minimum Target (20%): Prioritize jurors who haven't reached their minimum assignment count
## Output Format
Return a JSON object:
{
"assignments": [
{
"juror_id": "JUROR_001",
"project_id": "PROJECT_001",
"confidence_score": 0.0-1.0,
"expertise_match_score": 0.0-1.0,
"reasoning": "1-2 sentence justification referencing specific expertise matches"
}
]
}
## Guidelines
- Each project MUST receive the required number of reviews — ensure full coverage
- Distribute assignments as evenly as possible across all jurors
- Do not assign jurors who are at or above their capacity
- Favor geographic diversity: avoid assigning jurors from the same country as the project when possible
- Consider disciplinary diversity: mix different expertise backgrounds per project
- confidence_score reflects overall assignment quality; expertise_match_score reflects tag/expertise overlap
- A strong match: shared expertise tags + relevant bio background + available capacity
- An acceptable match: related domain/ocean issue + available capacity
- A poor match: no expertise overlap, only assigned for coverage`
// ─── Types ───────────────────────────────────────────────────────────────────
/** One proposed juror→project pairing (real IDs, already de-anonymized). */
export interface AIAssignmentSuggestion {
  jurorId: string
  projectId: string
  confidenceScore: number // 0-1, overall assignment quality
  reasoning: string // 1-2 sentence human-readable justification
  expertiseMatchScore: number // 0-1, tag/expertise overlap only
}

/** Outcome of a suggestion run (AI path or algorithmic fallback). */
export interface AIAssignmentResult {
  success: boolean
  suggestions: AIAssignmentSuggestion[]
  error?: string
  tokensUsed?: number // total OpenAI tokens across all batches (AI path only)
  fallbackUsed?: boolean // true when the non-AI algorithm produced the result
}

/** Juror input shape. Presumably a Prisma row projection — verify against caller. */
interface JurorForAssignment {
  id: string
  name?: string | null
  email: string
  expertiseTags: string[]
  bio?: string | null
  country?: string | null
  maxAssignments?: number | null // personal cap; overrides the global max when set
  _count?: {
    assignments: number // current number of assignments for this juror
  }
}

/** Project input shape. Description is truncated/sanitized before AI use. */
interface ProjectForAssignment {
  id: string
  title: string
  description?: string | null
  tags: string[]
  tagConfidences?: Array<{ name: string; confidence: number }> // 0-1 per tag
  teamName?: string | null
  competitionCategory?: string | null
  oceanIssue?: string | null
  country?: string | null
  institution?: string | null
  teamSize?: number
  fileTypes?: string[]
  _count?: {
    assignments: number // current number of reviews already assigned
  }
}

/** Hard constraints applied to both the AI prompt and the fallback algorithm. */
interface AssignmentConstraints {
  requiredReviewsPerProject: number
  minAssignmentsPerJuror?: number
  maxAssignmentsPerJuror?: number
  jurorLimits?: Record<string, number> // userId -> personal max assignments
  existingAssignments: Array<{
    jurorId: string
    projectId: string
  }>
}

/** Invoked after each batch so callers can surface progress to the UI. */
export interface AssignmentProgressCallback {
  (progress: {
    currentBatch: number
    totalBatches: number
    processedCount: number
    totalProjects: number
  }): Promise<void>
}
// ─── AI Processing ───────────────────────────────────────────────────────────
/**
 * Process a batch of projects for assignment suggestions.
 *
 * Sends one chat-completion request for the batch, logs token usage,
 * retries up to MAX_PARSE_RETRIES times on malformed JSON, then maps the
 * model's anonymous IDs back to real IDs.
 *
 * Note: assumes batchProjects and batchMappings are index-aligned slices of
 * anonymizedData — the caller slices both with identical ranges (verify if
 * anonymizeForAI's ordering ever changes).
 *
 * Returns the de-anonymized suggestions plus tokens consumed (including
 * retries). A terminal JSON parse failure is logged and swallowed — the
 * batch yields zero suggestions rather than failing the whole run; all
 * other errors propagate to the caller.
 */
async function processAssignmentBatch(
  openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
  model: string,
  anonymizedData: AnonymizationResult,
  batchProjects: typeof anonymizedData.projects,
  batchMappings: typeof anonymizedData.projectMappings,
  constraints: AssignmentConstraints,
  userId?: string,
  entityId?: string
): Promise<{
  suggestions: AIAssignmentSuggestion[]
  tokensUsed: number
}> {
  const suggestions: AIAssignmentSuggestion[] = []
  let tokensUsed = 0
  // Build prompt with batch-specific data (all IDs anonymized)
  const userPrompt = buildBatchPrompt(
    anonymizedData.jurors,
    batchProjects,
    constraints,
    anonymizedData.jurorMappings,
    batchMappings
  )
  const MAX_PARSE_RETRIES = 2
  let parseAttempts = 0
  let response: Awaited<ReturnType<typeof openai.chat.completions.create>>
  try {
    // Low temperature + JSON mode: we want deterministic, machine-parseable output
    const params = buildCompletionParams(model, {
      messages: [
        { role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
        { role: 'user', content: userPrompt },
      ],
      jsonMode: true,
      temperature: 0.1,
      maxTokens: 4000,
    })
    try {
      response = await openai.chat.completions.create(params)
    } catch (apiError) {
      // Provide clearer error for model-related issues (misconfigured model name)
      const errorMsg = apiError instanceof Error ? apiError.message : String(apiError)
      if (errorMsg.includes('model') || errorMsg.includes('does not exist')) {
        throw new Error(`Invalid AI model "${model}". Please check the model name in Settings > AI Configuration.`)
      }
      throw apiError
    }
    const usage = extractTokenUsage(response)
    tokensUsed = usage.totalTokens
    // Log batch usage (cost tracking) — recorded even if parsing later fails
    await logAIUsage({
      userId,
      action: 'ASSIGNMENT',
      entityType: 'Round',
      entityId,
      model,
      promptTokens: usage.promptTokens,
      completionTokens: usage.completionTokens,
      totalTokens: usage.totalTokens,
      batchSize: batchProjects.length,
      itemsProcessed: batchProjects.length,
      status: 'SUCCESS',
    })
    // Parse with retry logic: on a SyntaxError we re-ask the model with an
    // explicit "valid JSON" hint, up to MAX_PARSE_RETRIES extra calls.
    let parsed: {
      assignments: Array<{
        juror_id: string
        project_id: string
        confidence_score: number
        expertise_match_score: number
        reasoning: string
      }>
    }
    while (true) {
      try {
        const content = response.choices[0]?.message?.content
        if (!content) {
          // Empty content: distinguish content-filtering, missing choices,
          // and a model that simply returned nothing, for clearer messages.
          const finishReason = response.choices[0]?.finish_reason
          if (finishReason === 'content_filter') {
            throw new Error('AI response was filtered. Try a different model or simplify the project descriptions.')
          }
          if (!response.choices || response.choices.length === 0) {
            throw new Error(`No response from model "${model}". This model may not exist or may not be available. Please verify the model name.`)
          }
          throw new Error(`Empty response from AI model "${model}". The model may not support this type of request.`)
        }
        parsed = JSON.parse(content)
        break
      } catch (parseError) {
        // Only SyntaxError (malformed JSON) is retried; everything else rethrows
        if (parseError instanceof SyntaxError && parseAttempts < MAX_PARSE_RETRIES) {
          parseAttempts++
          console.warn(`[AI Assignment] JSON parse failed, retrying (${parseAttempts}/${MAX_PARSE_RETRIES})`)
          // Retry the API call with hint appended to the user prompt
          const retryParams = buildCompletionParams(model, {
            messages: [
              { role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
              { role: 'user', content: userPrompt + '\n\nIMPORTANT: Please ensure valid JSON output.' },
            ],
            jsonMode: true,
            temperature: 0.1,
            maxTokens: 4000,
          })
          response = await openai.chat.completions.create(retryParams)
          const retryUsage = extractTokenUsage(response)
          tokensUsed += retryUsage.totalTokens
          continue
        }
        throw parseError
      }
    }
    // De-anonymize and add to suggestions; scores are clamped to [0, 1]
    // because the model occasionally returns out-of-range values.
    const deanonymized = deanonymizeResults(
      (parsed.assignments || []).map((a) => ({
        jurorId: a.juror_id,
        projectId: a.project_id,
        confidenceScore: Math.min(1, Math.max(0, a.confidence_score)),
        expertiseMatchScore: Math.min(1, Math.max(0, a.expertise_match_score)),
        reasoning: a.reasoning,
      })),
      anonymizedData.jurorMappings,
      batchMappings
    )
    for (const item of deanonymized) {
      suggestions.push({
        jurorId: item.realJurorId,
        projectId: item.realProjectId,
        confidenceScore: item.confidenceScore,
        reasoning: item.reasoning,
        expertiseMatchScore: item.expertiseMatchScore,
      })
    }
  } catch (error) {
    if (error instanceof SyntaxError) {
      // Terminal parse failure (retries exhausted): log the error usage row
      // and return an empty batch instead of aborting the whole run.
      const parseError = createParseError(error.message)
      logAIError('Assignment', 'batch processing', parseError)
      await logAIUsage({
        userId,
        action: 'ASSIGNMENT',
        entityType: 'Round',
        entityId,
        model,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: tokensUsed,
        batchSize: batchProjects.length,
        itemsProcessed: 0,
        status: 'ERROR',
        errorMessage: parseError.message,
      })
    } else {
      throw error
    }
  }
  return { suggestions, tokensUsed }
}
/**
 * Build the user prompt for one batch of projects.
 *
 * All identifiers in the output are anonymous (JUROR_xxx / PROJECT_xxx):
 * existing assignments and per-juror limits are remapped onto anonymous IDs
 * before serialization, so no real IDs reach the model.
 *
 * @param jurors          Anonymized juror records.
 * @param projects        Anonymized project records for this batch.
 * @param constraints     Coverage/capacity constraints (real IDs).
 * @param jurorMappings   realId -> anonymousId mappings for jurors.
 * @param projectMappings realId -> anonymousId mappings for this batch.
 * @returns The complete user-role prompt string.
 */
function buildBatchPrompt(
  jurors: AnonymizationResult['jurors'],
  projects: AnonymizationResult['projects'],
  constraints: AssignmentConstraints,
  jurorMappings: AnonymizationResult['jurorMappings'],
  projectMappings: AnonymizationResult['projectMappings']
): string {
  // Lookup tables: real ID -> anonymous ID
  const jurorIdMap = new Map(jurorMappings.map((m) => [m.realId, m.anonymousId]))
  const projectIdMap = new Map(projectMappings.map((m) => [m.realId, m.anonymousId]))
  // Existing assignments expressed with anonymous IDs; pairs that don't
  // resolve (e.g. a project outside this batch's mappings) are dropped.
  const anonymousExisting = constraints.existingAssignments
    .map((a) => ({
      jurorId: jurorIdMap.get(a.jurorId),
      projectId: projectIdMap.get(a.projectId),
    }))
    .filter((a) => a.jurorId && a.projectId)
  // Per-juror limits (override the global max), keyed by anonymous ID
  let jurorLimitsStr = ''
  if (constraints.jurorLimits && Object.keys(constraints.jurorLimits).length > 0) {
    const anonymousLimits: Record<string, number> = {}
    for (const [realId, limit] of Object.entries(constraints.jurorLimits)) {
      const anonId = jurorIdMap.get(realId)
      if (anonId) {
        anonymousLimits[anonId] = limit
      }
    }
    if (Object.keys(anonymousLimits).length > 0) {
      jurorLimitsStr = `\nJUROR_LIMITS: ${JSON.stringify(anonymousLimits)} (per-juror max assignments, override global max)`
    }
  }
  // Fix: use ?? (not ||) so an explicit max of 0 is reported as "0" rather
  // than "unlimited". Also surface the minimum target when configured — the
  // system prompt weights "Minimum Target" at 20% but the value was never
  // included in the prompt data before.
  const maxStr = constraints.maxAssignmentsPerJuror ?? 'unlimited'
  const minStr =
    constraints.minAssignmentsPerJuror != null
      ? `, min ${constraints.minAssignmentsPerJuror}/juror`
      : ''
  return `JURORS: ${JSON.stringify(jurors)}
PROJECTS: ${JSON.stringify(projects)}
CONSTRAINTS: ${constraints.requiredReviewsPerProject} reviews/project, max ${maxStr}/juror${minStr}${jurorLimitsStr}
EXISTING: ${JSON.stringify(anonymousExisting)}
Return JSON: {"assignments": [...]}`
}
/**
 * Generate AI-powered assignment suggestions with batching.
 *
 * Pipeline: truncate project descriptions -> anonymize (GDPR) -> validate
 * anonymization -> call the model per batch of ASSIGNMENT_BATCH_SIZE ->
 * merge suggestions. Falls back to the local algorithm whenever OpenAI is
 * unconfigured, anonymization fails validation, or any batch errors out.
 *
 * @param jurors      Candidate jurors (real IDs).
 * @param projects    Projects needing reviews (real IDs).
 * @param constraints Coverage/capacity constraints.
 * @param userId      Optional actor ID for usage logging.
 * @param entityId    Optional Round ID for usage logging.
 * @param onProgress  Optional per-batch progress callback.
 * @returns Suggestions with real IDs; `fallbackUsed` indicates which path ran.
 */
export async function generateAIAssignments(
  jurors: JurorForAssignment[],
  projects: ProjectForAssignment[],
  constraints: AssignmentConstraints,
  userId?: string,
  entityId?: string,
  onProgress?: AssignmentProgressCallback
): Promise<AIAssignmentResult> {
  // Truncate descriptions before anonymization to bound prompt size/cost
  const truncatedProjects = projects.map((p) => ({
    ...p,
    description: truncateAndSanitize(p.description, DESCRIPTION_LIMITS.ASSIGNMENT),
  }))
  // Anonymize data before sending to AI (GDPR: no personal info to OpenAI)
  const anonymizedData = anonymizeForAI(jurors, truncatedProjects)
  // Validate anonymization — never send data that failed the check
  if (!validateAnonymization(anonymizedData)) {
    console.error('[AI Assignment] Anonymization validation failed, falling back to algorithm')
    return generateFallbackAssignments(jurors, projects, constraints)
  }
  // Fix: track the resolved model outside the try block so the error path
  // logs the real model name instead of a hard-coded 'unknown'.
  let model = 'unknown'
  try {
    const openai = await getOpenAI()
    if (!openai) {
      console.log('[AI Assignment] OpenAI not configured, using fallback algorithm')
      return generateFallbackAssignments(jurors, projects, constraints)
    }
    model = await getConfiguredModel()
    console.log(`[AI Assignment] Using model: ${model} for ${projects.length} projects in batches of ${ASSIGNMENT_BATCH_SIZE}`)
    const allSuggestions: AIAssignmentSuggestion[] = []
    let totalTokens = 0
    // Process projects in batches; mappings are sliced with the same range
    // as projects (assumes anonymizeForAI keeps them index-aligned — verify
    // if its ordering ever changes).
    const totalBatches = Math.ceil(anonymizedData.projects.length / ASSIGNMENT_BATCH_SIZE)
    for (let i = 0; i < anonymizedData.projects.length; i += ASSIGNMENT_BATCH_SIZE) {
      const batchProjects = anonymizedData.projects.slice(i, i + ASSIGNMENT_BATCH_SIZE)
      const batchMappings = anonymizedData.projectMappings.slice(i, i + ASSIGNMENT_BATCH_SIZE)
      const currentBatch = Math.floor(i / ASSIGNMENT_BATCH_SIZE) + 1
      console.log(`[AI Assignment] Processing batch ${currentBatch}/${totalBatches}`)
      const { suggestions, tokensUsed } = await processAssignmentBatch(
        openai,
        model,
        anonymizedData,
        batchProjects,
        batchMappings,
        constraints,
        userId,
        entityId
      )
      allSuggestions.push(...suggestions)
      totalTokens += tokensUsed
      // Report progress after each batch
      if (onProgress) {
        const processedCount = Math.min(currentBatch * ASSIGNMENT_BATCH_SIZE, projects.length)
        await onProgress({
          currentBatch,
          totalBatches,
          processedCount,
          totalProjects: projects.length,
        })
      }
    }
    console.log(`[AI Assignment] Completed. Total suggestions: ${allSuggestions.length}, Total tokens: ${totalTokens}`)
    return {
      success: true,
      suggestions: allSuggestions,
      tokensUsed: totalTokens,
      fallbackUsed: false,
    }
  } catch (error) {
    const classified = classifyAIError(error)
    logAIError('Assignment', 'generateAIAssignments', classified)
    // Log failed attempt with the model that was actually in use (if resolved)
    await logAIUsage({
      userId,
      action: 'ASSIGNMENT',
      entityType: 'Round',
      entityId,
      model,
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: 0,
      batchSize: projects.length,
      itemsProcessed: 0,
      status: 'ERROR',
      errorMessage: classified.message,
    })
    console.error('[AI Assignment] AI assignment failed, using fallback:', classified.message)
    return generateFallbackAssignments(jurors, projects, constraints)
  }
}
// ─── Fallback Algorithm ──────────────────────────────────────────────────────
/**
 * Fallback algorithm-based assignment when AI is unavailable.
 *
 * Greedy: projects with the fewest existing reviews are filled first; for
 * each project, eligible jurors are ranked by 50% expertise match, 30% load
 * balance, 20% under-minimum bonus (mirroring the AI prompt's weights).
 *
 * @returns Always `success: true` with `fallbackUsed: true`.
 */
export function generateFallbackAssignments(
  jurors: JurorForAssignment[],
  projects: ProjectForAssignment[],
  constraints: AssignmentConstraints
): AIAssignmentResult {
  const suggestions: AIAssignmentSuggestion[] = []
  // Fast membership test for "juror already reviews this project"
  const existingSet = new Set(
    constraints.existingAssignments.map((a) => `${a.jurorId}:${a.projectId}`)
  )
  // Fix: honor per-juror limits (constraints.jurorLimits) like the AI path
  // does in buildBatchPrompt — previously the fallback ignored them.
  // Precedence: per-juror override > juror's own max > global max.
  const effectiveMax = (juror: JurorForAssignment): number | undefined =>
    constraints.jurorLimits?.[juror.id] ??
    juror.maxAssignments ??
    constraints.maxAssignmentsPerJuror
  // Track assignments per juror and project
  const jurorAssignments = new Map<string, number>()
  const projectAssignments = new Map<string, number>()
  // Initialize counts from existing assignments
  for (const assignment of constraints.existingAssignments) {
    jurorAssignments.set(
      assignment.jurorId,
      (jurorAssignments.get(assignment.jurorId) || 0) + 1
    )
    projectAssignments.set(
      assignment.projectId,
      (projectAssignments.get(assignment.projectId) || 0) + 1
    )
  }
  // Also fold in pre-loaded _count values; take the max so double-counting
  // (the same assignment appearing in both sources) doesn't inflate loads
  for (const juror of jurors) {
    const current = juror._count?.assignments || 0
    jurorAssignments.set(
      juror.id,
      Math.max(jurorAssignments.get(juror.id) || 0, current)
    )
  }
  for (const project of projects) {
    const current = project._count?.assignments || 0
    projectAssignments.set(
      project.id,
      Math.max(projectAssignments.get(project.id) || 0, current)
    )
  }
  // Sort projects by need (fewest assignments first)
  const sortedProjects = [...projects].sort((a, b) => {
    const aCount = projectAssignments.get(a.id) || 0
    const bCount = projectAssignments.get(b.id) || 0
    return aCount - bCount
  })
  // For each project, find best matching jurors
  for (const project of sortedProjects) {
    const currentProjectAssignments = projectAssignments.get(project.id) || 0
    const neededReviews = Math.max(
      0,
      constraints.requiredReviewsPerProject - currentProjectAssignments
    )
    if (neededReviews === 0) continue
    // Score all available jurors
    const scoredJurors = jurors
      .filter((juror) => {
        // Skip jurors already assigned to this project
        if (existingSet.has(`${juror.id}:${project.id}`)) return false
        // Skip jurors at/over their effective capacity
        const currentAssignments = jurorAssignments.get(juror.id) || 0
        const maxAssignments = effectiveMax(juror) ?? Infinity
        if (currentAssignments >= maxAssignments) return false
        return true
      })
      .map((juror) => {
        const currentLoad = jurorAssignments.get(juror.id) || 0
        // 20 / 5 are heuristic defaults when no limits are configured
        const maxLoad = effectiveMax(juror) ?? 20
        const minTarget = constraints.minAssignmentsPerJuror ?? 5
        return {
          juror,
          score: calculateExpertiseScore(juror.expertiseTags, project.tags, project.tagConfidences),
          loadScore: calculateLoadScore(currentLoad, maxLoad),
          underMinBonus: calculateUnderMinBonus(currentLoad, minTarget),
        }
      })
      .sort((a, b) => {
        // Combined score: 50% expertise, 30% load balancing, 20% under-min bonus
        const aTotal = a.score * 0.5 + a.loadScore * 0.3 + a.underMinBonus * 0.2
        const bTotal = b.score * 0.5 + b.loadScore * 0.3 + b.underMinBonus * 0.2
        return bTotal - aTotal
      })
    // Assign top jurors
    for (let i = 0; i < Math.min(neededReviews, scoredJurors.length); i++) {
      const { juror, score } = scoredJurors[i]
      suggestions.push({
        jurorId: juror.id,
        projectId: project.id,
        confidenceScore: score,
        expertiseMatchScore: score,
        reasoning: generateFallbackReasoning(
          juror.expertiseTags,
          project.tags,
          score
        ),
      })
      // Update tracking so later projects see this juror's new load
      existingSet.add(`${juror.id}:${project.id}`)
      jurorAssignments.set(juror.id, (jurorAssignments.get(juror.id) || 0) + 1)
      projectAssignments.set(
        project.id,
        (projectAssignments.get(project.id) || 0) + 1
      )
    }
  }
  return {
    success: true,
    suggestions,
    fallbackUsed: true,
  }
}
/**
 * Score (0-1) how well a juror's expertise tags cover a project's tags.
 *
 * With confidence data, matches are weighted by each tag's confidence;
 * otherwise a flat match ratio is used. Either way the ratio contributes
 * 80% and a fixed 0.2 bonus is added whenever at least one tag matches.
 * Returns a neutral 0.5 when either side has no tags (or all weights are 0).
 */
function calculateExpertiseScore(
  jurorTags: string[],
  projectTags: string[],
  tagConfidences?: Array<{ name: string; confidence: number }>
): number {
  const NEUTRAL = 0.5
  if (!jurorTags.length || !projectTags.length) return NEUTRAL
  const known = new Set(jurorTags.map((tag) => tag.toLowerCase()))
  // Weighted path: fold confidences into matched/total accumulators
  if (tagConfidences && tagConfidences.length > 0) {
    let matched = 0
    let total = 0
    for (const { name, confidence } of tagConfidences) {
      total += confidence
      if (known.has(name.toLowerCase())) matched += confidence
    }
    if (total === 0) return NEUTRAL
    return Math.min(1, (matched / total) * 0.8 + (matched > 0 ? 0.2 : 0))
  }
  // Unweighted path: plain overlap ratio over the project's flat tags
  const hits = projectTags.filter((tag) => known.has(tag.toLowerCase())).length
  return Math.min(1, (hits / projectTags.length) * 0.8 + (hits > 0 ? 0.2 : 0))
}
/**
 * Load-balancing score in [0, 1]: 1 for a fully idle juror, falling
 * linearly to 0 at (or beyond) capacity. A zero capacity scores 0.
 */
function calculateLoadScore(currentLoad: number, maxLoad: number): number {
  return maxLoad === 0 ? 0 : Math.max(0, 1 - currentLoad / maxLoad)
}
/**
 * Bonus in [0, 1] for jurors still below their minimum target:
 * 1.0 at zero load, shrinking linearly to 0 once the minimum is reached.
 */
function calculateUnderMinBonus(currentLoad: number, minTarget: number): number {
  const deficit = minTarget - currentLoad
  // At or above the minimum -> no bonus; otherwise scale by remaining deficit
  return deficit > 0 ? deficit / minTarget : 0
}
/**
 * Human-readable justification for a fallback assignment: names the shared
 * tags when any exist, otherwise explains the capacity/coverage rationale.
 */
function generateFallbackReasoning(
  jurorTags: string[],
  projectTags: string[],
  score: number
): string {
  const known = new Set(jurorTags.map((t) => t.toLowerCase()))
  const shared = projectTags.filter((t) => known.has(t.toLowerCase()))
  if (shared.length > 0) {
    const pct = (score * 100).toFixed(0)
    return `Expertise match: ${shared.join(', ')}. Match score: ${pct}%.`
  }
  return score >= 0.5
    ? `Assigned for workload balance. No direct expertise match but available capacity.`
    : `Assigned to ensure project coverage.`
}