diff --git a/src/server/services/ai-assignment.ts b/src/server/services/ai-assignment.ts index b49cca5..5c6b4b3 100644 --- a/src/server/services/ai-assignment.ts +++ b/src/server/services/ai-assignment.ts @@ -1,13 +1,10 @@ /** - * AI-Powered Assignment Service + * AI-Powered Assignment Service (Hybrid Approach) * - * Uses GPT to analyze juror expertise and project requirements - * to generate optimal assignment suggestions. - * - * Optimization: - * - Batched processing (15 projects per batch) - * - Description truncation (300 chars) - * - Token tracking and cost logging + * Phase 1 — AI Scoring: ONE API call asks GPT to score each juror's affinity + * for each project (expertise match, reasoning). Returns a preference matrix. + * Phase 2 — Algorithm: Uses the AI scores to assign N reviewers per project + * with even workload distribution, respecting caps and COI constraints. * * GDPR Compliance: * - All data anonymized before AI processing @@ -27,45 +24,6 @@ import { type AnonymizationResult, } from './anonymization' -// ─── Constants ─────────────────────────────────────────────────────────────── - -const ASSIGNMENT_BATCH_SIZE = 10 - -// Structured system prompt for assignment -const ASSIGNMENT_SYSTEM_PROMPT = `You are an expert jury assignment optimizer for an ocean conservation competition. - -## Your Role -Match jurors to projects based on BALANCED workload distribution and expertise alignment. Even distribution is the TOP PRIORITY. - -## Available Data -- **Jurors**: expertiseTags, bio, country, currentAssignmentCount, maxAssignments -- **Projects**: title, description, tags (with confidence 0-1), category, oceanIssue, country, institution, teamSize, fileTypes - -## CRITICAL RULES (Must Follow) -1. **MULTIPLE REVIEWERS PER PROJECT**: Each project MUST be assigned to EXACTLY the number of DIFFERENT jurors specified in REVIEWS_PER_PROJECT. For example, if REVIEWS_PER_PROJECT is 3, every project needs 3 separate assignment objects with 3 different juror_ids. This is the most important rule. -2. **HARD CAP**: NEVER assign a juror more than their maxAssignments. Check currentAssignmentCount + new assignments in this batch. If a juror is at capacity, skip them. -3. **EVEN DISTRIBUTION**: Spread assignments evenly. Always prefer the juror with the FEWEST total assignments (currentAssignmentCount + assignments in this batch). Only deviate if the least-loaded juror has zero relevance and another with 1-2 more has strong expertise. - -## Matching Criteria (Weighted) -- Workload Balance (50%): Prefer least-loaded jurors. -- Expertise Match (35%): Tag overlap, bio background, ocean issue alignment. -- Diversity (15%): Avoid same-country; mix expertise per project. - -## Output Format -Return a JSON object. For N reviews/project, each project appears N times with DIFFERENT juror_ids: -{ - "assignments": [ - { "juror_id": "JUROR_001", "project_id": "PROJECT_001", "confidence_score": 0.85, "expertise_match_score": 0.7, "reasoning": "justification" }, - { "juror_id": "JUROR_003", "project_id": "PROJECT_001", "confidence_score": 0.72, "expertise_match_score": 0.5, "reasoning": "justification" }, - { "juror_id": "JUROR_005", "project_id": "PROJECT_001", "confidence_score": 0.65, "expertise_match_score": 0.3, "reasoning": "justification" } - ] -} - -## Guidelines -- Total assignments in output should be approximately: number_of_projects × REVIEWS_PER_PROJECT -- NEVER exceed a juror's maxAssignments cap -- Spread evenly — max-loaded minus min-loaded juror should differ by at most 2` - // ─── Types ─────────────────────────────────────────────────────────────────── export interface AIAssignmentSuggestion { @@ -137,260 +95,448 @@ export interface AssignmentProgressCallback { }): Promise } -// ─── AI Processing ─────────────────────────────────────────────────────────── +/** Per-juror ranking from AI: which projects they should review */ +interface JurorAffinityRow { + jurorId: string // anonymous ID + rankings: Array<{ + projectId: string // anonymous ID + score: number // 0-100 + reasoning: string + }> +} + +// ─── System Prompt ────────────────────────────────────────────────────────── + +const AFFINITY_SYSTEM_PROMPT = `You are an expert jury assignment optimizer for an ocean conservation competition. + +## Your Task +Score how well each juror matches each project. Return a compact affinity matrix. + +## Scoring Criteria (100-point scale) +- **Expertise Match (60 pts)**: Tag overlap, bio background relevance, ocean issue alignment +- **Diversity Benefit (25 pts)**: Different country from project, different expertise angle from other jurors +- **Category Fit (15 pts)**: Experience with startup vs concept evaluation, institutional familiarity + +## Output Format +Return JSON with this exact structure: +{ + "affinities": [ + { + "juror_id": "JUROR_001", + "rankings": [ + {"project_id": "PROJECT_001", "score": 85, "reason": "Strong coral reef expertise matches project focus"}, + {"project_id": "PROJECT_005", "score": 72, "reason": "Marine biology background relevant to biodiversity project"} + ] + } + ] +} + +## Rules +- For each juror, list their TOP project matches (at least the top 50% of projects, more is better) +- Scores must be integers 0-100 +- Keep "reason" to one short sentence (under 20 words) +- A juror with no matching expertise should still get scores (based on general competence), just lower ones (30-50 range) +- Do NOT include projects that a juror has zero relevance for (score would be under 20) +- Return VALID JSON only` + +// ─── AI Scoring Phase ─────────────────────────────────────────────────────── /** - * Process a batch of projects for assignment suggestions + * Build the user prompt for the single AI affinity call */ -async function processAssignmentBatch( +function buildAffinityPrompt( + anonymizedData: AnonymizationResult, + existingPairs: Set, +): string { + // Compact juror representation + const jurorLines = anonymizedData.jurors.map((j) => { + const parts = [j.anonymousId] + if (j.expertiseTags.length > 0) parts.push(`tags:[${j.expertiseTags.join(',')}]`) + if (j.bio) parts.push(`bio:"${j.bio.slice(0, 150)}"`) + if (j.country) parts.push(`country:${j.country}`) + return parts.join(' | ') + }) + + // Compact project representation + const projectLines = anonymizedData.projects.map((p) => { + const parts = [p.anonymousId, `"${p.title}"`] + if (p.tags.length > 0) parts.push(`tags:[${p.tags.map((t) => t.name).join(',')}]`) + if (p.category) parts.push(`cat:${p.category}`) + if (p.oceanIssue) parts.push(`issue:${p.oceanIssue}`) + if (p.country) parts.push(`country:${p.country}`) + if (p.description) parts.push(`desc:"${p.description.slice(0, 100)}"`) + return parts.join(' | ') + }) + + // Note existing assignments to avoid + let existingNote = '' + if (existingPairs.size > 0) { + existingNote = `\nALREADY_ASSIGNED (do NOT score these pairs): ${[...existingPairs].join(', ')}` + } + + return `## JURORS (${jurorLines.length}) +${jurorLines.join('\n')} + +## PROJECTS (${projectLines.length}) +${projectLines.join('\n')} +${existingNote} + +Score each juror's affinity for the projects. For each juror, return their top project matches with scores (0-100) and a short reason.` +} + +/** + * Call AI once to get the full affinity matrix + */ +async function getAIAffinityMatrix( openai: NonNullable>>, model: string, anonymizedData: AnonymizationResult, - batchProjects: typeof anonymizedData.projects, - batchMappings: typeof anonymizedData.projectMappings, - constraints: AssignmentConstraints, + existingPairs: Set, userId?: string, - entityId?: string + entityId?: string, ): Promise<{ - suggestions: AIAssignmentSuggestion[] + affinities: JurorAffinityRow[] tokensUsed: number }> { - const suggestions: AIAssignmentSuggestion[] = [] - let tokensUsed = 0 + const userPrompt = buildAffinityPrompt(anonymizedData, existingPairs) - // Build prompt with batch-specific data - const userPrompt = buildBatchPrompt( - anonymizedData.jurors, - batchProjects, - constraints, - anonymizedData.jurorMappings, - batchMappings - ) + // Estimate tokens: ~50 tokens per juror-project score entry + // For 15 jurors × 99 projects top 60% = ~890 entries × 50 = ~44500 output tokens + // Cap at a reasonable limit + const estimatedEntries = anonymizedData.jurors.length * Math.ceil(anonymizedData.projects.length * 0.6) + const estimatedTokens = Math.min(64000, Math.max(8000, estimatedEntries * 50 + 500)) + + console.log(`[AI Assignment] Affinity call: ${anonymizedData.jurors.length} jurors × ${anonymizedData.projects.length} projects, est. ${estimatedEntries} entries, maxTokens=${estimatedTokens}`) + + const params = buildCompletionParams(model, { + messages: [ + { role: 'system', content: AFFINITY_SYSTEM_PROMPT }, + { role: 'user', content: userPrompt }, + ], + jsonMode: true, + temperature: 0.1, + maxTokens: estimatedTokens, + }) - const MAX_PARSE_RETRIES = 2 - let parseAttempts = 0 let response: Awaited> try { - // Calculate maxTokens based on expected assignments - // ~150 tokens per assignment JSON object, capped at 12000 - const expectedAssignments = batchProjects.length * constraints.requiredReviewsPerProject - const estimatedTokens = Math.min(12000, Math.max(4000, expectedAssignments * 200 + 500)) - - const params = buildCompletionParams(model, { - messages: [ - { role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT }, - { role: 'user', content: userPrompt }, - ], - jsonMode: true, - temperature: 0.1, - maxTokens: estimatedTokens, - }) - - try { - response = await openai.chat.completions.create(params) - } catch (apiError) { - // Provide clearer error for model-related issues - const errorMsg = apiError instanceof Error ? apiError.message : String(apiError) - if (errorMsg.includes('model') || errorMsg.includes('does not exist')) { - throw new Error(`Invalid AI model "${model}". Please check the model name in Settings > AI Configuration.`) - } - throw apiError + response = await openai.chat.completions.create(params) + } catch (apiError) { + const errorMsg = apiError instanceof Error ? apiError.message : String(apiError) + if (errorMsg.includes('model') || errorMsg.includes('does not exist')) { + throw new Error(`Invalid AI model "${model}". Please check the model name in Settings > AI Configuration.`) } + throw apiError + } - const usage = extractTokenUsage(response) - tokensUsed = usage.totalTokens + const usage = extractTokenUsage(response) - // Log batch usage - await logAIUsage({ - userId, - action: 'ASSIGNMENT', - entityType: 'Round', - entityId, - model, - promptTokens: usage.promptTokens, - completionTokens: usage.completionTokens, - totalTokens: usage.totalTokens, - batchSize: batchProjects.length, - itemsProcessed: batchProjects.length, - status: 'SUCCESS', - }) + await logAIUsage({ + userId, + action: 'ASSIGNMENT', + entityType: 'Round', + entityId, + model, + promptTokens: usage.promptTokens, + completionTokens: usage.completionTokens, + totalTokens: usage.totalTokens, + batchSize: anonymizedData.projects.length, + itemsProcessed: anonymizedData.projects.length, + status: 'SUCCESS', + }) - // Parse with retry logic - let parsed: { - assignments: Array<{ - juror_id: string - project_id: string - confidence_score: number - expertise_match_score: number - reasoning: string - }> + // Parse response + const content = response.choices[0]?.message?.content + if (!content) { + const finishReason = response.choices[0]?.finish_reason + if (finishReason === 'content_filter') { + throw new Error('AI response was filtered. Try a different model or simplify the project descriptions.') } - - while (true) { - try { - const content = response.choices[0]?.message?.content - if (!content) { - // Check if response indicates an issue - const finishReason = response.choices[0]?.finish_reason - if (finishReason === 'content_filter') { - throw new Error('AI response was filtered. Try a different model or simplify the project descriptions.') - } - if (!response.choices || response.choices.length === 0) { - throw new Error(`No response from model "${model}". This model may not exist or may not be available. Please verify the model name.`) - } - throw new Error(`Empty response from AI model "${model}". The model may not support this type of request.`) - } - parsed = JSON.parse(content) - break - } catch (parseError) { - if (parseError instanceof SyntaxError && parseAttempts < MAX_PARSE_RETRIES) { - parseAttempts++ - console.warn(`[AI Assignment] JSON parse failed, retrying (${parseAttempts}/${MAX_PARSE_RETRIES})`) - - // Retry the API call with hint - const retryParams = buildCompletionParams(model, { - messages: [ - { role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT }, - { role: 'user', content: userPrompt + '\n\nIMPORTANT: Please ensure valid JSON output.' }, - ], - jsonMode: true, - temperature: 0.1, - maxTokens: 4000, - }) - response = await openai.chat.completions.create(retryParams) - const retryUsage = extractTokenUsage(response) - tokensUsed += retryUsage.totalTokens - continue - } - throw parseError - } - } - - // De-anonymize and add to suggestions - const deanonymized = deanonymizeResults( - (parsed.assignments || []).map((a) => ({ - jurorId: a.juror_id, - projectId: a.project_id, - confidenceScore: Math.min(1, Math.max(0, a.confidence_score)), - expertiseMatchScore: Math.min(1, Math.max(0, a.expertise_match_score)), - reasoning: a.reasoning, - })), - anonymizedData.jurorMappings, - batchMappings - ) - - for (const item of deanonymized) { - suggestions.push({ - jurorId: item.realJurorId, - projectId: item.realProjectId, - confidenceScore: item.confidenceScore, - reasoning: item.reasoning, - expertiseMatchScore: item.expertiseMatchScore, - }) - } - - } catch (error) { - if (error instanceof SyntaxError) { - const parseError = createParseError(error.message) - logAIError('Assignment', 'batch processing', parseError) - - await logAIUsage({ - userId, - action: 'ASSIGNMENT', - entityType: 'Round', - entityId, - model, - promptTokens: 0, - completionTokens: 0, - totalTokens: tokensUsed, - batchSize: batchProjects.length, - itemsProcessed: 0, - status: 'ERROR', - errorMessage: parseError.message, - }) + if (finishReason === 'length') { + console.warn('[AI Assignment] Response truncated (hit token limit). Will proceed with partial data + algorithm gap-fill.') } else { - throw error + throw new Error(`Empty response from AI model "${model}".`) } } - return { suggestions, tokensUsed } + let parsed: { + affinities: Array<{ + juror_id: string + rankings: Array<{ + project_id: string + score: number + reason: string + }> + }> + } + + try { + // Handle potentially truncated JSON by attempting repair + let jsonStr = content || '{}' + + // If truncated, try to close the JSON structure + if (!jsonStr.trim().endsWith('}')) { + console.warn('[AI Assignment] Response appears truncated, attempting JSON repair') + jsonStr = repairTruncatedJSON(jsonStr) + } + + parsed = JSON.parse(jsonStr) + } catch (parseError) { + // Try extracting JSON from markdown code blocks + const jsonMatch = (content || '').match(/```(?:json)?\s*([\s\S]*?)```/) + if (jsonMatch) { + try { + parsed = JSON.parse(jsonMatch[1]) + } catch { + throw createParseError(`Failed to parse AI affinity response: ${(parseError as Error).message}`) + } + } else { + throw createParseError(`Failed to parse AI affinity response: ${(parseError as Error).message}`) + } + } + + // Normalize to our internal format + const affinities: JurorAffinityRow[] = (parsed.affinities || []).map((a) => ({ + jurorId: a.juror_id, + rankings: (a.rankings || []).map((r) => ({ + projectId: r.project_id, + score: Math.min(100, Math.max(0, r.score)), + reasoning: r.reason || '', + })), + })) + + console.log(`[AI Assignment] Got affinities for ${affinities.length} jurors, total entries: ${affinities.reduce((sum, a) => sum + a.rankings.length, 0)}`) + + return { affinities, tokensUsed: usage.totalTokens } } /** - * Build prompt for a batch of projects + * Attempt to repair truncated JSON by closing open structures */ -function buildBatchPrompt( - jurors: AnonymizationResult['jurors'], - projects: AnonymizationResult['projects'], - constraints: AssignmentConstraints, - jurorMappings: AnonymizationResult['jurorMappings'], - projectMappings: AnonymizationResult['projectMappings'] -): string { - // Map existing assignments to anonymous IDs - const jurorIdMap = new Map(jurorMappings.map((m) => [m.realId, m.anonymousId])) - const projectIdMap = new Map(projectMappings.map((m) => [m.realId, m.anonymousId])) +function repairTruncatedJSON(json: string): string { + let s = json.trim() - const anonymousExisting = constraints.existingAssignments - .map((a) => ({ - jurorId: jurorIdMap.get(a.jurorId), - projectId: projectIdMap.get(a.projectId), - })) - .filter((a) => a.jurorId && a.projectId) + // Remove any trailing incomplete entry (cut mid-object) + const lastCompleteEntry = s.lastIndexOf('}') + if (lastCompleteEntry > 0) { + s = s.slice(0, lastCompleteEntry + 1) + } - // Build per-juror limits mapped to anonymous IDs - let jurorLimitsStr = '' - if (constraints.jurorLimits && Object.keys(constraints.jurorLimits).length > 0) { - const anonymousLimits: Record = {} - for (const [realId, limit] of Object.entries(constraints.jurorLimits)) { - const anonId = jurorIdMap.get(realId) - if (anonId) { - anonymousLimits[anonId] = limit + // Count open/close brackets + let openBrackets = 0 + let openBraces = 0 + for (const ch of s) { + if (ch === '[') openBrackets++ + else if (ch === ']') openBrackets-- + else if (ch === '{') openBraces++ + else if (ch === '}') openBraces-- + } + + // Close everything + while (openBrackets > 0) { s += ']'; openBrackets-- } + while (openBraces > 0) { s += '}'; openBraces-- } + + return s +} + +// ─── Algorithm Phase ──────────────────────────────────────────────────────── + +/** + * Build a full score matrix from AI affinities, filling gaps with fallback scores + */ +function buildScoreMatrix( + affinities: JurorAffinityRow[], + jurors: JurorForAssignment[], + projects: ProjectForAssignment[], + anonymizedData: AnonymizationResult, +): Map> { + // Create reverse mapping: anonymous ID → real ID + const jurorAnonToReal = new Map(anonymizedData.jurorMappings.map((m) => [m.anonymousId, m.realId])) + const projectAnonToReal = new Map(anonymizedData.projectMappings.map((m) => [m.anonymousId, m.realId])) + + // Matrix: realJurorId → realProjectId → { score, reasoning } + const matrix = new Map>() + + // Initialize with AI scores + for (const row of affinities) { + const realJurorId = jurorAnonToReal.get(row.jurorId) + if (!realJurorId) continue + + const jurorScores = new Map() + for (const r of row.rankings) { + const realProjectId = projectAnonToReal.get(r.projectId) + if (!realProjectId) continue + jurorScores.set(realProjectId, { + score: r.score / 100, // normalize to 0-1 + reasoning: r.reasoning, + }) + } + matrix.set(realJurorId, jurorScores) + } + + // Fill gaps: for juror-project pairs not scored by AI, use tag-based fallback + for (const juror of jurors) { + if (!matrix.has(juror.id)) { + matrix.set(juror.id, new Map()) + } + const jurorScores = matrix.get(juror.id)! + + for (const project of projects) { + if (!jurorScores.has(project.id)) { + const tagScore = calculateExpertiseScore(juror.expertiseTags, project.tags, project.tagConfidences) + jurorScores.set(project.id, { + score: tagScore * 0.5, // Scale down fallback scores + reasoning: generateFallbackReasoning(juror.expertiseTags, project.tags, tagScore), + }) } } - if (Object.keys(anonymousLimits).length > 0) { - jurorLimitsStr = `\nJUROR_LIMITS: ${JSON.stringify(anonymousLimits)} (per-juror max assignments, override global max)` - } } - const targetStr = constraints._targetPerJuror - ? `\nTARGET_PER_JUROR: ${constraints._targetPerJuror} (aim for this many total assignments per juror, ±1)` - : '' - - const expectedTotal = projects.length * constraints.requiredReviewsPerProject - - // Instead of full existing assignment list, send per-juror current load counts - // This keeps the prompt shorter as batches accumulate - const jurorCurrentLoad: Record = {} - for (const a of constraints.existingAssignments) { - const anonId = jurorIdMap.get(a.jurorId) - if (anonId) jurorCurrentLoad[anonId] = (jurorCurrentLoad[anonId] || 0) + 1 - } - - // Also track which projects in this batch already have assignments - const projectExistingReviewers: Record = {} - for (const a of constraints.existingAssignments) { - const anonProjectId = projectIdMap.get(a.projectId) - const anonJurorId = jurorIdMap.get(a.jurorId) - if (anonProjectId && anonJurorId) { - if (!projectExistingReviewers[anonProjectId]) projectExistingReviewers[anonProjectId] = [] - projectExistingReviewers[anonProjectId].push(anonJurorId) - } - } - - return `JURORS: ${JSON.stringify(jurors)} -PROJECTS: ${JSON.stringify(projects)} -REVIEWS_PER_PROJECT: ${constraints.requiredReviewsPerProject} (each project MUST get exactly ${constraints.requiredReviewsPerProject} different jurors) -MAX_PER_JUROR: ${constraints.maxAssignmentsPerJuror || 'unlimited'} (HARD LIMIT — never exceed)${jurorLimitsStr}${targetStr} -CURRENT_JUROR_LOAD: ${JSON.stringify(jurorCurrentLoad)} (add these to currentAssignmentCount to get true total) -ALREADY_ASSIGNED: ${JSON.stringify(projectExistingReviewers)} (do NOT assign these juror-project pairs again) -EXPECTED_OUTPUT: ${expectedTotal} assignment objects (${projects.length} projects × ${constraints.requiredReviewsPerProject} reviewers) -IMPORTANT: Every project must appear ${constraints.requiredReviewsPerProject} times with ${constraints.requiredReviewsPerProject} DIFFERENT juror_ids. Pick the least-loaded jurors first. -Return JSON: {"assignments": [...]}` + return matrix } /** - * Generate AI-powered assignment suggestions with batching + * Balanced assignment algorithm using AI affinity scores. + * + * Strategy: iteratively assign the best available juror to each under-covered + * project, always preferring the least-loaded juror among those with decent scores. + */ +function assignFromScores( + scoreMatrix: Map>, + jurors: JurorForAssignment[], + projects: ProjectForAssignment[], + constraints: AssignmentConstraints, + maxCap: number, +): AIAssignmentSuggestion[] { + const result: AIAssignmentSuggestion[] = [] + + // Track state + const assignedPairs = new Set() + const jurorLoad = new Map() // total load (existing + new) + const projectCoverage = new Map() // how many reviewers assigned + + // Initialize from existing assignments + for (const ea of constraints.existingAssignments) { + assignedPairs.add(`${ea.jurorId}:${ea.projectId}`) + jurorLoad.set(ea.jurorId, (jurorLoad.get(ea.jurorId) || 0) + 1) + projectCoverage.set(ea.projectId, (projectCoverage.get(ea.projectId) || 0) + 1) + } + + // Also count existing DB assignments from _count + for (const j of jurors) { + const dbCount = j._count?.assignments || 0 + jurorLoad.set(j.id, Math.max(jurorLoad.get(j.id) || 0, dbCount)) + } + for (const p of projects) { + const dbCount = p._count?.assignments || 0 + projectCoverage.set(p.id, Math.max(projectCoverage.get(p.id) || 0, dbCount)) + } + + const getEffectiveCap = (jurorId: string) => { + if (constraints.jurorLimits?.[jurorId]) return constraints.jurorLimits[jurorId] + const juror = jurors.find((j) => j.id === jurorId) + return juror?.maxAssignments ?? maxCap + } + + // Ideal target: distribute evenly + const totalNeeded = projects.reduce((sum, p) => { + const current = projectCoverage.get(p.id) || 0 + return sum + Math.max(0, constraints.requiredReviewsPerProject - current) + }, 0) + const idealPerJuror = Math.ceil(totalNeeded / jurors.length) + + console.log(`[AI Assignment] Algorithm: ${totalNeeded} slots to fill, ideal ${idealPerJuror}/juror, cap ${maxCap}/juror`) + + // Iterative assignment: repeat until all projects are covered or no more capacity + for (let pass = 0; pass < constraints.requiredReviewsPerProject; pass++) { + // Sort projects by coverage gap (most under-covered first) + const projectsByNeed = [...projects] + .map((p) => ({ + project: p, + current: projectCoverage.get(p.id) || 0, + needed: constraints.requiredReviewsPerProject, + })) + .filter((pp) => pp.current < pp.needed) + .sort((a, b) => (a.current - a.needed) - (b.current - b.needed)) + + if (projectsByNeed.length === 0) break + + for (const { project } of projectsByNeed) { + const currentCoverage = projectCoverage.get(project.id) || 0 + if (currentCoverage >= constraints.requiredReviewsPerProject) continue + + // Find best available juror: weighted by AI score AND workload balance + const candidates = jurors + .filter((j) => { + const pairKey = `${j.id}:${project.id}` + if (assignedPairs.has(pairKey)) return false + const load = jurorLoad.get(j.id) || 0 + return load < getEffectiveCap(j.id) + }) + .map((j) => { + const load = jurorLoad.get(j.id) || 0 + const aiData = scoreMatrix.get(j.id)?.get(project.id) + const aiScore = aiData?.score ?? 0.3 + const reasoning = aiData?.reasoning ?? 'Assigned for coverage' + + // Workload penalty: heavily penalize jurors above ideal target + // This ensures even distribution + const loadRatio = load / Math.max(1, idealPerJuror) + const loadPenalty = loadRatio > 1 + ? 0.3 * Math.pow(0.5, loadRatio - 1) // Steep drop-off above ideal + : 1 - (loadRatio * 0.4) // Gentle linear decrease up to ideal + + // Combined score: 55% AI score, 45% workload balance + const combinedScore = aiScore * 0.55 + loadPenalty * 0.45 + + return { juror: j, aiScore, combinedScore, reasoning, load } + }) + .sort((a, b) => b.combinedScore - a.combinedScore) + + if (candidates.length === 0) continue + + const best = candidates[0] + + result.push({ + jurorId: best.juror.id, + projectId: project.id, + confidenceScore: best.aiScore, + expertiseMatchScore: best.aiScore, + reasoning: best.reasoning, + }) + + assignedPairs.add(`${best.juror.id}:${project.id}`) + jurorLoad.set(best.juror.id, (best.load) + 1) + projectCoverage.set(project.id, (currentCoverage) + 1) + } + } + + // Log final distribution + const newAssignmentsPerJuror = new Map() + for (const s of result) { + newAssignmentsPerJuror.set(s.jurorId, (newAssignmentsPerJuror.get(s.jurorId) || 0) + 1) + } + const loads = [...newAssignmentsPerJuror.values()] + if (loads.length > 0) { + console.log(`[AI Assignment] Distribution: min=${Math.min(...loads)}, max=${Math.max(...loads)}, avg=${(loads.reduce((a, b) => a + b, 0) / loads.length).toFixed(1)}`) + } + + const uncovered = projects.filter((p) => (projectCoverage.get(p.id) || 0) < constraints.requiredReviewsPerProject) + if (uncovered.length > 0) { + console.warn(`[AI Assignment] ${uncovered.length} projects still under-covered after assignment`) + } + + return result +} + +// ─── Main Entry Point ─────────────────────────────────────────────────────── + +/** + * Generate AI-powered assignment suggestions (hybrid approach). + * + * 1. ONE AI call: get affinity scores for all juror-project pairs + * 2. Algorithm: assign N reviewers per project using AI scores + workload balancing */ export async function generateAIAssignments( jurors: JurorForAssignment[], @@ -398,7 +544,7 @@ export async function generateAIAssignments( constraints: AssignmentConstraints, userId?: string, entityId?: string, - onProgress?: AssignmentProgressCallback + _onProgress?: AssignmentProgressCallback ): Promise { // Truncate descriptions before anonymization const truncatedProjects = projects.map((p) => ({ @@ -415,6 +561,20 @@ export async function generateAIAssignments( return generateFallbackAssignments(jurors, projects, constraints) } + // Build existing pair set for AI (anonymous IDs) + const jurorRealToAnon = new Map(anonymizedData.jurorMappings.map((m) => [m.realId, m.anonymousId])) + const projectRealToAnon = new Map(anonymizedData.projectMappings.map((m) => [m.realId, m.anonymousId])) + const existingAnonPairs = new Set() + for (const ea of constraints.existingAssignments) { + const aJ = jurorRealToAnon.get(ea.jurorId) + const aP = projectRealToAnon.get(ea.projectId) + if (aJ && aP) existingAnonPairs.add(`${aJ}:${aP}`) + } + + // Calculate caps + const totalNeeded = projects.length * constraints.requiredReviewsPerProject + const maxCap = constraints.maxAssignmentsPerJuror ?? Math.ceil(totalNeeded / jurors.length) + 2 + try { const openai = await getOpenAI() @@ -424,84 +584,30 @@ export async function generateAIAssignments( } const model = await getConfiguredModel() - console.log(`[AI Assignment] Using model: ${model} for ${projects.length} projects in batches of ${ASSIGNMENT_BATCH_SIZE}`) + console.log(`[AI Assignment] Hybrid approach: ${projects.length} projects, ${jurors.length} jurors, ${constraints.requiredReviewsPerProject} reviews/project, model: ${model}`) - const allSuggestions: AIAssignmentSuggestion[] = [] - let totalTokens = 0 + // ── Phase 1: AI Scoring (single call) ── + console.log('[AI Assignment] Phase 1: Getting AI affinity scores...') + const { affinities, tokensUsed } = await getAIAffinityMatrix( + openai, + model, + anonymizedData, + existingAnonPairs, + userId, + entityId, + ) - // Calculate ideal distribution for the prompt - const totalNeededAssignments = projects.length * constraints.requiredReviewsPerProject - const maxCap = constraints.maxAssignmentsPerJuror ?? Math.ceil(totalNeededAssignments / jurors.length) + 2 - const idealPerJuror = Math.ceil(totalNeededAssignments / jurors.length) + // ── Phase 2: Build score matrix and run algorithm ── + console.log('[AI Assignment] Phase 2: Running balanced assignment algorithm...') + const scoreMatrix = buildScoreMatrix(affinities, jurors, projects, anonymizedData) + const suggestions = assignFromScores(scoreMatrix, jurors, projects, constraints, maxCap) - // Track cumulative assignments across batches (real IDs) - const cumulativeAssignments: Array<{ jurorId: string; projectId: string }> = [ - ...constraints.existingAssignments, - ] - - // Process projects in batches - const totalBatches = Math.ceil(anonymizedData.projects.length / ASSIGNMENT_BATCH_SIZE) - - for (let i = 0; i < anonymizedData.projects.length; i += ASSIGNMENT_BATCH_SIZE) { - const batchProjects = anonymizedData.projects.slice(i, i + ASSIGNMENT_BATCH_SIZE) - const batchMappings = anonymizedData.projectMappings.slice(i, i + ASSIGNMENT_BATCH_SIZE) - const currentBatch = Math.floor(i / ASSIGNMENT_BATCH_SIZE) + 1 - - console.log(`[AI Assignment] Processing batch ${currentBatch}/${totalBatches}`) - - // Pass cumulative assignments so GPT knows about previous batch results - const batchConstraints: AssignmentConstraints = { - ...constraints, - maxAssignmentsPerJuror: maxCap, - existingAssignments: cumulativeAssignments, - _targetPerJuror: idealPerJuror, - } - - const { suggestions, tokensUsed } = await processAssignmentBatch( - openai, - model, - anonymizedData, - batchProjects, - batchMappings, - batchConstraints, - userId, - entityId - ) - - // Add this batch's results to cumulative tracking - for (const s of suggestions) { - cumulativeAssignments.push({ jurorId: s.jurorId, projectId: s.projectId }) - } - - allSuggestions.push(...suggestions) - totalTokens += tokensUsed - - // Report progress after each batch - if (onProgress) { - const processedCount = Math.min((currentBatch) * ASSIGNMENT_BATCH_SIZE, projects.length) - await onProgress({ - currentBatch, - totalBatches, - processedCount, - totalProjects: projects.length, - }) - } - } - - console.log(`[AI Assignment] Completed. Total suggestions: ${allSuggestions.length}, Total tokens: ${totalTokens}`) - - // Post-process: enforce hard cap and rebalance - const balanced = rebalanceAssignments(allSuggestions, jurors, constraints, maxCap) - - // Fill coverage gaps: if any project has fewer than requiredReviewsPerProject, use fallback - const gapFilled = fillCoverageGaps(balanced, jurors, projects, constraints, maxCap) - - console.log(`[AI Assignment] After gap-fill: ${gapFilled.length} total (${gapFilled.length - balanced.length} added for coverage)`) + console.log(`[AI Assignment] Complete: ${suggestions.length} assignments, ${tokensUsed} tokens used`) return { success: true, - suggestions: gapFilled, - tokensUsed: totalTokens, + suggestions, + tokensUsed, fallbackUsed: false, } @@ -509,7 +615,6 @@ export async function generateAIAssignments( const classified = classifyAIError(error) logAIError('Assignment', 'generateAIAssignments', classified) - // Log failed attempt await logAIUsage({ userId, action: 'ASSIGNMENT', @@ -525,220 +630,13 @@ export async function generateAIAssignments( errorMessage: classified.message, }) - console.error('[AI Assignment] AI assignment failed, using fallback:', classified.message) - return generateFallbackAssignments(jurors, projects, constraints) - } -} - -// ─── Post-Processing Rebalancer ───────────────────────────────────────────── - -/** - * Enforce hard caps and rebalance assignments from overloaded jurors. - * Moves excess assignments from over-cap jurors to under-loaded jurors - * that haven't been assigned that project yet. - */ -function rebalanceAssignments( - suggestions: AIAssignmentSuggestion[], - jurors: JurorForAssignment[], - constraints: AssignmentConstraints, - maxCap: number, -): AIAssignmentSuggestion[] { - // Build juror load tracking (existing DB assignments + new AI suggestions) - const jurorLoad = new Map() - const jurorSet = new Set(jurors.map((j) => j.id)) - - // Count existing assignments from DB - for (const ea of constraints.existingAssignments) { - if (jurorSet.has(ea.jurorId)) { - jurorLoad.set(ea.jurorId, (jurorLoad.get(ea.jurorId) || 0) + 1) + console.error('[AI Assignment] AI failed, falling back to algorithm:', classified.message) + const fallback = generateFallbackAssignments(jurors, projects, constraints) + return { + ...fallback, + error: `AI scoring failed (${classified.message}). Used algorithmic fallback.`, } } - - // Count new suggestions per juror - const newLoadPerJuror = new Map() - for (const s of suggestions) { - newLoadPerJuror.set(s.jurorId, (newLoadPerJuror.get(s.jurorId) || 0) + 1) - } - - // Effective cap: per-juror personal cap or global cap - const getEffectiveCap = (jurorId: string) => { - if (constraints.jurorLimits?.[jurorId]) return constraints.jurorLimits[jurorId] - const juror = jurors.find((j) => j.id === jurorId) - return juror?.maxAssignments ?? maxCap - } - - // Calculate max new assignments allowed per juror - const maxNewForJuror = (jurorId: string) => { - const existing = jurorLoad.get(jurorId) || 0 - const cap = getEffectiveCap(jurorId) - return Math.max(0, cap - existing) - } - - // Sort suggestions by confidence (keep best matches when trimming) - const sorted = [...suggestions].sort((a, b) => b.confidenceScore - a.confidenceScore) - - // Phase 1: Accept assignments up to each juror's cap - const accepted: AIAssignmentSuggestion[] = [] - const rejected: AIAssignmentSuggestion[] = [] - const acceptedPerJuror = new Map() - const acceptedPairs = new Set() - const projectCoverage = new Map() // projectId → accepted reviewers - - for (const s of sorted) { - const currentNew = acceptedPerJuror.get(s.jurorId) || 0 - const allowed = maxNewForJuror(s.jurorId) - const pairKey = `${s.jurorId}:${s.projectId}` - - if (currentNew < allowed && !acceptedPairs.has(pairKey)) { - accepted.push(s) - acceptedPerJuror.set(s.jurorId, currentNew + 1) - acceptedPairs.add(pairKey) - projectCoverage.set(s.projectId, (projectCoverage.get(s.projectId) || 0) + 1) - } else { - rejected.push(s) - } - } - - // Phase 2: Reassign rejected items to least-loaded jurors that aren't at cap - for (const r of rejected) { - const currentCoverage = projectCoverage.get(r.projectId) || 0 - if (currentCoverage >= constraints.requiredReviewsPerProject) continue // project is covered - - // Find the least-loaded juror who can take this project - const candidates = jurors - .filter((j) => { - const pairKey = `${j.id}:${r.projectId}` - if (acceptedPairs.has(pairKey)) return false // already assigned - // Check existing DB pairs too - if (constraints.existingAssignments.some( - (ea) => ea.jurorId === j.id && ea.projectId === r.projectId - )) return false - const currentNew = acceptedPerJuror.get(j.id) || 0 - return currentNew < maxNewForJuror(j.id) - }) - .sort((a, b) => { - const aTotal = (jurorLoad.get(a.id) || 0) + (acceptedPerJuror.get(a.id) || 0) - const bTotal = (jurorLoad.get(b.id) || 0) + (acceptedPerJuror.get(b.id) || 0) - return aTotal - bTotal // least loaded first - }) - - if (candidates.length > 0) { - const picked = candidates[0] - accepted.push({ - jurorId: picked.id, - projectId: r.projectId, - confidenceScore: r.confidenceScore * 0.8, // slightly lower confidence for reassigned - expertiseMatchScore: r.expertiseMatchScore * 0.5, - reasoning: `Reassigned for workload balance (originally suggested for another juror at capacity).`, - }) - acceptedPerJuror.set(picked.id, (acceptedPerJuror.get(picked.id) || 0) + 1) - acceptedPairs.add(`${picked.id}:${r.projectId}`) - projectCoverage.set(r.projectId, (projectCoverage.get(r.projectId) || 0) + 1) - } - } - - // Log rebalancing stats - const rebalanced = accepted.length - (suggestions.length - rejected.length) - if (rejected.length > 0) { - console.log( - `[AI Assignment] Rebalanced: ${rejected.length} over-cap assignments redistributed, ` + - `${rebalanced} successfully reassigned` - ) - } - - return accepted -} - -/** - * Fill coverage gaps — ensure every project has requiredReviewsPerProject assignments. - * Uses a simple least-loaded-juror algorithm to fill missing slots. - */ -function fillCoverageGaps( - suggestions: AIAssignmentSuggestion[], - jurors: JurorForAssignment[], - projects: ProjectForAssignment[], - constraints: AssignmentConstraints, - maxCap: number, -): AIAssignmentSuggestion[] { - const result = [...suggestions] - - // Track current state - const assignedPairs = new Set() - const jurorLoad = new Map() - const projectCoverage = new Map() - - // Count existing DB assignments - for (const ea of constraints.existingAssignments) { - assignedPairs.add(`${ea.jurorId}:${ea.projectId}`) - jurorLoad.set(ea.jurorId, (jurorLoad.get(ea.jurorId) || 0) + 1) - projectCoverage.set(ea.projectId, (projectCoverage.get(ea.projectId) || 0) + 1) - } - - // Count new AI suggestions - for (const s of suggestions) { - assignedPairs.add(`${s.jurorId}:${s.projectId}`) - jurorLoad.set(s.jurorId, (jurorLoad.get(s.jurorId) || 0) + 1) - projectCoverage.set(s.projectId, (projectCoverage.get(s.projectId) || 0) + 1) - } - - const getEffectiveCap = (jurorId: string) => { - if (constraints.jurorLimits?.[jurorId]) return constraints.jurorLimits[jurorId] - const juror = jurors.find((j) => j.id === jurorId) - return juror?.maxAssignments ?? maxCap - } - - let gapsFilled = 0 - - // For each project, check if it needs more reviewers - for (const project of projects) { - const current = projectCoverage.get(project.id) || 0 - const needed = constraints.requiredReviewsPerProject - current - - if (needed <= 0) continue - - // Find available jurors sorted by load (least loaded first) - const candidates = jurors - .filter((j) => { - const pairKey = `${j.id}:${project.id}` - if (assignedPairs.has(pairKey)) return false - const load = jurorLoad.get(j.id) || 0 - return load < getEffectiveCap(j.id) - }) - .sort((a, b) => { - const aLoad = jurorLoad.get(a.id) || 0 - const bLoad = jurorLoad.get(b.id) || 0 - return aLoad - bLoad - }) - - for (let i = 0; i < Math.min(needed, candidates.length); i++) { - const juror = candidates[i] - const expertiseScore = calculateExpertiseScore( - juror.expertiseTags, - project.tags, - project.tagConfidences, - ) - - result.push({ - jurorId: juror.id, - projectId: project.id, - confidenceScore: expertiseScore * 0.7, // slightly lower confidence for gap-fill - expertiseMatchScore: expertiseScore, - reasoning: generateFallbackReasoning(juror.expertiseTags, project.tags, expertiseScore) - + ' (Added to meet coverage requirement)', - }) - - assignedPairs.add(`${juror.id}:${project.id}`) - jurorLoad.set(juror.id, (jurorLoad.get(juror.id) || 0) + 1) - projectCoverage.set(project.id, (projectCoverage.get(project.id) || 0) + 1) - gapsFilled++ - } - } - - if (gapsFilled > 0) { - console.log(`[AI Assignment] Gap-filled ${gapsFilled} assignment(s) to meet coverage requirements`) - } - - return result } // ─── Fallback Algorithm ────────────────────────────────────────────────────── @@ -789,79 +687,68 @@ export function generateFallbackAssignments( ) } - // Sort projects by need (fewest assignments first) - const sortedProjects = [...projects].sort((a, b) => { - const aCount = projectAssignments.get(a.id) || 0 - const bCount = projectAssignments.get(b.id) || 0 - return aCount - bCount - }) + const totalNeeded = projects.length * constraints.requiredReviewsPerProject + const maxCap = constraints.maxAssignmentsPerJuror ?? Math.ceil(totalNeeded / jurors.length) + 2 + const idealPerJuror = Math.ceil(totalNeeded / jurors.length) - // For each project, find best matching jurors - for (const project of sortedProjects) { - const currentProjectAssignments = projectAssignments.get(project.id) || 0 - const neededReviews = Math.max( - 0, - constraints.requiredReviewsPerProject - currentProjectAssignments - ) + // Iterative: for each pass, assign one more reviewer per under-covered project + for (let pass = 0; pass < constraints.requiredReviewsPerProject; pass++) { + // Sort projects by need (fewest assignments first) + const sortedProjects = [...projects].sort((a, b) => { + const aCount = projectAssignments.get(a.id) || 0 + const bCount = projectAssignments.get(b.id) || 0 + return aCount - bCount + }) - if (neededReviews === 0) continue + for (const project of sortedProjects) { + const currentProjectAssignments = projectAssignments.get(project.id) || 0 + if (currentProjectAssignments >= constraints.requiredReviewsPerProject) continue - // Score all available jurors - const scoredJurors = jurors - .filter((juror) => { - // Check not already assigned - if (existingSet.has(`${juror.id}:${project.id}`)) return false + // Score jurors with heavy workload emphasis + const scoredJurors = jurors + .filter((juror) => { + if (existingSet.has(`${juror.id}:${project.id}`)) return false + const currentLoad = jurorAssignments.get(juror.id) || 0 + const cap = juror.maxAssignments ?? maxCap + return currentLoad < cap + }) + .map((juror) => { + const currentLoad = jurorAssignments.get(juror.id) || 0 + const expertiseScore = calculateExpertiseScore( + juror.expertiseTags, + project.tags, + project.tagConfidences, + ) - // Check not at limit - const currentAssignments = jurorAssignments.get(juror.id) || 0 - const maxAssignments = - juror.maxAssignments ?? constraints.maxAssignmentsPerJuror ?? Infinity - if (currentAssignments >= maxAssignments) return false + // Heavy workload balance weight + const loadRatio = currentLoad / Math.max(1, idealPerJuror) + const loadPenalty = loadRatio > 1 + ? 0.3 * Math.pow(0.5, loadRatio - 1) + : 1 - (loadRatio * 0.4) - return true - }) - .map((juror) => { - const currentLoad = jurorAssignments.get(juror.id) || 0 - const maxLoad = juror.maxAssignments ?? constraints.maxAssignmentsPerJuror ?? 20 - const minTarget = constraints.minAssignmentsPerJuror ?? 5 + return { + juror, + expertiseScore, + combinedScore: expertiseScore * 0.45 + loadPenalty * 0.55, + } + }) + .sort((a, b) => b.combinedScore - a.combinedScore) - return { - juror, - score: calculateExpertiseScore(juror.expertiseTags, project.tags, project.tagConfidences), - loadScore: calculateLoadScore(currentLoad, maxLoad), - underMinBonus: calculateUnderMinBonus(currentLoad, minTarget), - } - }) - .sort((a, b) => { - // Combined score: 50% expertise, 30% load balancing, 20% under-min bonus - const aTotal = a.score * 0.5 + a.loadScore * 0.3 + a.underMinBonus * 0.2 - const bTotal = b.score * 0.5 + b.loadScore * 0.3 + b.underMinBonus * 0.2 - return bTotal - aTotal - }) + if (scoredJurors.length === 0) continue - // Assign top jurors - for (let i = 0; i < Math.min(neededReviews, scoredJurors.length); i++) { - const { juror, score } = scoredJurors[i] + const { juror, expertiseScore } = scoredJurors[0] suggestions.push({ jurorId: juror.id, projectId: project.id, - confidenceScore: score, - expertiseMatchScore: score, - reasoning: generateFallbackReasoning( - juror.expertiseTags, - project.tags, - score - ), + confidenceScore: expertiseScore, + expertiseMatchScore: expertiseScore, + reasoning: generateFallbackReasoning(juror.expertiseTags, project.tags, expertiseScore), }) - // Update tracking existingSet.add(`${juror.id}:${project.id}`) jurorAssignments.set(juror.id, (jurorAssignments.get(juror.id) || 0) + 1) - projectAssignments.set( - project.id, - (projectAssignments.get(project.id) || 0) + 1 - ) + projectAssignments.set(project.id, currentProjectAssignments + 1) } } @@ -872,6 +759,8 @@ export function generateFallbackAssignments( } } +// ─── Scoring Helpers ──────────────────────────────────────────────────────── + /** * Calculate expertise match score based on tag overlap * When tagConfidences are available, weights matches by confidence @@ -917,25 +806,6 @@ function calculateExpertiseScore( return Math.min(1, matchRatio * 0.8 + hasExpertise) } -/** - * Calculate load balancing score (higher score = less loaded) - */ -function calculateLoadScore(currentLoad: number, maxLoad: number): number { - if (maxLoad === 0) return 0 - const utilization = currentLoad / maxLoad - return Math.max(0, 1 - utilization) -} - -/** - * Calculate bonus for jurors under their minimum target - * Returns 1.0 if under min, scaled down as approaching min - */ -function calculateUnderMinBonus(currentLoad: number, minTarget: number): number { - if (currentLoad >= minTarget) return 0 - // Scale bonus based on how far under min (1.0 at 0 load, decreasing as approaching min) - return (minTarget - currentLoad) / minTarget -} - /** * Generate reasoning for fallback assignments */