/**
 * AI Shortlist Service
 *
 * Generates ranked recommendations at end of evaluation rounds.
 * Runs SEPARATELY for each category (STARTUP / BUSINESS_CONCEPT)
 * to produce independent rankings per the competition's advancement rules.
 *
 * GDPR Compliance:
 * - All project data is anonymized before AI processing
 * - No personal identifiers in prompts or responses
 */

import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, logAIError } from './ai-errors'
import { extractMultipleFileContents } from './file-content-extractor'
import {
  toProjectWithRelations,
  anonymizeProjectsForAI,
  validateAnonymizedProjects,
  sanitizeText,
} from './anonymization'
import type { PrismaClient, CompetitionCategory } from '@prisma/client'

// ─── Types ──────────────────────────────────────────────────────────────────

/** Outcome of a shortlist run across one or both categories. */
export type ShortlistResult = {
  success: boolean
  recommendations: CategoryRecommendations
  errors?: string[]
  tokensUsed?: number
}

/** Ranked recommendations, kept separate per competition category. */
export type CategoryRecommendations = {
  STARTUP: ShortlistRecommendation[]
  BUSINESS_CONCEPT: ShortlistRecommendation[]
}

/** One de-anonymized entry of the AI-produced ranking. */
export type ShortlistRecommendation = {
  projectId: string
  rank: number
  score: number
  category: string
  strengths: string[]
  concerns: string[]
  recommendation: string
}

// ─── Constants & Parsing Helpers ────────────────────────────────────────────

/** Number of additional completions requested when the response is not valid JSON. */
const MAX_PARSE_RETRIES = 2

/** Appended to the user prompt on every retry after a JSON syntax error. */
const PARSE_RETRY_SUFFIX = '\n\nIMPORTANT: Please ensure valid JSON output.'

/** Wrapper keys checked first, in order, when the model returns an object instead of a bare array. */
const RANKING_WRAPPER_KEYS = ['rankings', 'projects', 'shortlist'] as const

/** Builds the `PROJECT_001`-style identifier used in prompts for the project at `index`. */
function toAnonymousId(index: number): string {
  return `PROJECT_${String(index + 1).padStart(3, '0')}`
}

/**
 * Locates the ranking array inside a parsed AI response.
 *
 * Accepts a bare array, an object holding the array under one of
 * RANKING_WRAPPER_KEYS, or — as a last resort — an object whose first
 * array-valued property is taken as the ranking. The fallback exists because
 * the prompt asks for a bare array while the request sets `jsonMode`
 * (presumably forcing a JSON object — confirm in buildCompletionParams), so
 * the wrapper key the model picks is not specified anywhere.
 *
 * @returns the ranking items, `[]` when the object contains no array at all,
 *   or `null` when the JSON value is neither an array nor an object.
 */
function extractRankingItems(json: unknown): unknown[] | null {
  if (Array.isArray(json)) return json
  if (json === null || typeof json !== 'object') return null

  const record = json as Record<string, unknown>
  for (const key of RANKING_WRAPPER_KEYS) {
    const candidate = record[key]
    if (Array.isArray(candidate)) return candidate
  }
  for (const candidate of Object.values(record)) {
    if (Array.isArray(candidate)) return candidate
  }
  return []
}

/** Coerces a number or numeric string to a finite number; anything else yields `fallback`. */
function toFiniteNumber(value: unknown, fallback: number): number {
  let candidate = Number.NaN
  if (typeof value === 'number') {
    candidate = value
  } else if (typeof value === 'string' && value.trim() !== '') {
    candidate = Number(value)
  }
  return Number.isFinite(candidate) ? candidate : fallback
}

/** Keeps only the string entries of an array; any non-array value yields `[]`. */
function toStringArray(value: unknown): string[] {
  if (!Array.isArray(value)) return []
  return value.filter((entry): entry is string => typeof entry === 'string')
}

// ─── Prompt Building ────────────────────────────────────────────────────────

/**
 * Builds the system prompt for ranking one category.
 *
 * @param category - 'STARTUP' is labelled "Startup"; any other value is labelled "Business Concept".
 * @param topN - number of projects the model is asked to include.
 * @param rubric - optional custom rubric, inserted verbatim as its own section.
 */
function buildShortlistPrompt(category: string, topN: number, rubric?: string): string {
  const categoryLabel = category === 'STARTUP' ? 'Startup' : 'Business Concept'

  return `You are a senior jury advisor for the Monaco Ocean Protection Challenge.

## Your Role
Analyze aggregated evaluation data to produce a ranked shortlist of the top ${topN} ${categoryLabel} projects.
You are evaluating ONLY ${categoryLabel} projects in this batch — rank them against each other within this category.

## Ranking Criteria (Weighted)
- Evaluation Scores (40%): Average scores across all jury evaluations
- Innovation & Impact (25%): Novelty of approach and potential environmental impact
- Feasibility (20%): Likelihood of successful implementation
- Alignment (15%): Fit with ocean protection mission and competition goals

## Document Analysis
If document content is provided (text_content field in files), use it for deeper qualitative analysis.
Pay SPECIAL ATTENTION to files marked with is_current_round=true — these are the most recent submissions.
Older documents provide context, but recent ones should carry more weight in your assessment.

${rubric ? `## Custom Evaluation Rubric\n${rubric}\n` : ''}
## Output Format
Return a JSON array:
[
  {
    "anonymousId": "PROJECT_001",
    "rank": 1,
    "score": 0-100,
    "strengths": ["strength 1", "strength 2"],
    "concerns": ["concern 1"],
    "recommendation": "1-2 sentence recommendation"
  }
]

## Guidelines
- Only include the top ${topN} projects in your ranking
- Score should reflect weighted combination of all criteria
- Be specific in strengths and concerns — avoid generic statements
- Consider feedback themes and evaluator consensus
- Higher evaluator consensus should boost confidence in ranking
- Do not include any personal identifiers`
}

// ─── Single Category Processing ─────────────────────────────────────────────

/**
 * Produces the ranked shortlist for a single category of one round.
 *
 * Steps: load the category's projects assigned in the round, optionally
 * extract file text, aggregate submitted evaluations, anonymize, call the
 * model (retrying on invalid JSON), then map anonymous ids back to real ones.
 *
 * Recoverable problems (no projects, failed anonymization check, missing
 * client, empty or unparseable response) are reported through `errors` with
 * an empty `recommendations` list. Exceptions from Prisma, file extraction,
 * usage logging or the OpenAI call are NOT caught here and propagate to the
 * caller.
 *
 * @returns recommendations sorted by ascending rank, the total tokens consumed
 *   across all completion attempts, and any recoverable error messages.
 */
async function generateCategoryShortlist(
  params: {
    roundId: string
    category: string
    topN: number
    rubric?: string
    aiParseFiles: boolean
  },
  prisma: PrismaClient,
): Promise<{ recommendations: ShortlistRecommendation[]; tokensUsed: number; errors: string[] }> {
  const { roundId, category, topN, rubric, aiParseFiles } = params

  // Load projects with evaluations for this category
  const projects = await prisma.project.findMany({
    where: {
      competitionCategory: category as CompetitionCategory,
      assignments: { some: { roundId } },
    },
    include: {
      assignments: {
        where: { roundId },
        include: { evaluation: true },
      },
      projectTags: { include: { tag: true } },
      files: {
        select: {
          id: true,
          fileName: true,
          fileType: true,
          mimeType: true,
          size: true,
          pageCount: true,
          objectKey: true,
          roundId: true,
          createdAt: true,
        },
        orderBy: { createdAt: 'desc' as const },
      },
      teamMembers: { select: { user: { select: { name: true } } } },
    },
  })

  if (projects.length === 0) {
    return {
      recommendations: [],
      tokensUsed: 0,
      errors: [`No ${category} projects found for this round`],
    }
  }

  // Get round names for file tagging
  const roundIds = new Set<string>()
  for (const p of projects) {
    for (const f of (p as any).files || []) {
      if (f.roundId) roundIds.add(f.roundId)
    }
  }

  const roundNames = new Map<string, string>()
  if (roundIds.size > 0) {
    const rounds = await prisma.round.findMany({
      where: { id: { in: [...roundIds] } },
      select: { id: true, name: true },
    })
    for (const r of rounds) roundNames.set(r.id, r.name)
  }

  // Optionally extract file contents
  let fileContents: Map<string, string> | undefined
  if (aiParseFiles) {
    const allFiles = projects.flatMap(
      (p: any) =>
        (p.files || []) as Array<{
          id: string
          fileName: string
          mimeType: string
          objectKey: string
        }>,
    )
    const extractions = await extractMultipleFileContents(allFiles)
    fileContents = new Map<string, string>()
    for (const e of extractions) {
      if (e.content) fileContents.set(e.fileId, e.content)
    }
  }

  // Aggregate per-project stats and free-text feedback. Sanitize feedback
  // before it enters the prompt — sanitizeText strips email/phone/url/ssn
  // patterns embedded in juror free-text. Without this, juror feedback like
  // "Contact applicant Jane at jane@example.com" leaks PII to OpenAI.
  const aggregatesByProjectId = new Map<
    string,
    { avgScore: number; evaluationCount: number; feedbackSamples: string[] }
  >()
  for (const project of projects as any[]) {
    // Only evaluations that exist and were actually submitted count.
    const evaluations = project.assignments
      .map((a: any) => a.evaluation)
      .filter(Boolean)
      .filter((e: any) => e.status === 'SUBMITTED')

    const scores: number[] = evaluations.map((e: any) => e.globalScore ?? 0)
    const avgScore =
      scores.length > 0
        ? scores.reduce((sum: number, s: number) => sum + s, 0) / scores.length
        : 0

    // At most three samples per project, each capped at 1000 characters after sanitizing.
    const feedbackSamples = evaluations
      .map((e: any) => e.feedbackGeneral || e.feedbackText)
      .filter((t: unknown): t is string => typeof t === 'string' && t.length > 0)
      .slice(0, 3)
      .map((t: string) => sanitizeText(t).slice(0, 1000))

    aggregatesByProjectId.set(project.id, {
      avgScore,
      evaluationCount: evaluations.length,
      feedbackSamples,
    })
  }

  // Route every project through the canonical anonymization pipeline so
  // description/title/institution are PII-stripped, free-text is truncated,
  // and file text_content is sanitized (handled in anonymizeProjectForAI).
  const projectsWithRelations = (projects as any[]).map((p) =>
    toProjectWithRelations({
      id: p.id,
      title: p.title,
      description: p.description,
      competitionCategory: p.competitionCategory,
      oceanIssue: p.oceanIssue ?? null,
      country: p.country ?? null,
      geographicZone: p.geographicZone ?? null,
      institution: p.institution ?? null,
      tags: (p.projectTags ?? []).map((pt: any) => pt.tag.name),
      foundedAt: p.foundedAt ?? null,
      wantsMentorship: p.wantsMentorship ?? false,
      submissionSource: p.submissionSource ?? 'MANUAL',
      submittedAt: p.submittedAt ?? null,
      _count: { teamMembers: p.teamMembers?.length ?? 0, files: p.files?.length ?? 0 },
      files: (p.files ?? []).map((f: any) => ({
        fileType: f.fileType ?? null,
        size: f.size,
        pageCount: f.pageCount,
        roundName: f.roundId ? roundNames.get(f.roundId) : undefined,
        isCurrentRound: f.roundId === roundId,
        textContent: fileContents?.get(f.id),
      })),
    }),
  )

  const { anonymized: anonymizedBase, mappings } = anonymizeProjectsForAI(
    projectsWithRelations,
    'FILTERING',
  )

  if (!validateAnonymizedProjects(anonymizedBase)) {
    console.error('[AI Shortlist] Anonymization validation failed')
    return {
      recommendations: [],
      tokensUsed: 0,
      errors: ['GDPR compliance check failed: PII detected in anonymized data'],
    }
  }

  // Merge anonymized base with per-project aggregates, keyed by mapping order.
  // Use the same anonymousId scheme the AI prompt expects.
  const anonymized = anonymizedBase.map((p, index) => {
    const realId = mappings[index].realId
    const agg = aggregatesByProjectId.get(realId) ?? {
      avgScore: 0,
      evaluationCount: 0,
      feedbackSamples: [],
    }
    return {
      anonymousId: toAnonymousId(index),
      ...p,
      // undefined-valued keys are omitted by JSON.stringify, so this removes
      // project_id from the serialized prompt payload.
      project_id: undefined,
      avgScore: agg.avgScore,
      evaluationCount: agg.evaluationCount,
      feedbackSamples: agg.feedbackSamples,
    }
  })

  // Build idMap for de-anonymization
  const idMap = new Map<string, string>()
  mappings.forEach((m, index) => {
    idMap.set(toAnonymousId(index), m.realId)
  })

  // Call AI
  const openai = await getOpenAI()
  const model = await getConfiguredModel()

  if (!openai) {
    return { recommendations: [], tokensUsed: 0, errors: ['OpenAI client not configured'] }
  }

  const systemPrompt = buildShortlistPrompt(category, topN, rubric)
  const userPrompt = `Analyze these anonymized ${category} project evaluations and produce a ranked shortlist of the top ${topN}.

Projects (${anonymized.length} total):
${JSON.stringify(anonymized, null, 2)}

Return a JSON array following the format specified. Only include the top ${topN} projects. Rank by overall quality within this category.`

  // One initial attempt plus up to MAX_PARSE_RETRIES retries. Every attempt is
  // a separate completion, so each one is counted and logged individually.
  let totalTokens = 0
  let rankedItems: unknown[] | null = null

  for (let attempt = 0; attempt <= MAX_PARSE_RETRIES; attempt++) {
    const prompt = attempt === 0 ? userPrompt : userPrompt + PARSE_RETRY_SUFFIX

    const response = await openai.chat.completions.create(
      buildCompletionParams(model, {
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: prompt },
        ],
        temperature: 0.1,
        jsonMode: true,
      }),
    )

    const usage = extractTokenUsage(response)
    totalTokens += usage.totalTokens

    await logAIUsage({
      action: 'SHORTLIST',
      model,
      promptTokens: usage.promptTokens,
      completionTokens: usage.completionTokens,
      totalTokens: usage.totalTokens,
      status: 'SUCCESS',
    })

    const content = response.choices[0]?.message?.content
    if (!content) {
      return { recommendations: [], tokensUsed: totalTokens, errors: ['Empty AI response'] }
    }

    let json: unknown
    try {
      json = JSON.parse(content)
    } catch {
      // Invalid JSON: ask again while attempts remain.
      continue
    }

    rankedItems = extractRankingItems(json)
    break
  }

  // Either every attempt produced invalid JSON, or the JSON was a bare
  // primitive/null that cannot contain a ranking.
  if (rankedItems === null) {
    return {
      recommendations: [],
      tokensUsed: totalTokens,
      errors: ['Failed to parse AI response'],
    }
  }

  // De-anonymize. The model output is untrusted: skip entries that are not
  // objects or reference unknown ids, and coerce every field to its declared
  // type so a malformed entry cannot throw or poison the sort with NaN.
  const recommendations: ShortlistRecommendation[] = []
  for (const item of rankedItems) {
    if (item === null || typeof item !== 'object') continue
    const entry = item as Record<string, unknown>

    const anonymousId = entry.anonymousId
    if (typeof anonymousId !== 'string') continue

    const projectId = idMap.get(anonymousId)
    if (!projectId) continue

    recommendations.push({
      projectId,
      rank: toFiniteNumber(entry.rank, 0),
      score: toFiniteNumber(entry.score, 0),
      category,
      strengths: toStringArray(entry.strengths),
      concerns: toStringArray(entry.concerns),
      recommendation: typeof entry.recommendation === 'string' ? entry.recommendation : '',
    })
  }
  recommendations.sort((a, b) => a.rank - b.rank)

  return { recommendations, tokensUsed: totalTokens, errors: [] }
}

// ─── Main Function ──────────────────────────────────────────────────────────

/**
 * Generate an AI shortlist for projects in a round, split by category.
 * Runs independently for STARTUP and BUSINESS_CONCEPT.
 *
 * @param params.category - if provided, only this category is processed.
 * @param params.topN - shortlist size used when no per-category override is given (default 10).
 * @param params.startupTopN - override applied to STARTUP.
 * @param params.conceptTopN - override applied to every category other than STARTUP.
 * @param params.aiParseFiles - when true, file contents are extracted and sent with the prompt (default false).
 * @param params.competitionId - accepted for interface compatibility; not read by this function.
 * @returns `success: true` with per-category recommendations; recoverable
 *   per-category problems are listed in `errors` without flipping `success`.
 *   If any category throws, the whole call returns `success: false` with empty
 *   recommendations and the error message.
 */
export async function generateShortlist(
  params: {
    roundId: string
    competitionId: string
    category?: string // If provided, only run for this category
    topN?: number // Global fallback
    startupTopN?: number // Per-category override
    conceptTopN?: number // Per-category override
    rubric?: string
    aiParseFiles?: boolean
  },
  prisma: PrismaClient,
): Promise<ShortlistResult> {
  const {
    roundId,
    category,
    topN = 10,
    startupTopN,
    conceptTopN,
    rubric,
    aiParseFiles = false,
  } = params

  try {
    const categories = category ? [category] : ['STARTUP', 'BUSINESS_CONCEPT']
    const allRecommendations: CategoryRecommendations = {
      STARTUP: [],
      BUSINESS_CONCEPT: [],
    }
    let totalTokens = 0
    const allErrors: string[] = []

    // Run categories in parallel for efficiency
    const categoryPromises = categories.map(async (cat) => {
      const catTopN = cat === 'STARTUP' ? (startupTopN ?? topN) : (conceptTopN ?? topN)
      console.log(`[AI Shortlist] Generating top-${catTopN} for ${cat}`)
      const result = await generateCategoryShortlist(
        { roundId, category: cat, topN: catTopN, rubric, aiParseFiles },
        prisma,
      )
      return { cat, result }
    })

    const categoryResults = await Promise.all(categoryPromises)

    for (const { cat, result } of categoryResults) {
      // Anything that is not STARTUP is stored under BUSINESS_CONCEPT.
      if (cat === 'STARTUP') {
        allRecommendations.STARTUP = result.recommendations
      } else {
        allRecommendations.BUSINESS_CONCEPT = result.recommendations
      }
      totalTokens += result.tokensUsed
      allErrors.push(...result.errors)
    }

    return {
      success: true,
      recommendations: allRecommendations,
      tokensUsed: totalTokens,
      errors: allErrors.length > 0 ? allErrors : undefined,
    }
  } catch (error) {
    const classification = classifyAIError(error)
    logAIError('ai-shortlist', 'generateShortlist', classification)
    console.error('[AIShortlist] generateShortlist failed:', error)
    return {
      success: false,
      recommendations: { STARTUP: [], BUSINESS_CONCEPT: [] },
      errors: [error instanceof Error ? error.message : 'AI shortlist generation failed'],
    }
  }
}