/**
 * AI-Powered Evaluation Summary Service
 *
 * Generates AI summaries of jury evaluations for a project in a given round.
 * Combines OpenAI analysis with server-side scoring pattern calculations.
 *
 * GDPR Compliance:
 * - All evaluation data is anonymized before AI processing
 * - No juror names, emails, or identifiers are sent to OpenAI
 * - Only scores, feedback text, and binary decisions are included
 */

import { TRPCError } from '@trpc/server'
import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import { sanitizeText } from './anonymization'
import type { PrismaClient, Prisma } from '@prisma/client'

// ─── Types ──────────────────────────────────────────────────────────────────

/** A single criterion answer: numeric score, yes/no decision, or free text. */
type CriterionValue = number | boolean | string

/** Criterion answers keyed by criterion id. */
type CriterionScores = Record<string, CriterionValue>

type ThemeSentiment = 'positive' | 'negative' | 'mixed'

interface EvaluationForSummary {
  id: string
  criterionScoresJson: CriterionScores | null
  globalScore: number | null
  binaryDecision: boolean | null
  feedbackText: string | null
  assignment: {
    user: {
      id: string
      name: string | null
      email: string
    }
  }
}

interface AnonymizedEvaluation {
  criterionScores: CriterionScores | null
  globalScore: number | null
  binaryDecision: boolean | null
  feedbackText: string | null
}

interface CriterionDef {
  id: string
  label: string
  type?: 'numeric' | 'text' | 'boolean' | 'section_header'
  trueLabel?: string
  falseLabel?: string
}

interface AIResponsePayload {
  overallAssessment: string
  strengths: string[]
  weaknesses: string[]
  themes: Array<{
    theme: string
    sentiment: ThemeSentiment
    frequency: number
  }>
  recommendation: string
}

interface BooleanStats {
  yesCount: number
  noCount: number
  total: number
  yesPercent: number
  trueLabel: string
  falseLabel: string
}

interface ScoringPatterns {
  averageGlobalScore: number | null
  consensus: number
  criterionAverages: Record<string, number>
  booleanCriteria: Record<string, BooleanStats>
  textResponses: Record<string, string[]>
  evaluatorCount: number
}

export interface EvaluationSummaryResult {
  id: string
  projectId: string
  roundId: string
  summaryJson: AIResponsePayload & { scoringPatterns: ScoringPatterns }
  generatedAt: Date
  model: string
  tokensUsed: number
}

// ─── Anonymization ──────────────────────────────────────────────────────────

/**
 * Copy criterion answers, passing every free-text answer through `sanitizeText`.
 *
 * Text criteria are written by jurors just like `feedbackText`, so they get the
 * same treatment before leaving the server. Anything that is not an object is
 * dropped (returned as `null`) rather than forwarded unsanitized.
 */
function sanitizeCriterionScores(scores: CriterionScores | null): CriterionScores | null {
  if (scores == null || typeof scores !== 'object') {
    return null
  }

  const sanitized: CriterionScores = {}
  for (const [criterionId, value] of Object.entries(scores)) {
    sanitized[criterionId] = typeof value === 'string' ? sanitizeText(value) : value
  }
  return sanitized
}

/**
 * Strip juror names/emails from evaluations, keeping only scores and feedback.
 *
 * The `assignment.user` record and the evaluation id are not copied to the
 * output. Free-text content (`feedbackText` and string-valued criterion
 * answers) is passed through `sanitizeText`.
 *
 * @param evaluations - Evaluations including juror identity.
 * @returns One anonymized record per input evaluation, in the same order.
 */
export function anonymizeEvaluations(
  evaluations: EvaluationForSummary[]
): AnonymizedEvaluation[] {
  return evaluations.map((ev) => ({
    criterionScores: sanitizeCriterionScores(ev.criterionScoresJson),
    globalScore: ev.globalScore,
    binaryDecision: ev.binaryDecision,
    feedbackText: ev.feedbackText ? sanitizeText(ev.feedbackText) : null,
  }))
}

// ─── Prompt Building ────────────────────────────────────────────────────────

/**
 * Render one criterion as a bullet line describing how it is answered.
 * A criterion without an explicit type is treated as numeric.
 */
function describeCriterion(criterion: CriterionDef): string {
  const type = criterion.type || 'numeric'
  if (type === 'boolean') {
    return `- "${criterion.label}" (Yes/No decision: ${criterion.trueLabel || 'Yes'} / ${criterion.falseLabel || 'No'})`
  }
  if (type === 'text') {
    return `- "${criterion.label}" (Free-text response)`
  }
  return `- "${criterion.label}" (Numeric score)`
}

/**
 * Build the OpenAI prompt for evaluation summary generation.
 *
 * @param anonymizedEvaluations - Output of {@link anonymizeEvaluations}; embedded as JSON.
 * @param projectTitle - Project title; sanitized before being embedded.
 * @param criteriaDefinitions - Form criteria; section headers are omitted from the prompt.
 * @returns The complete user-message prompt.
 */
export function buildSummaryPrompt(
  anonymizedEvaluations: AnonymizedEvaluation[],
  projectTitle: string,
  criteriaDefinitions: CriterionDef[]
): string {
  const sanitizedTitle = sanitizeText(projectTitle)

  // Descriptive criteria section that explains each criterion type
  const criteriaDescription = criteriaDefinitions
    .filter((c) => c.type !== 'section_header')
    .map(describeCriterion)
    .join('\n')

  return `You are analyzing jury evaluations for a project competition.

PROJECT: "${sanitizedTitle}"

EVALUATION CRITERIA:
${criteriaDescription}

Note: criterionScores values may be numbers (numeric scores), booleans (true/false for yes/no criteria), or strings (text responses).

EVALUATIONS (${anonymizedEvaluations.length} total):
${JSON.stringify(anonymizedEvaluations, null, 2)}

Analyze these evaluations and return a JSON object with this exact structure:
{
  "overallAssessment": "A 2-3 sentence summary of how the project was evaluated overall",
  "strengths": ["strength 1", "strength 2", ...],
  "weaknesses": ["weakness 1", "weakness 2", ...],
  "themes": [
    { "theme": "theme name", "sentiment": "positive" | "negative" | "mixed", "frequency": <number of evaluations mentioning this theme> }
  ],
  "recommendation": "A brief recommendation based on the evaluation consensus"
}

Guidelines:
- Base your analysis only on the provided evaluation data
- For numeric criteria, consider score averages and distribution
- For yes/no criteria, consider the proportion of yes vs no answers
- For text criteria, synthesize common themes from the responses
- Identify common themes across evaluator feedback
- Note areas of agreement and disagreement
- Keep the assessment objective and balanced
- Do not include any personal identifiers`
}

// ─── Scoring Patterns (Server-Side) ─────────────────────────────────────────

/**
 * Divisor used to normalize the standard deviation of global scores.
 * Scores are assumed to be on a 1-10 scale, so the max possible std dev is ~4.5.
 */
const MAX_GLOBAL_SCORE_STD_DEV = 4.5

/** Arithmetic mean; callers must pass a non-empty array. */
function mean(values: number[]): number {
  return values.reduce((a, b) => a + b, 0) / values.length
}

/**
 * Compute scoring patterns from evaluations without AI.
 *
 * - `averageGlobalScore`: mean of the numeric global scores, or `null` if none.
 * - `consensus`: `1 - stdDev / 4.5`, clamped at 0 and rounded to 2 decimals;
 *   stays 1 when fewer than two global scores exist.
 * - `criterionAverages` / `booleanCriteria` / `textResponses`: per-criterion
 *   aggregates keyed by criterion label; criteria with no usable answers are
 *   omitted. Text responses are passed through `sanitizeText`.
 * - `evaluatorCount`: number of evaluations supplied.
 *
 * @param evaluations - Submitted evaluations.
 * @param criteriaLabels - Form criteria; an untyped criterion counts as numeric,
 *   `section_header` entries are ignored.
 */
export function computeScoringPatterns(
  evaluations: EvaluationForSummary[],
  criteriaLabels: CriterionDef[]
): ScoringPatterns {
  // typeof check (not just `!== null`) so a missing score cannot turn the average into NaN
  const globalScores = evaluations
    .map((e) => e.globalScore)
    .filter((s): s is number => typeof s === 'number')

  const averageGlobalScore = globalScores.length > 0 ? mean(globalScores) : null

  // Consensus: 1 - normalized standard deviation (1.0 = full consensus)
  let consensus = 1
  if (globalScores.length > 1 && averageGlobalScore !== null) {
    const variance = mean(globalScores.map((score) => Math.pow(score - averageGlobalScore, 2)))
    consensus = Math.max(0, 1 - Math.sqrt(variance) / MAX_GLOBAL_SCORE_STD_DEV)
  }

  // Raw answers every evaluator gave for one criterion (undefined when unanswered)
  const answersFor = (criterionId: string): unknown[] =>
    evaluations.map((ev) => ev.criterionScoresJson?.[criterionId])

  const criterionAverages: Record<string, number> = {}
  const booleanCriteria: Record<string, BooleanStats> = {}
  const textResponses: Record<string, string[]> = {}

  for (const criterion of criteriaLabels) {
    const type = criterion.type || 'numeric'
    const answers = answersFor(criterion.id)

    if (type === 'numeric') {
      const scores = answers.filter((v): v is number => typeof v === 'number')
      if (scores.length > 0) {
        criterionAverages[criterion.label] = mean(scores)
      }
    } else if (type === 'boolean') {
      const decisions = answers.filter((v): v is boolean => typeof v === 'boolean')
      const total = decisions.length
      if (total > 0) {
        const yesCount = decisions.filter(Boolean).length
        booleanCriteria[criterion.label] = {
          yesCount,
          noCount: total - yesCount,
          total,
          yesPercent: Math.round((yesCount / total) * 100),
          trueLabel: criterion.trueLabel || 'Yes',
          falseLabel: criterion.falseLabel || 'No',
        }
      }
    } else if (type === 'text') {
      const responses = answers
        .filter((v): v is string => typeof v === 'string' && v.trim() !== '')
        .map((v) => sanitizeText(v))
      if (responses.length > 0) {
        textResponses[criterion.label] = responses
      }
    }
  }

  return {
    averageGlobalScore,
    consensus: Math.round(consensus * 100) / 100,
    criterionAverages,
    booleanCriteria,
    textResponses,
    evaluatorCount: evaluations.length,
  }
}

// ─── AI Response Validation ─────────────────────────────────────────────────

function isThemeSentiment(value: unknown): value is ThemeSentiment {
  return value === 'positive' || value === 'negative' || value === 'mixed'
}

/**
 * Coerce parsed model output into the `AIResponsePayload` shape.
 *
 * The model's JSON is untrusted: missing or mistyped string fields become `''`,
 * missing or mistyped lists become `[]`, non-string list items are dropped, and
 * theme entries without a name are dropped. An unrecognized sentiment falls
 * back to `'mixed'` and a non-numeric frequency to `0`. Keys outside the
 * declared shape are not carried over.
 */
function normalizeAIResponse(raw: unknown): AIResponsePayload {
  const data: Record<string, unknown> =
    raw !== null && typeof raw === 'object' ? (raw as Record<string, unknown>) : {}

  const asString = (value: unknown): string => (typeof value === 'string' ? value : '')
  const asStringList = (value: unknown): string[] =>
    Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : []

  const themes: AIResponsePayload['themes'] = []
  if (Array.isArray(data.themes)) {
    for (const entry of data.themes as unknown[]) {
      if (entry === null || typeof entry !== 'object') continue
      const { theme, sentiment, frequency } = entry as Record<string, unknown>
      if (typeof theme !== 'string' || theme.trim() === '') continue
      themes.push({
        theme,
        sentiment: isThemeSentiment(sentiment) ? sentiment : 'mixed',
        frequency: typeof frequency === 'number' && Number.isFinite(frequency) ? frequency : 0,
      })
    }
  }

  return {
    overallAssessment: asString(data.overallAssessment),
    strengths: asStringList(data.strengths),
    weaknesses: asStringList(data.weaknesses),
    themes,
    recommendation: asString(data.recommendation),
  }
}

// ─── Main Orchestrator ──────────────────────────────────────────────────────

/**
 * Generate an AI-powered evaluation summary for a project in a round.
 *
 * Steps: load the project, its submitted evaluations and the round's active
 * form criteria; anonymize; ask OpenAI for a JSON summary (retrying up to two
 * times when the reply is not valid JSON); compute scoring patterns
 * server-side; upsert the merged result; log AI usage.
 *
 * @throws TRPCError `NOT_FOUND` when the project does not exist.
 * @throws TRPCError `BAD_REQUEST` when there are no submitted evaluations.
 * @throws TRPCError `PRECONDITION_FAILED` when OpenAI is not configured.
 * @throws TRPCError `INTERNAL_SERVER_ERROR` when the AI call fails or its
 *   output cannot be parsed after all retries.
 */
export async function generateSummary({
  projectId,
  roundId,
  userId,
  prisma,
}: {
  projectId: string
  roundId: string
  userId: string
  prisma: PrismaClient
}): Promise<EvaluationSummaryResult> {
  // 1. Fetch project with evaluations and form criteria
  const project = await prisma.project.findUnique({
    where: { id: projectId },
    select: {
      id: true,
      title: true,
    },
  })

  if (!project) {
    throw new TRPCError({ code: 'NOT_FOUND', message: 'Project not found' })
  }

  // Fetch submitted evaluations for this project in this round
  const evaluations = await prisma.evaluation.findMany({
    where: {
      status: 'SUBMITTED',
      assignment: {
        projectId,
        roundId,
      },
    },
    select: {
      id: true,
      criterionScoresJson: true,
      globalScore: true,
      binaryDecision: true,
      feedbackText: true,
      assignment: {
        select: {
          user: {
            select: { id: true, name: true, email: true },
          },
        },
      },
    },
  })

  if (evaluations.length === 0) {
    throw new TRPCError({
      code: 'BAD_REQUEST',
      message: 'No submitted evaluations found for this project in this round',
    })
  }

  // Get evaluation form criteria for this round
  const form = await prisma.evaluationForm.findFirst({
    where: { roundId, isActive: true },
    select: { criteriaJson: true },
  })

  // criteriaJson is a JSON column: only trust it as a list when it really is one
  const rawCriteria: unknown = form?.criteriaJson
  const criteria: CriterionDef[] = Array.isArray(rawCriteria)
    ? (rawCriteria as CriterionDef[])
    : []

  // 2. Anonymize evaluations
  const typedEvaluations = evaluations as unknown as EvaluationForSummary[]
  const anonymized = anonymizeEvaluations(typedEvaluations)

  // 3. Build prompt and call OpenAI
  const openai = await getOpenAI()
  if (!openai) {
    throw new TRPCError({
      code: 'PRECONDITION_FAILED',
      message: 'OpenAI is not configured. Please set up your API key in Settings.',
    })
  }

  const model = await getConfiguredModel(AI_MODELS.QUICK)
  const prompt = buildSummaryPrompt(anonymized, project.title, criteria)

  // Single place that defines the completion parameters for first call and retries
  const requestCompletion = (promptText: string) =>
    openai.chat.completions.create(
      buildCompletionParams(model, {
        messages: [{ role: 'user', content: promptText }],
        jsonMode: true,
        temperature: 0.1,
        maxTokens: 2000,
      })
    )

  const MAX_PARSE_RETRIES = 2
  let aiResponse: AIResponsePayload
  let tokensUsed = 0
  let parseAttempts = 0

  try {
    let response = await requestCompletion(prompt)
    tokensUsed = extractTokenUsage(response).totalTokens

    // Parse with retry logic
    while (true) {
      try {
        const content = response.choices[0]?.message?.content
        if (!content) {
          throw new Error('Empty response from AI')
        }
        aiResponse = normalizeAIResponse(JSON.parse(content))
        break
      } catch (parseError) {
        // Only malformed JSON is retried; anything else goes to the outer handler
        if (!(parseError instanceof SyntaxError)) {
          throw parseError
        }

        if (parseAttempts < MAX_PARSE_RETRIES) {
          parseAttempts++
          console.warn(`[AI Evaluation Summary] JSON parse failed, retrying (${parseAttempts}/${MAX_PARSE_RETRIES})`)

          // Retry the API call with hint
          response = await requestCompletion(
            prompt + '\n\nIMPORTANT: Please ensure valid JSON output.'
          )
          tokensUsed += extractTokenUsage(response).totalTokens
          continue
        }

        // Retry limit reached
        const parseErrorObj = createParseError(parseError.message)
        logAIError('EvaluationSummary', 'generateSummary', parseErrorObj)

        await logAIUsage({
          userId,
          action: 'EVALUATION_SUMMARY',
          entityType: 'Project',
          entityId: projectId,
          model,
          promptTokens: 0,
          completionTokens: 0,
          totalTokens: tokensUsed,
          itemsProcessed: 0,
          status: 'ERROR',
          errorMessage: parseErrorObj.message,
        })

        throw new TRPCError({
          code: 'INTERNAL_SERVER_ERROR',
          message: 'Failed to parse AI response. Please try again.',
        })
      }
    }
  } catch (error) {
    // Already logged and shaped for the client
    if (error instanceof TRPCError) {
      throw error
    }

    const classified = classifyAIError(error)
    logAIError('EvaluationSummary', 'generateSummary', classified)

    await logAIUsage({
      userId,
      action: 'EVALUATION_SUMMARY',
      entityType: 'Project',
      entityId: projectId,
      model,
      promptTokens: 0,
      completionTokens: 0,
      // Tokens from completions that succeeded before the failure still count
      totalTokens: tokensUsed,
      itemsProcessed: 0,
      status: 'ERROR',
      errorMessage: classified.message,
    })

    throw new TRPCError({
      code: 'INTERNAL_SERVER_ERROR',
      message: classified.message,
    })
  }

  // 4. Compute scoring patterns (server-side, no AI)
  const scoringPatterns = computeScoringPatterns(typedEvaluations, criteria)

  // 5. Merge and upsert
  const summaryJson: EvaluationSummaryResult['summaryJson'] = {
    ...aiResponse,
    scoringPatterns,
  }

  const summaryJsonValue = summaryJson as unknown as Prisma.InputJsonValue

  const summary = await prisma.evaluationSummary.upsert({
    where: {
      projectId_roundId: { projectId, roundId },
    },
    create: {
      projectId,
      roundId,
      summaryJson: summaryJsonValue,
      generatedById: userId,
      model,
      tokensUsed,
    },
    update: {
      summaryJson: summaryJsonValue,
      generatedAt: new Date(),
      generatedById: userId,
      model,
      tokensUsed,
    },
  })

  // 6. Log AI usage
  await logAIUsage({
    userId,
    action: 'EVALUATION_SUMMARY',
    entityType: 'Project',
    entityId: projectId,
    model,
    promptTokens: 0, // Detailed breakdown not always available
    completionTokens: 0,
    totalTokens: tokensUsed,
    itemsProcessed: evaluations.length,
    status: 'SUCCESS',
  })

  return {
    id: summary.id,
    projectId: summary.projectId,
    roundId: summary.roundId,
    summaryJson,
    generatedAt: summary.generatedAt,
    model: summary.model,
    tokensUsed: summary.tokensUsed,
  }
}