Files
MOPC-Portal/src/server/services/ai-evaluation-summary.ts
Matt 3ccf9b0542 feat: per-category evaluation criteria (startup vs business concept)
Add ability to define completely different evaluation criteria for each
competition category. Admins toggle "Separate Criteria per Category" in
round config, then configure criteria independently via tabbed editor.

- Schema: add nullable `category` to EvaluationForm with updated constraints
- Config: add `perCategoryCriteria` boolean to EvaluationConfigSchema
- Helper: new `findActiveForm()` with category-aware resolution + fallback
- Backend: getForm, upsertForm, getStageForm, startStage all category-aware
- AI services: use project category for form lookup in summaries + ranking
- Export/ranking: merge criteria from all active forms for cross-category reports
- Admin UI: toggle switch + tabbed criteria editor with per-category builders
- Jury UI: auto-selects correct form based on project category (invisible to juror)
- Fully backwards compatible: toggle defaults OFF, existing forms unchanged

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 13:03:22 -04:00

518 lines
16 KiB
TypeScript

/**
* AI-Powered Evaluation Summary Service
*
* Generates AI summaries of jury evaluations for a project in a given round.
* Combines OpenAI analysis with server-side scoring pattern calculations.
*
* GDPR Compliance:
* - All evaluation data is anonymized before AI processing
* - No juror names, emails, or identifiers are sent to OpenAI
* - Only scores, feedback text, and binary decisions are included
*/
import { TRPCError } from '@trpc/server'
import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import { sanitizeText } from './anonymization'
import type { PrismaClient, Prisma } from '@prisma/client'
import { findActiveForm } from '@/server/utils/evaluation-form-lookup'
// ─── Types ──────────────────────────────────────────────────────────────────
interface EvaluationForSummary {
id: string
criterionScoresJson: Record<string, number | boolean | string> | null
globalScore: number | null
binaryDecision: boolean | null
feedbackText: string | null
assignment: {
user: {
id: string
name: string | null
email: string
}
}
}
interface AnonymizedEvaluation {
criterionScores: Record<string, number | boolean | string> | null
globalScore: number | null
binaryDecision: boolean | null
feedbackText: string | null
}
interface CriterionDef {
id: string
label: string
type?: 'numeric' | 'text' | 'boolean' | 'section_header'
trueLabel?: string
falseLabel?: string
}
interface AIResponsePayload {
overallAssessment: string
strengths: string[]
weaknesses: string[]
themes: Array<{
theme: string
sentiment: 'positive' | 'negative' | 'mixed'
frequency: number
}>
recommendation: string
}
interface BooleanStats {
yesCount: number
noCount: number
total: number
yesPercent: number
trueLabel: string
falseLabel: string
}
interface ScoringPatterns {
averageGlobalScore: number | null
consensus: number
criterionAverages: Record<string, number>
booleanCriteria: Record<string, BooleanStats>
textResponses: Record<string, string[]>
evaluatorCount: number
}
export interface EvaluationSummaryResult {
id: string
projectId: string
roundId: string
summaryJson: AIResponsePayload & { scoringPatterns: ScoringPatterns }
generatedAt: Date
model: string
tokensUsed: number
}
// ─── Anonymization ──────────────────────────────────────────────────────────
/**
* Strip juror names/emails from evaluations, keeping only scores and feedback.
*/
export function anonymizeEvaluations(
evaluations: EvaluationForSummary[]
): AnonymizedEvaluation[] {
return evaluations.map((ev) => ({
criterionScores: ev.criterionScoresJson as Record<string, number | boolean | string> | null,
globalScore: ev.globalScore,
binaryDecision: ev.binaryDecision,
feedbackText: ev.feedbackText ? sanitizeText(ev.feedbackText) : null,
}))
}
// ─── Prompt Building ────────────────────────────────────────────────────────
/**
* Build the OpenAI prompt for evaluation summary generation.
*/
export function buildSummaryPrompt(
anonymizedEvaluations: AnonymizedEvaluation[],
projectTitle: string,
criteriaDefinitions: CriterionDef[]
): string {
const sanitizedTitle = sanitizeText(projectTitle)
// Build a descriptive criteria section that explains each criterion type
const criteriaDescription = criteriaDefinitions
.filter((c) => c.type !== 'section_header')
.map((c) => {
const type = c.type || 'numeric'
if (type === 'boolean') {
return `- "${c.label}" (Yes/No decision: ${c.trueLabel || 'Yes'} / ${c.falseLabel || 'No'})`
}
if (type === 'text') {
return `- "${c.label}" (Free-text response)`
}
return `- "${c.label}" (Numeric score)`
})
.join('\n')
return `You are analyzing jury evaluations for a project competition.
PROJECT: "${sanitizedTitle}"
EVALUATION CRITERIA:
${criteriaDescription}
Note: criterionScores values may be numbers (numeric scores), booleans (true/false for yes/no criteria), or strings (text responses).
EVALUATIONS (${anonymizedEvaluations.length} total):
${JSON.stringify(anonymizedEvaluations, null, 2)}
Analyze these evaluations and return a JSON object with this exact structure:
{
"overallAssessment": "A 2-3 sentence summary of how the project was evaluated overall",
"strengths": ["strength 1", "strength 2", ...],
"weaknesses": ["weakness 1", "weakness 2", ...],
"themes": [
{ "theme": "theme name", "sentiment": "positive" | "negative" | "mixed", "frequency": <number of evaluators mentioning this> }
],
"recommendation": "A brief recommendation based on the evaluation consensus"
}
Guidelines:
- Base your analysis only on the provided evaluation data
- For numeric criteria, consider score averages and distribution
- For yes/no criteria, consider the proportion of yes vs no answers
- For text criteria, synthesize common themes from the responses
- Identify common themes across evaluator feedback
- Note areas of agreement and disagreement
- Keep the assessment objective and balanced
- Do not include any personal identifiers`
}
// ─── Scoring Patterns (Server-Side) ─────────────────────────────────────────
/**
* Compute scoring patterns from evaluations without AI.
*/
export function computeScoringPatterns(
evaluations: EvaluationForSummary[],
criteriaLabels: CriterionDef[]
): ScoringPatterns {
const globalScores = evaluations
.map((e) => e.globalScore)
.filter((s): s is number => s !== null)
// Average global score
const averageGlobalScore =
globalScores.length > 0
? globalScores.reduce((a, b) => a + b, 0) / globalScores.length
: null
// Consensus: 1 - normalized standard deviation (1.0 = full consensus)
let consensus = 1
if (globalScores.length > 1 && averageGlobalScore !== null) {
const variance =
globalScores.reduce(
(sum, score) => sum + Math.pow(score - averageGlobalScore, 2),
0
) / globalScores.length
const stdDev = Math.sqrt(variance)
// Normalize by the scoring scale (1-10, so max possible std dev is ~4.5)
consensus = Math.max(0, 1 - stdDev / 4.5)
}
// Criterion averages (numeric only)
const criterionAverages: Record<string, number> = {}
// Boolean criteria stats
const booleanCriteria: Record<string, BooleanStats> = {}
// Text responses
const textResponses: Record<string, string[]> = {}
for (const criterion of criteriaLabels) {
const type = criterion.type || 'numeric'
if (type === 'numeric') {
const scores: number[] = []
for (const ev of evaluations) {
const criterionScores = ev.criterionScoresJson as Record<string, number | boolean | string> | null
const val = criterionScores?.[criterion.id]
if (typeof val === 'number') {
scores.push(val)
}
}
if (scores.length > 0) {
criterionAverages[criterion.label] =
scores.reduce((a, b) => a + b, 0) / scores.length
}
} else if (type === 'boolean') {
let yesCount = 0
let noCount = 0
for (const ev of evaluations) {
const criterionScores = ev.criterionScoresJson as Record<string, number | boolean | string> | null
const val = criterionScores?.[criterion.id]
if (typeof val === 'boolean') {
if (val) yesCount++
else noCount++
}
}
const total = yesCount + noCount
if (total > 0) {
booleanCriteria[criterion.label] = {
yesCount,
noCount,
total,
yesPercent: Math.round((yesCount / total) * 100),
trueLabel: criterion.trueLabel || 'Yes',
falseLabel: criterion.falseLabel || 'No',
}
}
} else if (type === 'text') {
const responses: string[] = []
for (const ev of evaluations) {
const criterionScores = ev.criterionScoresJson as Record<string, number | boolean | string> | null
const val = criterionScores?.[criterion.id]
if (typeof val === 'string' && val.trim()) {
responses.push(sanitizeText(val))
}
}
if (responses.length > 0) {
textResponses[criterion.label] = responses
}
}
}
return {
averageGlobalScore,
consensus: Math.round(consensus * 100) / 100,
criterionAverages,
booleanCriteria,
textResponses,
evaluatorCount: evaluations.length,
}
}
// ─── Main Orchestrator ──────────────────────────────────────────────────────
/**
* Generate an AI-powered evaluation summary for a project in a round.
*/
export async function generateSummary({
projectId,
roundId,
userId,
prisma,
}: {
projectId: string
roundId: string
userId: string
prisma: PrismaClient
}): Promise<EvaluationSummaryResult> {
// 1. Fetch project with evaluations and form criteria
const project = await prisma.project.findUnique({
where: { id: projectId },
select: {
id: true,
title: true,
competitionCategory: true,
},
})
if (!project) {
throw new TRPCError({ code: 'NOT_FOUND', message: 'Project not found' })
}
// Fetch submitted evaluations for this project in this round
const evaluations = await prisma.evaluation.findMany({
where: {
status: 'SUBMITTED',
assignment: {
projectId,
roundId,
},
},
select: {
id: true,
criterionScoresJson: true,
globalScore: true,
binaryDecision: true,
feedbackText: true,
assignment: {
select: {
user: {
select: { id: true, name: true, email: true },
},
},
},
},
})
if (evaluations.length === 0) {
throw new TRPCError({
code: 'BAD_REQUEST',
message: 'No submitted evaluations found for this project in this round',
})
}
// Get evaluation form criteria for this round (category-aware)
const form = await findActiveForm(prisma, roundId, project.competitionCategory)
const criteria: CriterionDef[] = form?.criteriaJson
? (form.criteriaJson as unknown as CriterionDef[])
: []
// 2. Anonymize evaluations
const typedEvaluations = evaluations as unknown as EvaluationForSummary[]
const anonymized = anonymizeEvaluations(typedEvaluations)
// 3. Build prompt and call OpenAI
const openai = await getOpenAI()
if (!openai) {
throw new TRPCError({
code: 'PRECONDITION_FAILED',
message: 'OpenAI is not configured. Please set up your API key in Settings.',
})
}
const model = await getConfiguredModel(AI_MODELS.QUICK)
const prompt = buildSummaryPrompt(anonymized, project.title, criteria)
let aiResponse: AIResponsePayload
let tokensUsed = 0
const MAX_PARSE_RETRIES = 2
let parseAttempts = 0
let response: Awaited<ReturnType<typeof openai.chat.completions.create>>
try {
const params = buildCompletionParams(model, {
messages: [
{ role: 'user', content: prompt },
],
jsonMode: true,
temperature: 0.1,
maxTokens: 2000,
})
response = await openai.chat.completions.create(params)
let usage = extractTokenUsage(response)
tokensUsed = usage.totalTokens
// Parse with retry logic
while (true) {
try {
const content = response.choices[0]?.message?.content
if (!content) {
throw new Error('Empty response from AI')
}
aiResponse = JSON.parse(content) as AIResponsePayload
break
} catch (parseError) {
if (parseError instanceof SyntaxError && parseAttempts < MAX_PARSE_RETRIES) {
parseAttempts++
console.warn(`[AI Evaluation Summary] JSON parse failed, retrying (${parseAttempts}/${MAX_PARSE_RETRIES})`)
// Retry the API call with hint
const retryParams = buildCompletionParams(model, {
messages: [
{ role: 'user', content: prompt + '\n\nIMPORTANT: Please ensure valid JSON output.' },
],
jsonMode: true,
temperature: 0.1,
maxTokens: 2000,
})
response = await openai.chat.completions.create(retryParams)
const retryUsage = extractTokenUsage(response)
tokensUsed += retryUsage.totalTokens
continue
}
// If retry limit reached or non-syntax error
if (parseError instanceof SyntaxError) {
const parseErrorObj = createParseError((parseError as Error).message)
logAIError('EvaluationSummary', 'generateSummary', parseErrorObj)
await logAIUsage({
userId,
action: 'EVALUATION_SUMMARY',
entityType: 'Project',
entityId: projectId,
model,
promptTokens: 0,
completionTokens: 0,
totalTokens: tokensUsed,
itemsProcessed: 0,
status: 'ERROR',
errorMessage: parseErrorObj.message,
})
throw new TRPCError({
code: 'INTERNAL_SERVER_ERROR',
message: 'Failed to parse AI response. Please try again.',
})
}
throw parseError
}
}
} catch (error) {
if (error instanceof TRPCError) {
throw error
}
const classified = classifyAIError(error)
logAIError('EvaluationSummary', 'generateSummary', classified)
await logAIUsage({
userId,
action: 'EVALUATION_SUMMARY',
entityType: 'Project',
entityId: projectId,
model,
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
itemsProcessed: 0,
status: 'ERROR',
errorMessage: classified.message,
})
throw new TRPCError({
code: 'INTERNAL_SERVER_ERROR',
message: classified.message,
})
}
// 4. Compute scoring patterns (server-side, no AI)
const scoringPatterns = computeScoringPatterns(typedEvaluations, criteria)
// 5. Merge and upsert
const summaryJson = {
...aiResponse,
scoringPatterns,
}
const summaryJsonValue = summaryJson as unknown as Prisma.InputJsonValue
const summary = await prisma.evaluationSummary.upsert({
where: {
projectId_roundId: { projectId, roundId },
},
create: {
projectId,
roundId,
summaryJson: summaryJsonValue,
generatedById: userId,
model,
tokensUsed,
},
update: {
summaryJson: summaryJsonValue,
generatedAt: new Date(),
generatedById: userId,
model,
tokensUsed,
},
})
// 6. Log AI usage
await logAIUsage({
userId,
action: 'EVALUATION_SUMMARY',
entityType: 'Project',
entityId: projectId,
model,
promptTokens: 0, // Detailed breakdown not always available
completionTokens: 0,
totalTokens: tokensUsed,
itemsProcessed: evaluations.length,
status: 'SUCCESS',
})
return {
id: summary.id,
projectId: summary.projectId,
roundId: summary.roundId,
summaryJson: summaryJson as AIResponsePayload & { scoringPatterns: ScoringPatterns },
generatedAt: summary.generatedAt,
model: summary.model,
tokensUsed: summary.tokensUsed,
}
}