Apply full refactor updates plus pipeline/email UX confirmations

2026-02-14 15:26:42 +01:00
parent e56e143a40
commit b5425e705e
374 changed files with 116737 additions and 111969 deletions
--- a/src/server/services/ai-evaluation-summary.ts
+++ b/src/server/services/ai-evaluation-summary.ts
@@ -1,404 +1,404 @@
-/**
- * AI-Powered Evaluation Summary Service
- *
- * Generates AI summaries of jury evaluations for a project in a given round.
- * Combines OpenAI analysis with server-side scoring pattern calculations.
- *
- * GDPR Compliance:
- * - All evaluation data is anonymized before AI processing
- * - No juror names, emails, or identifiers are sent to OpenAI
- * - Only scores, feedback text, and binary decisions are included
- */
-
-import { TRPCError } from '@trpc/server'
-import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
-import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
-import { classifyAIError, createParseError, logAIError } from './ai-errors'
-import { sanitizeText } from './anonymization'
-import type { PrismaClient, Prisma } from '@prisma/client'
-
-// ─── Types ──────────────────────────────────────────────────────────────────
-
-interface EvaluationForSummary {
-  id: string
-  criterionScoresJson: Record<string, number> | null
-  globalScore: number | null
-  binaryDecision: boolean | null
-  feedbackText: string | null
-  assignment: {
-    user: {
-      id: string
-      name: string | null
-      email: string
-    }
-  }
-}
-
-interface AnonymizedEvaluation {
-  criterionScores: Record<string, number> | null
-  globalScore: number | null
-  binaryDecision: boolean | null
-  feedbackText: string | null
-}
-
-interface CriterionDef {
-  id: string
-  label: string
-}
-
-interface AIResponsePayload {
-  overallAssessment: string
-  strengths: string[]
-  weaknesses: string[]
-  themes: Array<{
-    theme: string
-    sentiment: 'positive' | 'negative' | 'mixed'
-    frequency: number
-  }>
-  recommendation: string
-}
-
-interface ScoringPatterns {
-  averageGlobalScore: number | null
-  consensus: number
-  criterionAverages: Record<string, number>
-  evaluatorCount: number
-}
-
-export interface EvaluationSummaryResult {
-  id: string
-  projectId: string
-  stageId: string
-  summaryJson: AIResponsePayload & { scoringPatterns: ScoringPatterns }
-  generatedAt: Date
-  model: string
-  tokensUsed: number
-}
-
-// ─── Anonymization ──────────────────────────────────────────────────────────
-
-/**
- * Strip juror names/emails from evaluations, keeping only scores and feedback.
- */
-export function anonymizeEvaluations(
-  evaluations: EvaluationForSummary[]
-): AnonymizedEvaluation[] {
-  return evaluations.map((ev) => ({
-    criterionScores: ev.criterionScoresJson as Record<string, number> | null,
-    globalScore: ev.globalScore,
-    binaryDecision: ev.binaryDecision,
-    feedbackText: ev.feedbackText ? sanitizeText(ev.feedbackText) : null,
-  }))
-}
-
-// ─── Prompt Building ────────────────────────────────────────────────────────
-
-/**
- * Build the OpenAI prompt for evaluation summary generation.
- */
-export function buildSummaryPrompt(
-  anonymizedEvaluations: AnonymizedEvaluation[],
-  projectTitle: string,
-  criteriaLabels: string[]
-): string {
-  const sanitizedTitle = sanitizeText(projectTitle)
-
-  return `You are analyzing jury evaluations for a project competition.
-
-PROJECT: "${sanitizedTitle}"
-
-EVALUATION CRITERIA: ${criteriaLabels.join(', ')}
-
-EVALUATIONS (${anonymizedEvaluations.length} total):
-${JSON.stringify(anonymizedEvaluations, null, 2)}
-
-Analyze these evaluations and return a JSON object with this exact structure:
-{
-  "overallAssessment": "A 2-3 sentence summary of how the project was evaluated overall",
-  "strengths": ["strength 1", "strength 2", ...],
-  "weaknesses": ["weakness 1", "weakness 2", ...],
-  "themes": [
-    { "theme": "theme name", "sentiment": "positive" | "negative" | "mixed", "frequency": <number of evaluators mentioning this> }
-  ],
-  "recommendation": "A brief recommendation based on the evaluation consensus"
-}
-
-Guidelines:
-- Base your analysis only on the provided evaluation data
-- Identify common themes across evaluator feedback
-- Note areas of agreement and disagreement
-- Keep the assessment objective and balanced
-- Do not include any personal identifiers`
-}
-
-// ─── Scoring Patterns (Server-Side) ─────────────────────────────────────────
-
-/**
- * Compute scoring patterns from evaluations without AI.
- */
-export function computeScoringPatterns(
-  evaluations: EvaluationForSummary[],
-  criteriaLabels: CriterionDef[]
-): ScoringPatterns {
-  const globalScores = evaluations
-    .map((e) => e.globalScore)
-    .filter((s): s is number => s !== null)
-
-  // Average global score
-  const averageGlobalScore =
-    globalScores.length > 0
-      ? globalScores.reduce((a, b) => a + b, 0) / globalScores.length
-      : null
-
-  // Consensus: 1 - normalized standard deviation (1.0 = full consensus)
-  let consensus = 1
-  if (globalScores.length > 1 && averageGlobalScore !== null) {
-    const variance =
-      globalScores.reduce(
-        (sum, score) => sum + Math.pow(score - averageGlobalScore, 2),
-        0
-      ) / globalScores.length
-    const stdDev = Math.sqrt(variance)
-    // Normalize by the scoring scale (1-10, so max possible std dev is ~4.5)
-    consensus = Math.max(0, 1 - stdDev / 4.5)
-  }
-
-  // Criterion averages
-  const criterionAverages: Record<string, number> = {}
-  for (const criterion of criteriaLabels) {
-    const scores: number[] = []
-    for (const ev of evaluations) {
-      const criterionScores = ev.criterionScoresJson as Record<string, number> | null
-      if (criterionScores && criterionScores[criterion.id] !== undefined) {
-        scores.push(criterionScores[criterion.id])
-      }
-    }
-    if (scores.length > 0) {
-      criterionAverages[criterion.label] =
-        scores.reduce((a, b) => a + b, 0) / scores.length
-    }
-  }
-
-  return {
-    averageGlobalScore,
-    consensus: Math.round(consensus * 100) / 100,
-    criterionAverages,
-    evaluatorCount: evaluations.length,
-  }
-}
-
-// ─── Main Orchestrator ──────────────────────────────────────────────────────
-
-/**
- * Generate an AI-powered evaluation summary for a project in a round.
- */
-export async function generateSummary({
-  projectId,
-  stageId,
-  userId,
-  prisma,
-}: {
-  projectId: string
-  stageId: string
-  userId: string
-  prisma: PrismaClient
-}): Promise<EvaluationSummaryResult> {
-  // 1. Fetch project with evaluations and form criteria
-  const project = await prisma.project.findUnique({
-    where: { id: projectId },
-    select: {
-      id: true,
-      title: true,
-    },
-  })
-
-  if (!project) {
-    throw new TRPCError({ code: 'NOT_FOUND', message: 'Project not found' })
-  }
-
-  // Fetch submitted evaluations for this project in this stage
-  const evaluations = await prisma.evaluation.findMany({
-    where: {
-      status: 'SUBMITTED',
-      assignment: {
-        projectId,
-        stageId,
-      },
-    },
-    select: {
-      id: true,
-      criterionScoresJson: true,
-      globalScore: true,
-      binaryDecision: true,
-      feedbackText: true,
-      assignment: {
-        select: {
-          user: {
-            select: { id: true, name: true, email: true },
-          },
-        },
-      },
-    },
-  })
-
-  if (evaluations.length === 0) {
-    throw new TRPCError({
-      code: 'BAD_REQUEST',
-      message: 'No submitted evaluations found for this project in this stage',
-    })
-  }
-
-  // Get evaluation form criteria for this stage
-  const form = await prisma.evaluationForm.findFirst({
-    where: { stageId, isActive: true },
-    select: { criteriaJson: true },
-  })
-
-  const criteria: CriterionDef[] = form?.criteriaJson
-    ? (form.criteriaJson as unknown as CriterionDef[])
-    : []
-  const criteriaLabels = criteria.map((c) => c.label)
-
-  // 2. Anonymize evaluations
-  const typedEvaluations = evaluations as unknown as EvaluationForSummary[]
-  const anonymized = anonymizeEvaluations(typedEvaluations)
-
-  // 3. Build prompt and call OpenAI
-  const openai = await getOpenAI()
-  if (!openai) {
-    throw new TRPCError({
-      code: 'PRECONDITION_FAILED',
-      message: 'OpenAI is not configured. Please set up your API key in Settings.',
-    })
-  }
-
-  const model = await getConfiguredModel(AI_MODELS.QUICK)
-  const prompt = buildSummaryPrompt(anonymized, project.title, criteriaLabels)
-
-  let aiResponse: AIResponsePayload
-  let tokensUsed = 0
-
-  try {
-    const params = buildCompletionParams(model, {
-      messages: [
-        { role: 'user', content: prompt },
-      ],
-      jsonMode: true,
-      temperature: 0.3,
-      maxTokens: 2000,
-    })
-
-    const response = await openai.chat.completions.create(params)
-    const usage = extractTokenUsage(response)
-    tokensUsed = usage.totalTokens
-
-    const content = response.choices[0]?.message?.content
-    if (!content) {
-      throw new Error('Empty response from AI')
-    }
-
-    aiResponse = JSON.parse(content) as AIResponsePayload
-  } catch (error) {
-    if (error instanceof SyntaxError) {
-      const parseError = createParseError(error.message)
-      logAIError('EvaluationSummary', 'generateSummary', parseError)
-
-      await logAIUsage({
-        userId,
-        action: 'EVALUATION_SUMMARY',
-        entityType: 'Project',
-        entityId: projectId,
-        model,
-        promptTokens: 0,
-        completionTokens: 0,
-        totalTokens: tokensUsed,
-        itemsProcessed: 0,
-        status: 'ERROR',
-        errorMessage: parseError.message,
-      })
-
-      throw new TRPCError({
-        code: 'INTERNAL_SERVER_ERROR',
-        message: 'Failed to parse AI response. Please try again.',
-      })
-    }
-
-    const classified = classifyAIError(error)
-    logAIError('EvaluationSummary', 'generateSummary', classified)
-
-    await logAIUsage({
-      userId,
-      action: 'EVALUATION_SUMMARY',
-      entityType: 'Project',
-      entityId: projectId,
-      model,
-      promptTokens: 0,
-      completionTokens: 0,
-      totalTokens: 0,
-      itemsProcessed: 0,
-      status: 'ERROR',
-      errorMessage: classified.message,
-    })
-
-    throw new TRPCError({
-      code: 'INTERNAL_SERVER_ERROR',
-      message: classified.message,
-    })
-  }
-
-  // 4. Compute scoring patterns (server-side, no AI)
-  const scoringPatterns = computeScoringPatterns(typedEvaluations, criteria)
-
-  // 5. Merge and upsert
-  const summaryJson = {
-    ...aiResponse,
-    scoringPatterns,
-  }
-
-  const summaryJsonValue = summaryJson as unknown as Prisma.InputJsonValue
-
-  const summary = await prisma.evaluationSummary.upsert({
-    where: {
-      projectId_stageId: { projectId, stageId },
-    },
-    create: {
-      projectId,
-      stageId,
-      summaryJson: summaryJsonValue,
-      generatedById: userId,
-      model,
-      tokensUsed,
-    },
-    update: {
-      summaryJson: summaryJsonValue,
-      generatedAt: new Date(),
-      generatedById: userId,
-      model,
-      tokensUsed,
-    },
-  })
-
-  // 6. Log AI usage
-  await logAIUsage({
-    userId,
-    action: 'EVALUATION_SUMMARY',
-    entityType: 'Project',
-    entityId: projectId,
-    model,
-    promptTokens: 0, // Detailed breakdown not always available
-    completionTokens: 0,
-    totalTokens: tokensUsed,
-    itemsProcessed: evaluations.length,
-    status: 'SUCCESS',
-  })
-
-  return {
-    id: summary.id,
-    projectId: summary.projectId,
-    stageId: summary.stageId,
-    summaryJson: summaryJson as AIResponsePayload & { scoringPatterns: ScoringPatterns },
-    generatedAt: summary.generatedAt,
-    model: summary.model,
-    tokensUsed: summary.tokensUsed,
-  }
-}
+/**
+ * AI-Powered Evaluation Summary Service
+ *
+ * Generates AI summaries of jury evaluations for a project in a given round.
+ * Combines OpenAI analysis with server-side scoring pattern calculations.
+ *
+ * GDPR Compliance:
+ * - All evaluation data is anonymized before AI processing
+ * - No juror names, emails, or identifiers are sent to OpenAI
+ * - Only scores, feedback text, and binary decisions are included
+ */
+
+import { TRPCError } from '@trpc/server'
+import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
+import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
+import { classifyAIError, createParseError, logAIError } from './ai-errors'
+import { sanitizeText } from './anonymization'
+import type { PrismaClient, Prisma } from '@prisma/client'
+
+// ─── Types ──────────────────────────────────────────────────────────────────
+
+/**
+ * Shape of one evaluation row as selected by `generateSummary`.
+ *
+ * Carries the juror's identity under `assignment.user`; `anonymizeEvaluations`
+ * copies none of those fields into its output.
+ */
+interface EvaluationForSummary {
+  id: string
+  /** Per-criterion scores; `computeScoringPatterns` looks them up by `CriterionDef.id`. */
+  criterionScoresJson: Record<string, number> | null
+  globalScore: number | null
+  binaryDecision: boolean | null
+  /** Free-text feedback; passed through `sanitizeText` before it is placed in the prompt. */
+  feedbackText: string | null
+  /** Juror identity attached to the evaluation's assignment. */
+  assignment: {
+    user: {
+      id: string
+      name: string | null
+      email: string
+    }
+  }
+}
+
+/**
+ * Evaluation reduced to scores, the binary decision, and sanitized feedback.
+ * This is the shape `buildSummaryPrompt` serializes into the prompt text.
+ */
+interface AnonymizedEvaluation {
+  criterionScores: Record<string, number> | null
+  globalScore: number | null
+  binaryDecision: boolean | null
+  feedbackText: string | null
+}
+
+/**
+ * One criterion of a stage's evaluation form, read from the form's `criteriaJson`.
+ * NOTE(review): `generateSummary` casts `criteriaJson` to this shape without
+ * checking it at runtime — confirm the stored JSON always matches.
+ */
+interface CriterionDef {
+  /** Key used to look up this criterion's score in `criterionScoresJson`. */
+  id: string
+  /** Display name; listed in the prompt and used as the key in `criterionAverages`. */
+  label: string
+}
+
+/**
+ * JSON structure the prompt built by `buildSummaryPrompt` asks the model to return.
+ */
+interface AIResponsePayload {
+  overallAssessment: string
+  strengths: string[]
+  weaknesses: string[]
+  themes: Array<{
+    theme: string
+    sentiment: 'positive' | 'negative' | 'mixed'
+    /** Number of evaluators mentioning the theme, as requested in the prompt. */
+    frequency: number
+  }>
+  recommendation: string
+}
+
+/**
+ * Statistics computed server-side by `computeScoringPatterns`, without AI.
+ */
+interface ScoringPatterns {
+  /** Mean of the available global scores, or `null` when no evaluation has one. */
+  averageGlobalScore: number | null
+  /** Agreement between global scores: 1 = identical scores, never below 0; rounded to two decimals. */
+  consensus: number
+  /** Mean score per criterion, keyed by criterion label. */
+  criterionAverages: Record<string, number>
+  /** Number of evaluations passed in, including those without a global score. */
+  evaluatorCount: number
+}
+
+/**
+ * Value returned by `generateSummary`: metadata of the stored summary row plus
+ * the merged summary content.
+ */
+export interface EvaluationSummaryResult {
+  id: string
+  projectId: string
+  stageId: string
+  /** AI-generated analysis combined with the server-side scoring patterns. */
+  summaryJson: AIResponsePayload & { scoringPatterns: ScoringPatterns }
+  generatedAt: Date
+  /** Model name stored with the summary. */
+  model: string
+  /** Total token count stored with the summary. */
+  tokensUsed: number
+}
+
+// ─── Anonymization ──────────────────────────────────────────────────────────
+
+/**
+ * Project each evaluation onto its non-identifying fields.
+ *
+ * Scores and the binary decision are copied as-is; feedback text is run through
+ * `sanitizeText` (empty or missing feedback becomes `null`). Nothing from
+ * `assignment.user` is carried over.
+ *
+ * @param evaluations - Evaluations including juror identity.
+ * @returns One anonymized entry per input evaluation, in the same order.
+ */
+export function anonymizeEvaluations(
+  evaluations: EvaluationForSummary[]
+): AnonymizedEvaluation[] {
+  return evaluations.map(
+    ({ criterionScoresJson, globalScore, binaryDecision, feedbackText }) => {
+      const cleanedFeedback = feedbackText ? sanitizeText(feedbackText) : null
+
+      return {
+        criterionScores: criterionScoresJson,
+        globalScore,
+        binaryDecision,
+        feedbackText: cleanedFeedback,
+      }
+    }
+  )
+}
+
+// ─── Prompt Building ────────────────────────────────────────────────────────
+
+/**
+ * Assemble the user prompt sent to OpenAI for summarizing a project's evaluations.
+ *
+ * @param anonymizedEvaluations - Evaluations already stripped of juror identity;
+ *   embedded as pretty-printed JSON.
+ * @param projectTitle - Project title; run through `sanitizeText` before embedding.
+ * @param criteriaLabels - Criterion display names, joined with ", ".
+ * @returns The complete prompt: context, evaluation data, the required JSON
+ *   response structure, and the analysis guidelines.
+ */
+export function buildSummaryPrompt(
+  anonymizedEvaluations: AnonymizedEvaluation[],
+  projectTitle: string,
+  criteriaLabels: string[]
+): string {
+  const safeTitle = sanitizeText(projectTitle)
+  const criteriaList = criteriaLabels.join(', ')
+  const evaluationsJson = JSON.stringify(anonymizedEvaluations, null, 2)
+
+  // Response structure the model is told to follow, one prompt line per entry.
+  const responseShapeLines = [
+    '{',
+    '  "overallAssessment": "A 2-3 sentence summary of how the project was evaluated overall",',
+    '  "strengths": ["strength 1", "strength 2", ...],',
+    '  "weaknesses": ["weakness 1", "weakness 2", ...],',
+    '  "themes": [',
+    '    { "theme": "theme name", "sentiment": "positive" | "negative" | "mixed", "frequency": <number of evaluators mentioning this> }',
+    '  ],',
+    '  "recommendation": "A brief recommendation based on the evaluation consensus"',
+    '}',
+  ]
+
+  const guidelineLines = [
+    '- Base your analysis only on the provided evaluation data',
+    '- Identify common themes across evaluator feedback',
+    '- Note areas of agreement and disagreement',
+    '- Keep the assessment objective and balanced',
+    '- Do not include any personal identifiers',
+  ]
+
+  const promptLines = [
+    'You are analyzing jury evaluations for a project competition.',
+    '',
+    `PROJECT: "${safeTitle}"`,
+    '',
+    `EVALUATION CRITERIA: ${criteriaList}`,
+    '',
+    `EVALUATIONS (${anonymizedEvaluations.length} total):`,
+    evaluationsJson,
+    '',
+    'Analyze these evaluations and return a JSON object with this exact structure:',
+    ...responseShapeLines,
+    '',
+    'Guidelines:',
+    ...guidelineLines,
+  ]
+
+  return promptLines.join('\n')
+}
+
+// ─── Scoring Patterns (Server-Side) ─────────────────────────────────────────
+
+/**
+ * Compute scoring patterns from evaluations without AI.
+ *
+ * Score values originate from JSON columns, so anything that is not a finite
+ * number (null, strings, NaN) is skipped rather than folded into an average,
+ * where it would otherwise be coerced or concatenated.
+ *
+ * @param evaluations - Evaluations to aggregate.
+ * @param criteriaLabels - Criterion definitions (id and label), despite the name.
+ * @returns The mean global score (`null` when no usable score exists), a
+ *   consensus value in [0, 1] rounded to two decimals, per-criterion means keyed
+ *   by criterion label (criteria without a usable score are omitted), and the
+ *   number of evaluations passed in.
+ */
+export function computeScoringPatterns(
+  evaluations: EvaluationForSummary[],
+  criteriaLabels: CriterionDef[]
+): ScoringPatterns {
+  // Largest population standard deviation possible on the 1-10 scoring scale:
+  // half the scores at 1 and half at 10 gives (10 - 1) / 2.
+  const MAX_STD_DEV = 4.5
+
+  const isUsableScore = (value: unknown): value is number =>
+    typeof value === 'number' && Number.isFinite(value)
+  const mean = (values: number[]): number =>
+    values.reduce((total, value) => total + value, 0) / values.length
+
+  const globalScores = evaluations
+    .map((evaluation) => evaluation.globalScore)
+    .filter(isUsableScore)
+
+  const averageGlobalScore = globalScores.length > 0 ? mean(globalScores) : null
+
+  // Consensus: 1 - normalized standard deviation (1.0 = full consensus).
+  // A single score (or none) counts as full consensus.
+  let consensus = 1
+  if (globalScores.length > 1 && averageGlobalScore !== null) {
+    const variance = mean(
+      globalScores.map((score) => (score - averageGlobalScore) ** 2)
+    )
+    consensus = Math.max(0, 1 - Math.sqrt(variance) / MAX_STD_DEV)
+  }
+
+  // Scores are looked up by criterion id; results are reported under the label.
+  const criterionAverages: Record<string, number> = {}
+  for (const criterion of criteriaLabels) {
+    const scores = evaluations
+      .map((evaluation): unknown => evaluation.criterionScoresJson?.[criterion.id])
+      .filter(isUsableScore)
+
+    if (scores.length > 0) {
+      criterionAverages[criterion.label] = mean(scores)
+    }
+  }
+
+  return {
+    averageGlobalScore,
+    consensus: Math.round(consensus * 100) / 100,
+    criterionAverages,
+    evaluatorCount: evaluations.length,
+  }
+}
+
+// ─── Main Orchestrator ──────────────────────────────────────────────────────
+
+/**
+ * Coerce parsed model output into a well-formed `AIResponsePayload`.
+ *
+ * The prompt requests a fixed structure, but the reply is untrusted JSON: missing
+ * or mistyped fields fall back to empty values, malformed theme entries are
+ * dropped, and fields outside the payload shape are discarded.
+ *
+ * @param parsed - Result of `JSON.parse` on the model's reply.
+ * @returns A payload in which every field has its declared type.
+ * @throws SyntaxError when `parsed` is not a JSON object, so `generateSummary`
+ *   handles it on the same path as unparseable JSON.
+ */
+function normalizeAIResponse(parsed: unknown): AIResponsePayload {
+  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+    throw new SyntaxError('AI response is not a JSON object')
+  }
+  const raw = parsed as Record<string, unknown>
+
+  const toText = (value: unknown): string =>
+    typeof value === 'string' ? value : ''
+  const toTextList = (value: unknown): string[] =>
+    Array.isArray(value)
+      ? value.filter((item): item is string => typeof item === 'string')
+      : []
+
+  const themes: AIResponsePayload['themes'] = []
+  if (Array.isArray(raw.themes)) {
+    for (const entry of raw.themes as unknown[]) {
+      if (typeof entry !== 'object' || entry === null) continue
+      const { theme, sentiment, frequency } = entry as Record<string, unknown>
+      if (typeof theme !== 'string') continue
+      themes.push({
+        theme,
+        // Anything other than the two polar values is treated as "mixed".
+        sentiment:
+          sentiment === 'positive' || sentiment === 'negative' ? sentiment : 'mixed',
+        frequency:
+          typeof frequency === 'number' && Number.isFinite(frequency) ? frequency : 0,
+      })
+    }
+  }
+
+  return {
+    overallAssessment: toText(raw.overallAssessment),
+    strengths: toTextList(raw.strengths),
+    weaknesses: toTextList(raw.weaknesses),
+    themes,
+    recommendation: toText(raw.recommendation),
+  }
+}
+
+/**
+ * Generate an AI-powered evaluation summary for a project in a stage and store it.
+ *
+ * Loads the project, its SUBMITTED evaluations for the stage and the stage's
+ * active evaluation form, anonymizes the evaluations, requests a JSON summary
+ * from OpenAI, merges it with server-side scoring patterns, upserts one summary
+ * per (projectId, stageId), and logs AI usage for both success and failure.
+ *
+ * @param projectId - Project to summarize.
+ * @param stageId - Stage whose evaluations and form criteria are used.
+ * @param userId - Recorded as the summary's generator and in the usage log.
+ * @param prisma - Prisma client used for all reads and the upsert.
+ * @returns The stored summary's metadata together with the merged summary content.
+ * @throws TRPCError `NOT_FOUND` when the project does not exist.
+ * @throws TRPCError `BAD_REQUEST` when there are no submitted evaluations.
+ * @throws TRPCError `PRECONDITION_FAILED` when no OpenAI client is available.
+ * @throws TRPCError `INTERNAL_SERVER_ERROR` when the AI call fails or its reply
+ *   cannot be parsed into a JSON object.
+ */
+export async function generateSummary({
+  projectId,
+  stageId,
+  userId,
+  prisma,
+}: {
+  projectId: string
+  stageId: string
+  userId: string
+  prisma: PrismaClient
+}): Promise<EvaluationSummaryResult> {
+  // 1. Fetch project, evaluations and form criteria
+  const project = await prisma.project.findUnique({
+    where: { id: projectId },
+    select: {
+      id: true,
+      title: true,
+    },
+  })
+
+  if (!project) {
+    throw new TRPCError({ code: 'NOT_FOUND', message: 'Project not found' })
+  }
+
+  // Fetch submitted evaluations for this project in this stage.
+  // NOTE(review): juror id/name/email are selected to satisfy EvaluationForSummary
+  // but are never read below — consider dropping them from the query and the type.
+  const evaluations = await prisma.evaluation.findMany({
+    where: {
+      status: 'SUBMITTED',
+      assignment: {
+        projectId,
+        stageId,
+      },
+    },
+    select: {
+      id: true,
+      criterionScoresJson: true,
+      globalScore: true,
+      binaryDecision: true,
+      feedbackText: true,
+      assignment: {
+        select: {
+          user: {
+            select: { id: true, name: true, email: true },
+          },
+        },
+      },
+    },
+  })
+
+  if (evaluations.length === 0) {
+    throw new TRPCError({
+      code: 'BAD_REQUEST',
+      message: 'No submitted evaluations found for this project in this stage',
+    })
+  }
+
+  // Get evaluation form criteria for this stage
+  const form = await prisma.evaluationForm.findFirst({
+    where: { stageId, isActive: true },
+    select: { criteriaJson: true },
+  })
+
+  // criteriaJson is free-form JSON: anything that is not an array is treated as
+  // "no criteria" instead of failing later on `.map`.
+  const rawCriteria: unknown = form?.criteriaJson
+  const criteria: CriterionDef[] = Array.isArray(rawCriteria)
+    ? (rawCriteria as CriterionDef[])
+    : []
+  const criteriaLabels = criteria.map((c) => c.label)
+
+  // 2. Anonymize evaluations
+  const typedEvaluations = evaluations as unknown as EvaluationForSummary[]
+  const anonymized = anonymizeEvaluations(typedEvaluations)
+
+  // 3. Build prompt and call OpenAI
+  const openai = await getOpenAI()
+  if (!openai) {
+    throw new TRPCError({
+      code: 'PRECONDITION_FAILED',
+      message: 'OpenAI is not configured. Please set up your API key in Settings.',
+    })
+  }
+
+  const model = await getConfiguredModel(AI_MODELS.QUICK)
+  const prompt = buildSummaryPrompt(anonymized, project.title, criteriaLabels)
+
+  let aiResponse: AIResponsePayload
+  let tokensUsed = 0
+
+  // Records a failed attempt. Reads `tokensUsed` at call time, so tokens already
+  // consumed by a completion that later proved unusable are still accounted for.
+  const logFailure = (errorMessage: string) =>
+    logAIUsage({
+      userId,
+      action: 'EVALUATION_SUMMARY',
+      entityType: 'Project',
+      entityId: projectId,
+      model,
+      promptTokens: 0,
+      completionTokens: 0,
+      totalTokens: tokensUsed,
+      itemsProcessed: 0,
+      status: 'ERROR',
+      errorMessage,
+    })
+
+  try {
+    const params = buildCompletionParams(model, {
+      messages: [
+        { role: 'user', content: prompt },
+      ],
+      jsonMode: true,
+      temperature: 0.3,
+      maxTokens: 2000,
+    })
+
+    const response = await openai.chat.completions.create(params)
+    const usage = extractTokenUsage(response)
+    tokensUsed = usage.totalTokens
+
+    const content = response.choices[0]?.message?.content
+    if (!content) {
+      throw new Error('Empty response from AI')
+    }
+
+    // JSON.parse throws SyntaxError on malformed JSON; normalizeAIResponse throws
+    // the same type when the JSON is valid but not an object.
+    aiResponse = normalizeAIResponse(JSON.parse(content))
+  } catch (error) {
+    if (error instanceof SyntaxError) {
+      const parseError = createParseError(error.message)
+      logAIError('EvaluationSummary', 'generateSummary', parseError)
+
+      await logFailure(parseError.message)
+
+      throw new TRPCError({
+        code: 'INTERNAL_SERVER_ERROR',
+        message: 'Failed to parse AI response. Please try again.',
+      })
+    }
+
+    const classified = classifyAIError(error)
+    logAIError('EvaluationSummary', 'generateSummary', classified)
+
+    await logFailure(classified.message)
+
+    throw new TRPCError({
+      code: 'INTERNAL_SERVER_ERROR',
+      message: classified.message,
+    })
+  }
+
+  // 4. Compute scoring patterns (server-side, no AI)
+  const scoringPatterns = computeScoringPatterns(typedEvaluations, criteria)
+
+  // 5. Merge and upsert
+  const summaryJson: AIResponsePayload & { scoringPatterns: ScoringPatterns } = {
+    ...aiResponse,
+    scoringPatterns,
+  }
+
+  const summaryJsonValue = summaryJson as unknown as Prisma.InputJsonValue
+
+  // One summary per (project, stage): regenerating overwrites the previous one.
+  const summary = await prisma.evaluationSummary.upsert({
+    where: {
+      projectId_stageId: { projectId, stageId },
+    },
+    create: {
+      projectId,
+      stageId,
+      summaryJson: summaryJsonValue,
+      generatedById: userId,
+      model,
+      tokensUsed,
+    },
+    update: {
+      summaryJson: summaryJsonValue,
+      generatedAt: new Date(),
+      generatedById: userId,
+      model,
+      tokensUsed,
+    },
+  })
+
+  // 6. Log AI usage
+  await logAIUsage({
+    userId,
+    action: 'EVALUATION_SUMMARY',
+    entityType: 'Project',
+    entityId: projectId,
+    model,
+    promptTokens: 0, // Detailed breakdown not always available
+    completionTokens: 0,
+    totalTokens: tokensUsed,
+    itemsProcessed: evaluations.length,
+    status: 'SUCCESS',
+  })
+
+  return {
+    id: summary.id,
+    projectId: summary.projectId,
+    stageId: summary.stageId,
+    summaryJson,
+    generatedAt: summary.generatedAt,
+    model: summary.model,
+    tokensUsed: summary.tokensUsed,
+  }
+}