// MOPC-Portal/src/server/services/ai-evaluation-summary.ts

/**
 * AI-Powered Evaluation Summary Service
 *
 * Generates AI summaries of jury evaluations for a project in a given round.
 * Combines OpenAI analysis with server-side scoring pattern calculations.
 *
 * GDPR Compliance:
 * - All evaluation data is anonymized before AI processing
 * - No juror names, emails, or identifiers are sent to OpenAI
 * - Only scores, feedback text, and binary decisions are included
 */

import { TRPCError } from '@trpc/server'
import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
import { classifyAIError, createParseError, logAIError } from './ai-errors'
import { sanitizeText } from './anonymization'
import type { PrismaClient, Prisma } from '@prisma/client'
import { findActiveForm } from '@/server/utils/evaluation-form-lookup'

// ─── Types ──────────────────────────────────────────────────────────────────

/**
 * A submitted evaluation as selected by `generateSummary`, including the
 * juror identity attached through its assignment.
 */
interface EvaluationForSummary {
  id: string
  // Per-criterion answers; looked up by criterion id when computing scoring patterns
  criterionScoresJson: Record<string, number | boolean | string> | null
  globalScore: number | null
  binaryDecision: boolean | null
  feedbackText: string | null
  assignment: {
    // Juror identity — not copied into the anonymized form built by anonymizeEvaluations
    user: {
      id: string
      name: string | null
      email: string
    }
  }
}

/**
 * Evaluation data with the juror identity removed. This is the shape that is
 * serialized into the prompt by `buildSummaryPrompt`.
 */
interface AnonymizedEvaluation {
  // Copied from EvaluationForSummary.criterionScoresJson
  criterionScores: Record<string, number | boolean | string> | null
  globalScore: number | null
  binaryDecision: boolean | null
  // Feedback after being passed through sanitizeText, or null when there was none
  feedbackText: string | null
}

/**
 * One entry of an evaluation form's criteria list.
 */
interface CriterionDef {
  // Key under which evaluations store their answer for this criterion
  id: string
  // Human-readable name; used in the prompt and as the key of computed statistics
  label: string
  // When omitted, the criterion is treated as 'numeric'.
  // 'section_header' entries are left out of the prompt and produce no statistics.
  type?: 'numeric' | 'text' | 'boolean' | 'section_header'
  // Display labels for boolean criteria; callers fall back to 'Yes' / 'No'
  trueLabel?: string
  falseLabel?: string
}

/**
 * JSON structure the model is instructed to return by `buildSummaryPrompt`.
 */
interface AIResponsePayload {
  overallAssessment: string
  strengths: string[]
  weaknesses: string[]
  themes: Array<{
    theme: string
    sentiment: 'positive' | 'negative' | 'mixed'
    // Requested from the model as the number of evaluators mentioning the theme
    frequency: number
  }>
  recommendation: string
}

/**
 * Tally of the answers given to a single yes/no criterion.
 */
interface BooleanStats {
  yesCount: number
  noCount: number
  // yesCount + noCount (evaluations without a boolean answer are not counted)
  total: number
  // Share of "yes" answers as a whole-number percentage
  yesPercent: number
  trueLabel: string
  falseLabel: string
}

/**
 * Statistics produced by `computeScoringPatterns` without any AI involvement.
 */
interface ScoringPatterns {
  // Mean of the non-null global scores, or null when none were given
  averageGlobalScore: number | null
  // 1 - (std. deviation of global scores / 4.5), floored at 0 and rounded to two decimals
  consensus: number
  // Mean numeric score per criterion, keyed by criterion label
  criterionAverages: Record<string, number>
  // Yes/no tallies per boolean criterion, keyed by criterion label
  booleanCriteria: Record<string, BooleanStats>
  // Sanitized free-text answers per text criterion, keyed by criterion label
  textResponses: Record<string, string[]>
  // Number of evaluations that were passed in
  evaluatorCount: number
}

/**
 * Value returned by `generateSummary`: the stored summary record's key fields
 * together with the merged AI analysis and server-side scoring patterns.
 */
export interface EvaluationSummaryResult {
  id: string
  projectId: string
  roundId: string
  // AI analysis fields plus the server-computed scoring patterns
  summaryJson: AIResponsePayload & { scoringPatterns: ScoringPatterns }
  generatedAt: Date
  model: string
  tokensUsed: number
}

// ─── Anonymization ──────────────────────────────────────────────────────────

/**
 * Strip juror identity from evaluations, keeping only scores, decisions and
 * feedback.
 *
 * The `assignment.user` record is never copied to the output. Every piece of
 * free text — `feedbackText` as well as any string-valued answer inside
 * `criterionScoresJson` (text criteria) — is passed through `sanitizeText`,
 * because the returned objects are serialized verbatim into the AI prompt.
 * Numeric and boolean criterion values are copied unchanged.
 *
 * @param evaluations - Evaluations including juror identity.
 * @returns One anonymized entry per input evaluation, in the same order.
 */
export function anonymizeEvaluations(
  evaluations: EvaluationForSummary[]
): AnonymizedEvaluation[] {
  return evaluations.map((ev) => {
    const rawScores = ev.criterionScoresJson
    let criterionScores = rawScores

    // Only plain objects are rewritten; null (or any unexpected JSON shape)
    // is passed through untouched, as before.
    if (rawScores && typeof rawScores === 'object' && !Array.isArray(rawScores)) {
      const cleaned: Record<string, number | boolean | string> = {}
      for (const [criterionId, value] of Object.entries(rawScores)) {
        // Free-text answers can contain names or contact details just like feedbackText
        cleaned[criterionId] = typeof value === 'string' ? sanitizeText(value) : value
      }
      criterionScores = cleaned
    }

    return {
      criterionScores,
      globalScore: ev.globalScore,
      binaryDecision: ev.binaryDecision,
      feedbackText: ev.feedbackText ? sanitizeText(ev.feedbackText) : null,
    }
  })
}

// ─── Prompt Building ────────────────────────────────────────────────────────

/**
 * Compose the prompt text sent to the model for summarizing a project's
 * evaluations.
 *
 * The prompt lists every criterion except section headers (with a hint about
 * its answer type), embeds the anonymized evaluations as pretty-printed JSON,
 * and spells out the JSON structure the model must return.
 *
 * @param anonymizedEvaluations - Evaluations with juror identity removed.
 * @param projectTitle - Project title; passed through `sanitizeText` before use.
 * @param criteriaDefinitions - Criteria of the evaluation form.
 * @returns The complete prompt string.
 */
export function buildSummaryPrompt(
  anonymizedEvaluations: AnonymizedEvaluation[],
  projectTitle: string,
  criteriaDefinitions: CriterionDef[]
): string {
  // One bullet per criterion; a missing type is handled as a numeric score
  const describeCriterion = (criterion: CriterionDef): string => {
    switch (criterion.type || 'numeric') {
      case 'boolean': {
        const yes = criterion.trueLabel || 'Yes'
        const no = criterion.falseLabel || 'No'
        return `- "${criterion.label}" (Yes/No decision: ${yes} / ${no})`
      }
      case 'text':
        return `- "${criterion.label}" (Free-text response)`
      default:
        return `- "${criterion.label}" (Numeric score)`
    }
  }

  const criteriaLines: string[] = []
  for (const criterion of criteriaDefinitions) {
    // Section headers only structure the form; they carry no answers
    if (criterion.type === 'section_header') continue
    criteriaLines.push(describeCriterion(criterion))
  }
  const criteriaDescription = criteriaLines.join('\n')

  const evaluationCount = anonymizedEvaluations.length
  const evaluationsJson = JSON.stringify(anonymizedEvaluations, null, 2)
  const sanitizedTitle = sanitizeText(projectTitle)

  return `You are analyzing jury evaluations for a project competition.

PROJECT: "${sanitizedTitle}"

EVALUATION CRITERIA:
${criteriaDescription}

Note: criterionScores values may be numbers (numeric scores), booleans (true/false for yes/no criteria), or strings (text responses).

EVALUATIONS (${evaluationCount} total):
${evaluationsJson}

Analyze these evaluations and return a JSON object with this exact structure:
{
  "overallAssessment": "A 2-3 sentence summary of how the project was evaluated overall",
  "strengths": ["strength 1", "strength 2", ...],
  "weaknesses": ["weakness 1", "weakness 2", ...],
  "themes": [
    { "theme": "theme name", "sentiment": "positive" | "negative" | "mixed", "frequency": <number of evaluators mentioning this> }
  ],
  "recommendation": "A brief recommendation based on the evaluation consensus"
}

Guidelines:
- Base your analysis only on the provided evaluation data
- For numeric criteria, consider score averages and distribution
- For yes/no criteria, consider the proportion of yes vs no answers
- For text criteria, synthesize common themes from the responses
- Identify common themes across evaluator feedback
- Note areas of agreement and disagreement
- Keep the assessment objective and balanced
- Do not include any personal identifiers`
}

// ─── Scoring Patterns (Server-Side) ─────────────────────────────────────────

/**
 * Derive scoring statistics from evaluations using plain arithmetic (no AI).
 *
 * - Global score: mean of all non-null scores, and a consensus figure of
 *   `1 - stdDev / 4.5` (floored at 0, rounded to two decimals; 1 when fewer
 *   than two scores exist).
 * - Numeric criteria: mean of the numeric answers.
 * - Boolean criteria: yes/no tally with a whole-number yes percentage.
 * - Text criteria: the non-blank answers, each passed through `sanitizeText`.
 *
 * Per-criterion results are keyed by criterion label and only present when at
 * least one matching answer exists. Criteria without a type count as numeric;
 * section headers yield nothing.
 *
 * @param evaluations - Evaluations to aggregate.
 * @param criteriaLabels - Criteria definitions of the evaluation form.
 * @returns The aggregated scoring patterns.
 */
export function computeScoringPatterns(
  evaluations: EvaluationForSummary[],
  criteriaLabels: CriterionDef[]
): ScoringPatterns {
  const add = (running: number, next: number): number => running + next

  // Every evaluation's answer to one criterion (undefined when not answered)
  const answersFor = (criterionId: string) =>
    evaluations.map((evaluation) => evaluation.criterionScoresJson?.[criterionId])

  // ── Global score statistics ──
  const globalScores: number[] = []
  for (const evaluation of evaluations) {
    if (evaluation.globalScore !== null) {
      globalScores.push(evaluation.globalScore)
    }
  }

  const averageGlobalScore =
    globalScores.length > 0 ? globalScores.reduce(add, 0) / globalScores.length : null

  let consensus = 1
  if (averageGlobalScore !== null && globalScores.length > 1) {
    let squaredDeviations = 0
    for (const score of globalScores) {
      squaredDeviations += (score - averageGlobalScore) ** 2
    }
    const spread = Math.sqrt(squaredDeviations / globalScores.length)
    // Scores run 1-10, so the largest possible standard deviation is ~4.5
    consensus = Math.max(0, 1 - spread / 4.5)
  }

  // ── Per-criterion statistics ──
  const criterionAverages: Record<string, number> = {}
  const booleanCriteria: Record<string, BooleanStats> = {}
  const textResponses: Record<string, string[]> = {}

  for (const { id, label, type, trueLabel, falseLabel } of criteriaLabels) {
    switch (type || 'numeric') {
      case 'numeric': {
        const scores = answersFor(id).filter(
          (answer): answer is number => typeof answer === 'number'
        )
        if (scores.length > 0) {
          criterionAverages[label] = scores.reduce(add, 0) / scores.length
        }
        break
      }
      case 'boolean': {
        const votes = answersFor(id).filter(
          (answer): answer is boolean => typeof answer === 'boolean'
        )
        const total = votes.length
        if (total > 0) {
          const yesCount = votes.filter((vote) => vote).length
          booleanCriteria[label] = {
            yesCount,
            noCount: total - yesCount,
            total,
            yesPercent: Math.round((yesCount / total) * 100),
            trueLabel: trueLabel || 'Yes',
            falseLabel: falseLabel || 'No',
          }
        }
        break
      }
      case 'text': {
        const responses = answersFor(id)
          .filter(
            (answer): answer is string => typeof answer === 'string' && answer.trim() !== ''
          )
          .map((answer) => sanitizeText(answer))
        if (responses.length > 0) {
          textResponses[label] = responses
        }
        break
      }
    }
  }

  return {
    averageGlobalScore,
    consensus: Math.round(consensus * 100) / 100,
    criterionAverages,
    booleanCriteria,
    textResponses,
    evaluatorCount: evaluations.length,
  }
}

// ─── Main Orchestrator ──────────────────────────────────────────────────────

/**
 * Coerce a parsed AI reply into the `AIResponsePayload` shape.
 *
 * The model's output is untrusted JSON: text fields that are missing or not
 * strings become `''`, list fields keep only their string entries, and theme
 * entries without a string `theme` are dropped (unknown sentiments fall back
 * to `'mixed'`, non-finite frequencies to `0`).
 *
 * @throws SyntaxError when the reply is not a JSON object, so the caller can
 *   treat it exactly like malformed JSON.
 */
function normalizeAIResponse(raw: unknown): AIResponsePayload {
  if (raw === null || typeof raw !== 'object' || Array.isArray(raw)) {
    throw new SyntaxError('AI response is not a JSON object')
  }
  const payload = raw as Record<string, unknown>

  const asText = (value: unknown): string => (typeof value === 'string' ? value : '')
  const asTextList = (value: unknown): string[] =>
    Array.isArray(value)
      ? value.filter((item): item is string => typeof item === 'string')
      : []

  const themes: AIResponsePayload['themes'] = []
  if (Array.isArray(payload.themes)) {
    for (const entry of payload.themes) {
      if (entry === null || typeof entry !== 'object') continue
      const { theme, sentiment, frequency } = entry as Record<string, unknown>
      if (typeof theme !== 'string') continue
      themes.push({
        theme,
        sentiment: sentiment === 'positive' || sentiment === 'negative' ? sentiment : 'mixed',
        frequency: typeof frequency === 'number' && Number.isFinite(frequency) ? frequency : 0,
      })
    }
  }

  return {
    overallAssessment: asText(payload.overallAssessment),
    strengths: asTextList(payload.strengths),
    weaknesses: asTextList(payload.weaknesses),
    themes,
    recommendation: asText(payload.recommendation),
  }
}

/**
 * Generate an AI-powered evaluation summary for a project in a round.
 *
 * Loads the project and its submitted evaluations, anonymizes them, asks the
 * configured model for a JSON analysis (re-requesting up to two times when
 * the reply is not usable JSON), merges the analysis with server-side scoring
 * patterns, upserts the result as the project/round summary and records the
 * AI usage.
 *
 * @param projectId - Project to summarize.
 * @param roundId - Round whose evaluations are summarized.
 * @param userId - User triggering the generation; stored as the generator and
 *   used for usage logging.
 * @param prisma - Prisma client used for all database access.
 * @returns The stored summary together with the merged summary JSON.
 * @throws TRPCError `NOT_FOUND` when the project does not exist.
 * @throws TRPCError `BAD_REQUEST` when there are no submitted evaluations.
 * @throws TRPCError `PRECONDITION_FAILED` when OpenAI is not configured.
 * @throws TRPCError `INTERNAL_SERVER_ERROR` when the AI call fails or its
 *   reply cannot be parsed after the retries.
 */
export async function generateSummary({
  projectId,
  roundId,
  userId,
  prisma,
}: {
  projectId: string
  roundId: string
  userId: string
  prisma: PrismaClient
}): Promise<EvaluationSummaryResult> {
  // 1. Fetch the project, its submitted evaluations and the form criteria
  const project = await prisma.project.findUnique({
    where: { id: projectId },
    select: {
      id: true,
      title: true,
      competitionCategory: true,
    },
  })

  if (!project) {
    throw new TRPCError({ code: 'NOT_FOUND', message: 'Project not found' })
  }

  // Fetch submitted evaluations for this project in this round
  const evaluations = await prisma.evaluation.findMany({
    where: {
      status: 'SUBMITTED',
      assignment: {
        projectId,
        roundId,
      },
    },
    select: {
      id: true,
      criterionScoresJson: true,
      globalScore: true,
      binaryDecision: true,
      feedbackText: true,
      assignment: {
        select: {
          user: {
            select: { id: true, name: true, email: true },
          },
        },
      },
    },
  })

  if (evaluations.length === 0) {
    throw new TRPCError({
      code: 'BAD_REQUEST',
      message: 'No submitted evaluations found for this project in this round',
    })
  }

  // Get evaluation form criteria for this round (category-aware)
  const form = await findActiveForm(prisma, roundId, project.competitionCategory)

  // criteriaJson is untyped JSON: anything other than an array is treated as
  // "no criteria" instead of crashing later when the list is iterated.
  const rawCriteria: unknown = form?.criteriaJson
  const criteria: CriterionDef[] = Array.isArray(rawCriteria)
    ? (rawCriteria as CriterionDef[])
    : []

  // 2. Anonymize evaluations
  const typedEvaluations = evaluations as unknown as EvaluationForSummary[]
  const anonymized = anonymizeEvaluations(typedEvaluations)

  // 3. Build prompt and call OpenAI
  const openai = await getOpenAI()
  if (!openai) {
    throw new TRPCError({
      code: 'PRECONDITION_FAILED',
      message: 'OpenAI is not configured. Please set up your API key in Settings.',
    })
  }

  const model = await getConfiguredModel(AI_MODELS.QUICK)
  const prompt = buildSummaryPrompt(anonymized, project.title, criteria)

  // The first request and every retry share the same settings; only the
  // message text differs.
  const completionParamsFor = (content: string) =>
    buildCompletionParams(model, {
      messages: [
        { role: 'user', content },
      ],
      jsonMode: true,
      temperature: 0.1,
      maxTokens: 2000,
    })

  let aiResponse: AIResponsePayload
  // Accumulated over the first request and all retries
  let tokensUsed = 0

  const MAX_PARSE_RETRIES = 2
  let parseAttempts = 0
  let response: Awaited<ReturnType<typeof openai.chat.completions.create>>

  try {
    response = await openai.chat.completions.create(completionParamsFor(prompt))
    tokensUsed = extractTokenUsage(response).totalTokens

    // Parse with retry logic
    while (true) {
      try {
        const content = response.choices[0]?.message?.content
        if (!content) {
          throw new Error('Empty response from AI')
        }
        aiResponse = normalizeAIResponse(JSON.parse(content))
        break
      } catch (parseError) {
        // Anything other than unusable JSON is handled by the outer catch
        if (!(parseError instanceof SyntaxError)) {
          throw parseError
        }

        if (parseAttempts < MAX_PARSE_RETRIES) {
          parseAttempts++
          console.warn(`[AI Evaluation Summary] JSON parse failed, retrying (${parseAttempts}/${MAX_PARSE_RETRIES})`)

          // Retry the API call with hint
          response = await openai.chat.completions.create(
            completionParamsFor(prompt + '\n\nIMPORTANT: Please ensure valid JSON output.')
          )
          tokensUsed += extractTokenUsage(response).totalTokens
          continue
        }

        // Retry limit reached
        const parseErrorObj = createParseError(parseError.message)
        logAIError('EvaluationSummary', 'generateSummary', parseErrorObj)

        await logAIUsage({
          userId,
          action: 'EVALUATION_SUMMARY',
          entityType: 'Project',
          entityId: projectId,
          model,
          promptTokens: 0,
          completionTokens: 0,
          totalTokens: tokensUsed,
          itemsProcessed: 0,
          status: 'ERROR',
          errorMessage: parseErrorObj.message,
        })

        throw new TRPCError({
          code: 'INTERNAL_SERVER_ERROR',
          message: 'Failed to parse AI response. Please try again.',
        })
      }
    }
  } catch (error) {
    // Already logged and converted above
    if (error instanceof TRPCError) {
      throw error
    }

    const classified = classifyAIError(error)
    logAIError('EvaluationSummary', 'generateSummary', classified)

    await logAIUsage({
      userId,
      action: 'EVALUATION_SUMMARY',
      entityType: 'Project',
      entityId: projectId,
      model,
      promptTokens: 0,
      completionTokens: 0,
      // Earlier successful calls may already have consumed tokens before the
      // failing one (e.g. a retry request that errors out).
      totalTokens: tokensUsed,
      itemsProcessed: 0,
      status: 'ERROR',
      errorMessage: classified.message,
    })

    throw new TRPCError({
      code: 'INTERNAL_SERVER_ERROR',
      message: classified.message,
    })
  }

  // 4. Compute scoring patterns (server-side, no AI)
  const scoringPatterns = computeScoringPatterns(typedEvaluations, criteria)

  // 5. Merge and upsert
  const summaryJson: EvaluationSummaryResult['summaryJson'] = {
    ...aiResponse,
    scoringPatterns,
  }

  const summaryJsonValue = summaryJson as unknown as Prisma.InputJsonValue

  const summary = await prisma.evaluationSummary.upsert({
    where: {
      projectId_roundId: { projectId, roundId },
    },
    create: {
      projectId,
      roundId,
      summaryJson: summaryJsonValue,
      generatedById: userId,
      model,
      tokensUsed,
    },
    update: {
      summaryJson: summaryJsonValue,
      generatedAt: new Date(),
      generatedById: userId,
      model,
      tokensUsed,
    },
  })

  // 6. Log AI usage
  await logAIUsage({
    userId,
    action: 'EVALUATION_SUMMARY',
    entityType: 'Project',
    entityId: projectId,
    model,
    promptTokens: 0, // Detailed breakdown not always available
    completionTokens: 0,
    totalTokens: tokensUsed,
    itemsProcessed: evaluations.length,
    status: 'SUCCESS',
  })

  return {
    id: summary.id,
    projectId: summary.projectId,
    roundId: summary.roundId,
    summaryJson,
    generatedAt: summary.generatedAt,
    model: summary.model,
    tokensUsed: summary.tokensUsed,
  }
}