feat: weighted criteria in AI ranking, z-score normalization, threshold advancement, CSV export
All checks were successful
Build and Push Docker Image / build (push) Successful in 9m16s

- Add criteriaWeights to EvaluationConfig for per-criterion weight assignment (0-10)
- Rewrite ai-ranking service: fetch eval form criteria, compute per-criterion averages,
  z-score normalize juror scores to correct grading bias, send weighted criteria to AI
- Update AI prompts with criteria_definitions and per-project criteria_scores
- compositeScore uses weighted criteria when configured, falls back to globalScore
- Add collapsible ranking config section to dashboard (criteria text + weight sliders)
- Move rankingCriteria textarea from eval config tab to ranking dashboard
- Store criteriaWeights in ranking snapshot parsedRulesJson for audit
- Enhance projectScores CSV export with per-criterion averages, category, country
- Add Export CSV button to ranking dashboard header
- Add threshold-based advancement mode (decimal score threshold, e.g. 6.5)
  alongside existing top-N mode in advance dialog

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-02 11:24:14 +01:00
parent c6ebd169dd
commit 19b58e4434
6 changed files with 674 additions and 107 deletions

View File

@@ -98,20 +98,36 @@ export const exportRouter = router({
}),
/**
* Export project scores summary
* Export project scores summary with per-criterion averages
*/
projectScores: adminProcedure
.input(z.object({ roundId: z.string() }))
.query(async ({ ctx, input }) => {
// Fetch evaluation form to get criteria labels
const evalForm = await ctx.prisma.evaluationForm.findFirst({
where: { roundId: input.roundId, isActive: true },
select: { criteriaJson: true },
})
const criteria = (evalForm?.criteriaJson as Array<{
id: string; label: string; type?: string
}> | null) ?? []
const numericCriteria = criteria.filter((c) => !c.type || c.type === 'numeric')
const projects = await ctx.prisma.project.findMany({
where: {
assignments: { some: { roundId: input.roundId } },
},
include: {
assignments: {
where: { roundId: input.roundId },
include: {
evaluation: {
where: { status: 'SUBMITTED' },
select: {
globalScore: true,
binaryDecision: true,
criterionScoresJson: true,
},
},
},
},
@@ -132,9 +148,24 @@ export const exportRouter = router({
(e) => e?.binaryDecision === true
).length
// Per-criterion averages
const criterionAvgs: Record<string, string | null> = {}
for (const c of numericCriteria) {
const values: number[] = []
for (const e of evaluations) {
const scores = e?.criterionScoresJson as Record<string, number> | null
if (scores && typeof scores[c.id] === 'number') values.push(scores[c.id])
}
criterionAvgs[c.label] = values.length > 0
? (values.reduce((a, b) => a + b, 0) / values.length).toFixed(2)
: null
}
return {
title: p.title,
teamName: p.teamName,
category: p.competitionCategory ?? '',
country: p.country ?? '',
status: p.status,
tags: p.tags.join(', '),
totalEvaluations: evaluations.length,
@@ -146,6 +177,7 @@ export const exportRouter = router({
: null,
minScore: globalScores.length > 0 ? Math.min(...globalScores) : null,
maxScore: globalScores.length > 0 ? Math.max(...globalScores) : null,
...criterionAvgs,
yesVotes,
noVotes: evaluations.length - yesVotes,
yesPercentage:
@@ -171,12 +203,15 @@ export const exportRouter = router({
columns: [
'title',
'teamName',
'category',
'country',
'status',
'tags',
'totalEvaluations',
'averageScore',
'minScore',
'maxScore',
...numericCriteria.map((c) => c.label),
'yesVotes',
'noVotes',
'yesPercentage',

View File

@@ -85,7 +85,16 @@ export const rankingRouter = router({
fetchAndRankCategory('BUSINESS_CONCEPT', rules, input.roundId, ctx.prisma, ctx.user.id),
])
// Persist snapshot
// Read criteria weights for snapshot audit trail
const round = await ctx.prisma.round.findUniqueOrThrow({
where: { id: input.roundId },
select: { configJson: true },
})
const evalConfig = (round.configJson as EvaluationConfig | null) ?? ({} as EvaluationConfig)
const criteriaWeights = evalConfig.criteriaWeights ?? undefined
// Persist snapshot — embed weights alongside rules for audit
const parsedRulesWithWeights = { rules, weights: criteriaWeights } as unknown as Prisma.InputJsonValue
const snapshot = await ctx.prisma.rankingSnapshot.create({
data: {
roundId: input.roundId,
@@ -94,7 +103,7 @@ export const rankingRouter = router({
mode: 'CONFIRMED',
status: 'COMPLETED',
criteriaText: input.criteriaText,
parsedRulesJson: rules as unknown as Prisma.InputJsonValue,
parsedRulesJson: parsedRulesWithWeights,
startupRankingJson: startup.rankedProjects as unknown as Prisma.InputJsonValue,
conceptRankingJson: concept.rankedProjects as unknown as Prisma.InputJsonValue,
},
@@ -271,13 +280,16 @@ export const rankingRouter = router({
const result = await aiQuickRank(criteriaText, roundId, ctx.prisma, ctx.user.id)
// Embed weights alongside rules for audit
const criteriaWeights = config.criteriaWeights ?? undefined
const parsedRulesWithWeights = { rules: result.parsedRules, weights: criteriaWeights } as unknown as Prisma.InputJsonValue
const snapshot = await ctx.prisma.rankingSnapshot.create({
data: {
roundId,
triggeredById: ctx.user.id,
triggerType: 'MANUAL',
criteriaText,
parsedRulesJson: result.parsedRules as unknown as Prisma.InputJsonValue,
parsedRulesJson: parsedRulesWithWeights,
startupRankingJson: result.startup.rankedProjects as unknown as Prisma.InputJsonValue,
conceptRankingJson: result.concept.rankedProjects as unknown as Prisma.InputJsonValue,
mode: 'QUICK',

View File

@@ -11,7 +11,8 @@
* Design decisions:
* - Per-category processing (STARTUP / BUSINESS_CONCEPT) — two parallel AI calls
* - Projects with zero submitted evaluations are excluded (not ranked last)
* - compositeScore = 50% normalised avgGlobalScore + 50% passRate + tiny tiebreak
* - compositeScore uses weighted criteria when available, falls back to avgGlobalScore
* - Z-score normalization corrects for juror grading bias
*/
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
@@ -20,25 +21,48 @@ import { classifyAIError, logAIError } from './ai-errors'
import { sanitizeUserInput } from '@/server/services/ai-prompt-guard'
import { TRPCError } from '@trpc/server'
import type { CompetitionCategory, PrismaClient } from '@prisma/client'
import type { EvaluationConfig } from '@/types/competition-configs'
// ─── Types ────────────────────────────────────────────────────────────────────
// Criterion definition as stored in EvaluationForm.criteriaJson.
interface CriterionDef {
  id: string // stable criterion ID — keys into criterionScoresJson and criteriaWeights
  label: string // human-readable name; sent to the AI in place of the ID
  type?: string // e.g. 'numeric'; callers treat a missing type as numeric
  scale?: number | string // max score (e.g. 10) or range string (e.g. '1-10'); callers default to 1–10 when absent
  weight?: number // weight from the form itself; admin criteriaWeights config takes precedence
}
// Internal shape of a project before anonymization (real IDs, never sent to the AI).
interface ProjectForRanking {
  id: string // real Prisma project ID
  competitionCategory: CompetitionCategory
  avgGlobalScore: number | null // average of submitted Evaluation.globalScore (null when none scored)
  normalizedAvgScore: number | null // z-score normalized average (null when normalization impossible)
  passRate: number // proportion of binaryDecision=true among SUBMITTED evaluations
  evaluatorCount: number // count of SUBMITTED evaluations
  criterionAverages: Record<string, number> // criterionId → raw average score
  normalizedCriterionAverages: Record<string, number> // criterionId → z-score normalized average
}
// Anonymized shape sent to OpenAI — no real project or criterion IDs leave the server.
interface AnonymizedProjectForRanking {
  project_id: string // "P001", "P002", etc. — never real IDs
  avg_score: number | null // rounded to 2 decimals
  normalized_avg_score: number | null // bias-corrected average, rounded to 2 decimals
  pass_rate: number // 0–1
  evaluator_count: number
  category: string
  criteria_scores: Record<string, number> // keyed by criterion LABEL, not ID
  normalized_criteria_scores: Record<string, number> // keyed by criterion LABEL, not ID
}
// Criterion definition sent to OpenAI alongside the projects.
interface CriterionDefForAI {
  name: string // criterion label (IDs are never sent)
  weight: number // effective weight: admin criteriaWeights override, else form weight, else 1
  scale: string // rendered as a range string, e.g. '1-10'
}
// A single parsed rule returned by the criteria parser
@@ -58,6 +82,7 @@ export interface RankedProjectEntry {
rank: number // 1-indexed
compositeScore: number // 01 floating point
avgGlobalScore: number | null
normalizedAvgScore: number | null
passRate: number
evaluatorCount: number
aiRationale?: string // Optional: AI explanation for this project's rank
@@ -79,8 +104,11 @@ Admin will describe how they want projects ranked in natural language. Parse thi
Available data fields for ranking:
- avg_score: average jury evaluation score (110 scale, null if not scored)
- normalized_avg_score: bias-corrected average (z-score normalized across jurors)
- pass_rate: proportion of jury members who voted to advance the project (01)
- evaluator_count: number of jury members who submitted evaluations (tiebreak)
- criteria_scores: per-criterion averages (keyed by criterion name)
- normalized_criteria_scores: bias-corrected per-criterion averages
Return JSON only:
{
@@ -103,7 +131,15 @@ Order rules so filters come first, sorts next, limits last.`
const RANKING_SYSTEM_PROMPT = `You are a project ranking engine for an ocean conservation competition.
You will receive a list of anonymized projects with numeric scores and a set of parsed ranking rules.
You will receive:
1. A list of anonymized projects with numeric scores (including per-criterion averages and bias-corrected scores)
2. A set of parsed ranking rules
3. Optional: criteria_definitions with weights indicating the relative importance of each evaluation criterion
When criteria_definitions with weights are provided, use the weighted criteria scores as a PRIMARY ranking factor.
The weighted score is: sum(criterion_avg * weight) / sum(weights).
Use normalized (bias-corrected) scores when available — they account for differences in juror grading harshness.
Apply the rules in order and return the final ranked list.
Return JSON only:
@@ -126,34 +162,85 @@ Rules:
// ─── Helpers ──────────────────────────────────────────────────────────────────
/**
* Compute composite score using weighted criteria if available,
* falling back to avgGlobalScore otherwise.
*/
function computeCompositeScore(
avgGlobalScore: number | null,
passRate: number,
evaluatorCount: number,
project: ProjectForRanking,
maxEvaluatorCount: number,
criteriaWeights: Record<string, number> | undefined,
criterionDefs: CriterionDef[],
): number {
const normalizedScore = avgGlobalScore != null ? (avgGlobalScore - 1) / 9 : 0.5
const composite = normalizedScore * 0.5 + passRate * 0.5
let scoreComponent: number
// Try weighted criteria first
if (criteriaWeights && Object.keys(criteriaWeights).length > 0) {
let weightedSum = 0
let totalWeight = 0
for (const [criterionId, weight] of Object.entries(criteriaWeights)) {
if (weight <= 0) continue
// Use normalized scores if available, otherwise raw
const score = project.normalizedCriterionAverages[criterionId]
?? project.criterionAverages[criterionId]
if (score == null) continue
// Normalize to 01 based on criterion scale
const def = criterionDefs.find((d) => d.id === criterionId)
const maxScale = typeof def?.scale === 'number' ? def.scale
: typeof def?.scale === 'string' ? parseInt(def.scale.split('-').pop() ?? '10', 10)
: 10
const normalizedScore = maxScale > 1 ? (score - 1) / (maxScale - 1) : score
weightedSum += normalizedScore * weight
totalWeight += weight
}
scoreComponent = totalWeight > 0 ? weightedSum / totalWeight : 0.5
} else {
// Fallback: use avgGlobalScore normalized to 01
const avg = project.normalizedAvgScore ?? project.avgGlobalScore
scoreComponent = avg != null ? (avg - 1) / 9 : 0.5
}
const composite = scoreComponent * 0.5 + project.passRate * 0.5
// Tiebreak: tiny bonus for more evaluators (won't change rank unless composite is equal)
const tiebreakBonus = maxEvaluatorCount > 0
? (evaluatorCount / maxEvaluatorCount) * 0.0001
? (project.evaluatorCount / maxEvaluatorCount) * 0.0001
: 0
return composite + tiebreakBonus
}
function anonymizeProjectsForRanking(
projects: ProjectForRanking[],
criterionDefs: CriterionDef[],
): { anonymized: AnonymizedProjectForRanking[]; idMap: Map<string, string> } {
// Build id → label map for criterion names (anonymize IDs)
const idToLabel = new Map(criterionDefs.map((d) => [d.id, d.label]))
const idMap = new Map<string, string>()
const anonymized = projects.map((p, i) => {
const anonId = `P${String(i + 1).padStart(3, '0')}`
idMap.set(anonId, p.id)
// Convert criterion ID keys to human-readable labels
const criteriaScores: Record<string, number> = {}
for (const [id, score] of Object.entries(p.criterionAverages)) {
const label = idToLabel.get(id) ?? id
criteriaScores[label] = Math.round(score * 100) / 100
}
const normalizedCriteriaScores: Record<string, number> = {}
for (const [id, score] of Object.entries(p.normalizedCriterionAverages)) {
const label = idToLabel.get(id) ?? id
normalizedCriteriaScores[label] = Math.round(score * 100) / 100
}
return {
project_id: anonId,
avg_score: p.avgGlobalScore,
avg_score: p.avgGlobalScore != null ? Math.round(p.avgGlobalScore * 100) / 100 : null,
normalized_avg_score: p.normalizedAvgScore != null ? Math.round(p.normalizedAvgScore * 100) / 100 : null,
pass_rate: p.passRate,
evaluator_count: p.evaluatorCount,
category: p.competitionCategory,
criteria_scores: criteriaScores,
normalized_criteria_scores: normalizedCriteriaScores,
}
})
return { anonymized, idMap }
@@ -206,6 +293,70 @@ function computePassRate(evaluations: Array<{ resolvedDecision: boolean | null }
return passCount / evaluations.length
}
// ─── Z-Score Normalization ──────────────────────────────────────────────────
// Per-juror grading statistics used to correct for grading harshness.
interface JurorStats {
  mean: number // mean of all numeric scores this juror submitted
  stddev: number // population standard deviation (0 when fewer than 2 samples)
  count: number // number of numeric score samples observed for this juror
}
/**
 * Compute per-juror grading statistics (mean and population stddev) for z-score
 * normalization. Pools each juror's globalScore together with their numeric
 * criterion scores (filtered by numericCriterionIds) across all SUBMITTED
 * evaluations. Jurors with fewer than two samples get { mean: 0, stddev: 0 }
 * so downstream normalization can detect and skip them.
 */
function computeJurorStats(
  assignments: Array<{
    userId: string
    evaluation: {
      globalScore: number | null
      criterionScoresJson: Record<string, unknown> | null
    } | null
  }>,
  numericCriterionIds: Set<string>,
): Map<string, JurorStats> {
  // Gather every numeric score each juror has submitted
  const scoresByJuror = new Map<string, number[]>()
  for (const assignment of assignments) {
    const evaluation = assignment.evaluation
    if (evaluation == null) continue
    const bucket = scoresByJuror.get(assignment.userId) ?? []
    if (evaluation.globalScore != null) bucket.push(evaluation.globalScore)
    for (const [criterionId, value] of Object.entries(evaluation.criterionScoresJson ?? {})) {
      if (typeof value === 'number' && numericCriterionIds.has(criterionId)) {
        bucket.push(value)
      }
    }
    scoresByJuror.set(assignment.userId, bucket)
  }
  // Reduce each juror's samples to mean / population stddev
  const result = new Map<string, JurorStats>()
  for (const [jurorId, samples] of scoresByJuror) {
    if (samples.length < 2) {
      // Not enough data for meaningful normalization — sentinel stats
      result.set(jurorId, { mean: 0, stddev: 0, count: samples.length })
      continue
    }
    const mean = samples.reduce((acc, s) => acc + s, 0) / samples.length
    const variance = samples.reduce((acc, s) => acc + (s - mean) ** 2, 0) / samples.length
    result.set(jurorId, { mean, stddev: Math.sqrt(variance), count: samples.length })
  }
  return result
}
/**
 * Z-score normalize a raw score against one juror's grading statistics.
 * Returns (raw - mean) / stddev, or null when normalization isn't possible
 * (fewer than two samples, or zero spread in the juror's scores).
 */
function zScoreNormalize(raw: number, stats: JurorStats): number | null {
  const normalizable = stats.count >= 2 && stats.stddev !== 0
  return normalizable ? (raw - stats.mean) / stats.stddev : null
}
// ─── Exported Functions ───────────────────────────────────────────────────────
/**
@@ -275,11 +426,15 @@ export async function parseRankingCriteria(
*
* projects: raw data queried from Prisma, already filtered to one category
* parsedRules: from parseRankingCriteria()
* criteriaWeights: optional admin-configured weights from round config
* criterionDefs: criterion definitions from the evaluation form
*/
export async function executeAIRanking(
parsedRules: ParsedRankingRule[],
projects: ProjectForRanking[],
category: CompetitionCategory,
criteriaWeights: Record<string, number> | undefined,
criterionDefs: CriterionDef[],
userId?: string,
entityId?: string,
): Promise<RankingResult> {
@@ -288,7 +443,7 @@ export async function executeAIRanking(
}
const maxEvaluatorCount = Math.max(...projects.map((p) => p.evaluatorCount))
const { anonymized, idMap } = anonymizeProjectsForRanking(projects)
const { anonymized, idMap } = anonymizeProjectsForRanking(projects, criterionDefs)
const openai = await getOpenAI()
if (!openai) {
@@ -297,10 +452,23 @@ export async function executeAIRanking(
const model = await getConfiguredModel()
const userPrompt = JSON.stringify({
// Build criteria_definitions for the AI prompt (only numeric criteria)
const numericDefs = criterionDefs.filter((d) => !d.type || d.type === 'numeric')
const criteriaDefsForAI: CriterionDefForAI[] = numericDefs.map((d) => {
const adminWeight = criteriaWeights?.[d.id] ?? d.weight ?? 1
const scale = typeof d.scale === 'number' ? `1-${d.scale}` : typeof d.scale === 'string' ? d.scale : '1-10'
return { name: d.label, weight: adminWeight, scale }
})
const promptData: Record<string, unknown> = {
rules: parsedRules.filter((r) => r.dataAvailable),
projects: anonymized,
})
}
if (criteriaDefsForAI.length > 0) {
promptData.criteria_definitions = criteriaDefsForAI
}
const userPrompt = JSON.stringify(promptData)
const params = buildCompletionParams(model, {
messages: [
@@ -359,13 +527,9 @@ export async function executeAIRanking(
return {
projectId: realId,
rank: entry.rank,
compositeScore: computeCompositeScore(
proj.avgGlobalScore,
proj.passRate,
proj.evaluatorCount,
maxEvaluatorCount,
),
compositeScore: computeCompositeScore(proj, maxEvaluatorCount, criteriaWeights, criterionDefs),
avgGlobalScore: proj.avgGlobalScore,
normalizedAvgScore: proj.normalizedAvgScore,
passRate: proj.passRate,
evaluatorCount: proj.evaluatorCount,
aiRationale: entry.rationale,
@@ -404,6 +568,9 @@ export async function quickRank(
* Internal helper: fetch eligible projects for one category and execute ranking.
* Excluded: withdrawn projects and projects with zero submitted evaluations (locked decision).
*
* Fetches evaluation form criteria, computes per-criterion averages, applies z-score
* normalization to correct for juror bias, and passes weighted criteria to the AI.
*
* Exported so the tRPC router can call it separately when executing pre-parsed rules.
*/
export async function fetchAndRankCategory(
@@ -413,12 +580,32 @@ export async function fetchAndRankCategory(
prisma: PrismaClient,
userId?: string,
): Promise<RankingResult> {
// Fetch the round config to find the boolean criterion ID (legacy fallback)
const round = await prisma.round.findUniqueOrThrow({
where: { id: roundId },
select: { configJson: true },
})
const boolCriterionId = findBooleanCriterionId(round.configJson as Record<string, unknown> | null)
// Fetch the round config and evaluation form in parallel
const [round, evalForm] = await Promise.all([
prisma.round.findUniqueOrThrow({
where: { id: roundId },
select: { configJson: true },
}),
prisma.evaluationForm.findFirst({
where: { roundId, isActive: true },
select: { criteriaJson: true },
}),
])
const roundConfig = round.configJson as Record<string, unknown> | null
const boolCriterionId = findBooleanCriterionId(roundConfig)
// Parse evaluation config for criteria weights
const evalConfig = roundConfig as EvaluationConfig | null
const criteriaWeights = evalConfig?.criteriaWeights ?? undefined
// Parse criterion definitions from the evaluation form
const criterionDefs: CriterionDef[] = evalForm?.criteriaJson
? (evalForm.criteriaJson as unknown as CriterionDef[])
: []
const numericCriterionIds = new Set(
criterionDefs.filter((d) => !d.type || d.type === 'numeric').map((d) => d.id),
)
// Query submitted evaluations grouped by projectId for this category
const assignments = await prisma.assignment.findMany({
@@ -446,8 +633,26 @@ export async function fetchAndRankCategory(
},
})
// Group by projectId, resolving binaryDecision from column or criterionScoresJson fallback
const byProject = new Map<string, Array<{ globalScore: number | null; resolvedDecision: boolean | null }>>()
// Compute per-juror stats for z-score normalization
const jurorStats = computeJurorStats(
assignments.map((a) => ({
userId: a.userId,
evaluation: a.evaluation ? {
globalScore: a.evaluation.globalScore,
criterionScoresJson: a.evaluation.criterionScoresJson as Record<string, unknown> | null,
} : null,
})),
numericCriterionIds,
)
// Group by projectId, collect per-juror scores for aggregation
type EvalData = {
globalScore: number | null
resolvedDecision: boolean | null
criterionScores: Record<string, unknown> | null
userId: string
}
const byProject = new Map<string, EvalData[]>()
for (const a of assignments) {
if (!a.evaluation) continue
const resolved = resolveBinaryDecision(
@@ -456,21 +661,83 @@ export async function fetchAndRankCategory(
boolCriterionId,
)
const list = byProject.get(a.project.id) ?? []
list.push({ globalScore: a.evaluation.globalScore, resolvedDecision: resolved })
list.push({
globalScore: a.evaluation.globalScore,
resolvedDecision: resolved,
criterionScores: a.evaluation.criterionScoresJson as Record<string, unknown> | null,
userId: a.userId,
})
byProject.set(a.project.id, list)
}
// Build ProjectForRanking, excluding projects with zero submitted evaluations
const projects: ProjectForRanking[] = []
for (const [projectId, evals] of byProject.entries()) {
if (evals.length === 0) continue // Exclude: no submitted evaluations
if (evals.length === 0) continue
// Raw avg global score
const avgGlobalScore = evals.some((e) => e.globalScore != null)
? evals.filter((e) => e.globalScore != null).reduce((sum, e) => sum + e.globalScore!, 0) /
evals.filter((e) => e.globalScore != null).length
: null
// Z-score normalized avg global score
const normalizedGlobalScores: number[] = []
for (const e of evals) {
if (e.globalScore == null) continue
const stats = jurorStats.get(e.userId)
if (!stats) continue
const z = zScoreNormalize(e.globalScore, stats)
if (z != null) normalizedGlobalScores.push(z)
}
const normalizedAvgScore = normalizedGlobalScores.length > 0
? normalizedGlobalScores.reduce((a, b) => a + b, 0) / normalizedGlobalScores.length
: null
// Per-criterion raw averages (numeric criteria only)
const criterionAverages: Record<string, number> = {}
for (const criterionId of numericCriterionIds) {
const values: number[] = []
for (const e of evals) {
if (!e.criterionScores) continue
const val = e.criterionScores[criterionId]
if (typeof val === 'number') values.push(val)
}
if (values.length > 0) {
criterionAverages[criterionId] = values.reduce((a, b) => a + b, 0) / values.length
}
}
// Per-criterion z-score normalized averages
const normalizedCriterionAverages: Record<string, number> = {}
for (const criterionId of numericCriterionIds) {
const zScores: number[] = []
for (const e of evals) {
if (!e.criterionScores) continue
const val = e.criterionScores[criterionId]
if (typeof val !== 'number') continue
const stats = jurorStats.get(e.userId)
if (!stats) continue
const z = zScoreNormalize(val, stats)
if (z != null) zScores.push(z)
}
if (zScores.length > 0) {
normalizedCriterionAverages[criterionId] = zScores.reduce((a, b) => a + b, 0) / zScores.length
}
}
const passRate = computePassRate(evals)
projects.push({ id: projectId, competitionCategory: category, avgGlobalScore, passRate, evaluatorCount: evals.length })
projects.push({
id: projectId,
competitionCategory: category,
avgGlobalScore,
normalizedAvgScore,
passRate,
evaluatorCount: evals.length,
criterionAverages,
normalizedCriterionAverages,
})
}
return executeAIRanking(parsedRules, projects, category, userId, roundId)
return executeAIRanking(parsedRules, projects, category, criteriaWeights, criterionDefs, userId, roundId)
}