feat: surface juror-balanced scores and AI calibration advisory

Adds a shared juror-balancing utility (z-score normalization per juror, rescaled back onto the raw 1-10 scale) and wires it into:

- Admin reports page: Top-10 project table now shows "Raw Avg" and "Balanced" columns side by side, and the summary stats row shows a balanced-average tile. Sort defaults to balanced so harsh and lenient graders no longer skew the ranking.
- Ranking dashboard: each project row shows a green/amber balanced-score chip next to the raw average when the two differ by ≥0.05, making it obvious when juror calibration moved a project's effective ranking.

Also adds AI Juror Calibration Advisory — a mutation that takes anonymized per-juror stats, calls OpenAI, and produces a plain-language explanation of the cohort's grading patterns plus per-juror severity (normal / notable / outlier) with a one-sentence narrative. The advisory describes the statistical balance that already runs; it does not introduce a new weighting layer. Rendered as a panel in the Juror Consistency tab when a specific round is selected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 16:19:00 +02:00
parent 07dd7a0692
commit 982d5193c5
7 changed files with 774 additions and 65 deletions
--- a/src/server/services/juror-balance.ts
+++ b/src/server/services/juror-balance.ts
@@ -0,0 +1,120 @@
+/**
+ * Juror balancing: z-score normalization to correct for per-juror grading harshness.
+ *
+ * A juror who grades 1 standard deviation below their peers on shared projects
+ * shouldn't punish those projects more than a juror who grades at the mean.
+ * We compute per-juror mean + stddev across their scores in a round, z-normalize
+ * each score, then rescale back onto the same 1-10 scale using the overall
+ * round-level mean + stddev so the balanced number is directly comparable to
+ * the raw average.
+ */
+
+/** One score: the value a single juror gave to a single project. */
+export type ScorePoint = {
+  /** Project the score was given to. */
+  projectId: string
+  /** Juror who gave the score. */
+  userId: string
+  /** The score as entered, before any balancing. */
+  rawScore: number
+}
+
+/** Per-project aggregate of raw and juror-balanced scores. */
+export type BalancedProjectResult = {
+  projectId: string
+  /** Plain arithmetic mean of the project's raw scores. */
+  rawAverage: number | null
+  /**
+   * Mean of the project's z-normalized scores, rescaled with the overall
+   * mean and stddev; `null` when the overall stddev gives no spread to
+   * rescale with.
+   */
+  balancedAverage: number | null
+  /** Number of score points that contributed to this project. */
+  count: number
+}
+
+/** Grading statistics for one juror across the supplied score points. */
+export type JurorBalance = {
+  userId: string
+  /** Mean of the juror's raw scores. */
+  mean: number
+  /** Population standard deviation of the juror's raw scores; 0 for a single score. */
+  stddev: number
+  /** How many scores the juror contributed. */
+  count: number
+}
+
+/** Everything needed to z-normalize scores and rescale them afterwards. */
+export type BalanceContext = {
+  /** Mean of all raw scores; 0 when there are no scores. */
+  overallMean: number
+  /** Population standard deviation of all raw scores; 0 with fewer than two scores. */
+  overallStddev: number
+  /** Per-juror statistics keyed by `userId`. */
+  jurorStats: Map<string, JurorBalance>
+}
+
+/**
+ * Build per-juror and overall grading statistics from a flat list of
+ * (project, juror, score) points.
+ *
+ * Standard deviations are population values (divide by n, not n - 1). A set
+ * of scores with no spread — a single score, or several identical scores —
+ * always reports a stddev of exactly 0 and a mean equal to that score, so
+ * consumers can rely on a `stddev > 0` check to detect usable spread.
+ *
+ * @param points - Every (project, juror, score) point to include.
+ * @returns Overall mean/stddev (both 0 for empty input) and per-juror stats
+ *   keyed by `userId`.
+ */
+export function computeBalanceContext(points: ScorePoint[]): BalanceContext {
+  const jurorScores = new Map<string, number[]>()
+  for (const p of points) {
+    const scores = jurorScores.get(p.userId)
+    if (scores) {
+      scores.push(p.rawScore)
+    } else {
+      jurorScores.set(p.userId, [p.rawScore])
+    }
+  }
+
+  const jurorStats = new Map<string, JurorBalance>()
+  for (const [userId, scores] of jurorScores.entries()) {
+    const { mean, stddev } = populationStats(scores)
+    jurorStats.set(userId, { userId, mean, stddev, count: scores.length })
+  }
+
+  const overall = populationStats(points.map((p) => p.rawScore))
+
+  return { overallMean: overall.mean, overallStddev: overall.stddev, jurorStats }
+}
+
+/**
+ * Population mean and standard deviation of `values`; both 0 for empty input.
+ *
+ * Identical values are short-circuited: summing and dividing non-integer
+ * scores can round the mean away from the scores themselves (for example
+ * (0.1 + 0.1 + 0.1) / 3 is 0.10000000000000002), which would otherwise yield
+ * a tiny non-zero stddev and nonsensical ±1 z-scores downstream.
+ */
+function populationStats(values: number[]): { mean: number; stddev: number } {
+  const first = values[0]
+  if (first === undefined) {
+    return { mean: 0, stddev: 0 }
+  }
+  if (values.every((v) => v === first)) {
+    return { mean: first, stddev: 0 }
+  }
+
+  const mean = values.reduce((a, b) => a + b, 0) / values.length
+  const variance = values.reduce((s, v) => s + (v - mean) ** 2, 0) / values.length
+  return { mean, stddev: Math.sqrt(variance) }
+}
+
+/**
+ * Aggregate per-project raw + balanced averages from score points.
+ *
+ * Each score is converted to a z-score using its juror's own mean and stddev,
+ * the z-scores are averaged per project, and the average is mapped back with
+ * the overall mean and stddev so it is comparable to the raw average. Jurors
+ * that are missing from `ctx` or have no usable spread are z-scored against
+ * the overall distribution instead.
+ *
+ * The balanced value is not clamped, so it can fall slightly outside the
+ * range of the raw scores.
+ *
+ * @param points - The (project, juror, score) points to aggregate.
+ * @param ctx - Statistics describing the jurors and the overall distribution.
+ * @returns One result per project id present in `points`; `balancedAverage`
+ *   is `null` when the overall distribution has no usable spread.
+ */
+export function computeBalancedProjectScores(
+  points: ScorePoint[],
+  ctx: BalanceContext,
+): Map<string, BalancedProjectResult> {
+  // A stddev at or below this is floating-point rounding noise, not real
+  // spread; dividing by it would turn identical scores into ±1 z-scores.
+  const minUsableStddev = 1e-9
+
+  const byProject = new Map<string, ScorePoint[]>()
+  for (const p of points) {
+    const arr = byProject.get(p.projectId) ?? []
+    arr.push(p)
+    byProject.set(p.projectId, arr)
+  }
+
+  const canBalance = ctx.overallStddev > minUsableStddev
+
+  const results = new Map<string, BalancedProjectResult>()
+  for (const [projectId, projectPoints] of byProject.entries()) {
+    const rawAverage = projectPoints.reduce((a, b) => a + b.rawScore, 0) / projectPoints.length
+
+    let balancedAverage: number | null = null
+    if (canBalance) {
+      let zSum = 0
+      for (const pt of projectPoints) {
+        const stats = ctx.jurorStats.get(pt.userId)
+        zSum += stats && stats.stddev > minUsableStddev
+          ? (pt.rawScore - stats.mean) / stats.stddev
+          : (pt.rawScore - ctx.overallMean) / ctx.overallStddev
+      }
+      const avgZ = zSum / projectPoints.length
+      balancedAverage = ctx.overallMean + avgZ * ctx.overallStddev
+    }
+
+    results.set(projectId, {
+      projectId,
+      rawAverage,
+      balancedAverage,
+      count: projectPoints.length,
+    })
+  }
+
+  return results
+}