src/server/services/juror-balance.ts

/**
 * Juror balancing: z-score normalization to correct for per-juror grading harshness.
 *
 * A juror who grades 1 standard deviation below their peers on shared projects
 * shouldn't punish those projects more than a juror who grades at the mean.
 * We compute per-juror mean + stddev across their scores in a round, z-normalize
 * each score, then rescale back onto the same 1-10 scale using the overall
 * round-level mean + stddev so the balanced number is directly comparable to
 * the raw average.
 */

/** A single raw score given by one juror to one project; the flat input unit for the balancing functions. */
export type ScorePoint = {
  /** Project the score belongs to; scores are grouped by this when averaging per project. */
  projectId: string
  /** Juror who gave the score; per-juror statistics are grouped and looked up by this. */
  userId: string
  /** The score before any normalization (the file header describes a 1-10 scale). */
  rawScore: number
}

/** Per-project aggregate holding the plain average alongside the juror-balanced one. */
export type BalancedProjectResult = {
  projectId: string
  /**
   * Arithmetic mean of the project's raw scores. Typed as nullable, although
   * `computeBalancedProjectScores` always supplies a number here.
   */
  rawAverage: number | null
  /**
   * Mean of the project's z-normalized scores, rescaled with the overall mean
   * and standard deviation so it sits on the raw-score scale. `null` when
   * balancing was not performed because the overall scores show no spread.
   */
  balancedAverage: number | null
  /** Number of score points that went into the averages for this project. */
  count: number
}

/** Grading statistics for one juror across all of the scores they contributed. */
export type JurorBalance = {
  userId: string
  /** Mean of the juror's raw scores. */
  mean: number
  /**
   * Standard deviation of the juror's raw scores. As produced by
   * `computeBalanceContext` this is the population form (divides by N) and is
   * 0 for a juror with a single score.
   */
  stddev: number
  /** Number of scores the juror contributed. */
  count: number
}

/** Everything needed to z-normalize scores per juror and rescale the result. */
export type BalanceContext = {
  /** Mean of every raw score in the input; `computeBalanceContext` uses 0 for empty input. */
  overallMean: number
  /** Standard deviation of every raw score in the input; `computeBalanceContext` uses 0 for fewer than two scores. */
  overallStddev: number
  /** Per-juror statistics keyed by `userId`. */
  jurorStats: Map<string, JurorBalance>
}

/**
 * Build per-juror and overall grading statistics from a flat list of
 * (project, juror, score) points.
 *
 * All standard deviations are population standard deviations (divide by N).
 * A group whose scores are all identical — including a group of one — is
 * reported with a standard deviation of exactly 0 and a mean equal to that
 * score. Without that short-circuit, rounding in the mean of identical
 * non-integer scores (e.g. three 0.1s) yields a stddev around 1e-17, which is
 * greater than zero and would be used as a z-score divisor by any caller that
 * only tests `stddev > 0`.
 *
 * @param points - Flat list of score points for the round.
 * @returns The overall mean and stddev across all points plus one
 *   `JurorBalance` per distinct `userId`. Empty input gives an overall mean
 *   and stddev of 0 and an empty `jurorStats` map.
 */
export function computeBalanceContext(points: ScorePoint[]): BalanceContext {
  // Collect each juror's raw scores so their personal mean/stddev can be computed.
  const jurorScores = new Map<string, number[]>()
  for (const p of points) {
    const arr = jurorScores.get(p.userId) ?? []
    arr.push(p.rawScore)
    jurorScores.set(p.userId, arr)
  }

  const jurorStats = new Map<string, JurorBalance>()
  for (const [userId, scores] of jurorScores.entries()) {
    const { mean, stddev } = summarizeScores(scores)
    jurorStats.set(userId, { userId, mean, stddev, count: scores.length })
  }

  const overall = summarizeScores(points.map((p) => p.rawScore))

  return { overallMean: overall.mean, overallStddev: overall.stddev, jurorStats }
}

/** Mean and population standard deviation of a list of scores (both 0 for an empty list). */
function summarizeScores(scores: number[]): { mean: number; stddev: number } {
  const first = scores[0]
  if (first === undefined) return { mean: 0, stddev: 0 }

  // Constant input has no spread by definition; report it exactly instead of
  // letting floating-point rounding in the mean manufacture a tiny deviation.
  if (scores.every((v) => v === first)) return { mean: first, stddev: 0 }

  const mean = scores.reduce((a, b) => a + b, 0) / scores.length
  const variance = scores.reduce((s, v) => s + (v - mean) ** 2, 0) / scores.length
  return { mean, stddev: Math.sqrt(variance) }
}

/**
 * Aggregate per-project raw + balanced averages from score points.
 *
 * For each project the raw average is the plain mean of its scores. The
 * balanced average z-normalizes every score against its juror's own mean and
 * stddev, averages those z-scores, and rescales the result with the overall
 * mean and stddev so it sits on the raw-score scale.
 *
 * A standard deviation only counts as usable when it is meaningfully above
 * zero. Deviations that are pure floating-point noise (around 1e-17 for a
 * juror whose non-integer scores are all identical) would otherwise become
 * z-scores of magnitude ~1 and shift the project by a full overall stddev.
 *
 * @param points - Flat list of score points to aggregate.
 * @param ctx - Statistics for the same round, e.g. from `computeBalanceContext`.
 * @returns One result per distinct `projectId`. `balancedAverage` is `null`
 *   when the overall scores have no usable spread. A juror who is missing from
 *   `ctx.jurorStats`, or whose own scores have no usable spread, is normalized
 *   against the overall mean and stddev instead.
 */
export function computeBalancedProjectScores(
  points: ScorePoint[],
  ctx: BalanceContext,
): Map<string, BalancedProjectResult> {
  // Relative tolerance: far below any real score spread, far above rounding noise.
  const spreadTolerance = 1e-9
  const hasSpread = (stddev: number, mean: number): boolean =>
    stddev > spreadTolerance * Math.max(1, Math.abs(mean))

  const byProject = new Map<string, ScorePoint[]>()
  for (const p of points) {
    const arr = byProject.get(p.projectId) ?? []
    arr.push(p)
    byProject.set(p.projectId, arr)
  }

  const canBalance = hasSpread(ctx.overallStddev, ctx.overallMean)

  const results = new Map<string, BalancedProjectResult>()
  for (const [projectId, projectPoints] of byProject.entries()) {
    const rawAverage = projectPoints.reduce((a, b) => a + b.rawScore, 0) / projectPoints.length

    let balancedAverage: number | null = null
    if (canBalance) {
      let zSum = 0
      for (const pt of projectPoints) {
        const stats = ctx.jurorStats.get(pt.userId)
        if (stats && hasSpread(stats.stddev, stats.mean)) {
          zSum += (pt.rawScore - stats.mean) / stats.stddev
        } else {
          // No usable per-juror spread: fall back to the overall distribution.
          zSum += (pt.rawScore - ctx.overallMean) / ctx.overallStddev
        }
      }
      const avgZ = zSum / projectPoints.length
      balancedAverage = ctx.overallMean + avgZ * ctx.overallStddev
    }

    results.set(projectId, {
      projectId,
      rawAverage,
      balancedAverage,
      count: projectPoints.length,
    })
  }

  return results
}
feat: surface juror-balanced scores and AI calibration advisory

Adds a shared juror-balancing utility (z-score normalization per juror, rescaled back onto the raw 1-10 scale) and wires it into:

- Admin reports page: Top-10 project table now shows "Raw Avg" and "Balanced" columns side by side, and the summary stats row shows a balanced-average tile. Sort defaults to balanced so harsh and lenient graders no longer skew the ranking.
- Ranking dashboard: each project row shows a green/amber balanced-score chip next to the raw average when the two differ by ≥0.05, making it obvious when juror calibration moved a project's effective ranking.

Also adds AI Juror Calibration Advisory — a mutation that takes anonymized per-juror stats, calls OpenAI, and produces a plain-language explanation of the cohort's grading patterns plus per-juror severity (normal / notable / outlier) with a one-sentence narrative. The advisory describes the statistical balance that already runs; it does not introduce a new weighting layer. Rendered as a panel in the Juror Consistency tab when a specific round is selected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 16:19:00 +02:00			`/**`
			`* Juror balancing: z-score normalization to correct for per-juror grading harshness.`
			`*`
			`* A juror who grades 1 standard deviation below their peers on shared projects`
			`* shouldn't punish those projects more than a juror who grades at the mean.`
			`* We compute per-juror mean + stddev across their scores in a round, z-normalize`
			`* each score, then rescale back onto the same 1-10 scale using the overall`
			`* round-level mean + stddev so the balanced number is directly comparable to`
			`* the raw average.`
			`*/`

			`export type ScorePoint = {`
			`projectId: string`
			`userId: string`
			`rawScore: number`
			`}`

			`export type BalancedProjectResult = {`
			`projectId: string`
			`rawAverage: number \| null`
			`balancedAverage: number \| null`
			`count: number`
			`}`

			`export type JurorBalance = {`
			`userId: string`
			`mean: number`
			`stddev: number`
			`count: number`
			`}`

			`export type BalanceContext = {`
			`overallMean: number`
			`overallStddev: number`
			`jurorStats: Map<string, JurorBalance>`
			`}`

			`/**`
			`* Build per-juror and overall grading statistics from a flat list of`
			`* (project, juror, score) points. Returns the stats plus a helper to`
			`* rescale z-scores back onto the raw-score scale.`
			`*/`
			`export function computeBalanceContext(points: ScorePoint[]): BalanceContext {`
			`const jurorScores = new Map<string, number[]>()`
			`for (const p of points) {`
			`const arr = jurorScores.get(p.userId) ?? []`
			`arr.push(p.rawScore)`
			`jurorScores.set(p.userId, arr)`
			`}`

			`const jurorStats = new Map<string, JurorBalance>()`
			`for (const [userId, scores] of jurorScores.entries()) {`
			`const mean = scores.reduce((a, b) => a + b, 0) / scores.length`
			`const variance = scores.length > 1`
			`? scores.reduce((s, v) => s + (v - mean) ** 2, 0) / scores.length`
			`: 0`
			`jurorStats.set(userId, {`
			`userId,`
			`mean,`
			`stddev: Math.sqrt(variance),`
			`count: scores.length,`
			`})`
			`}`

			`const allScores = points.map((p) => p.rawScore)`
			`const overallMean = allScores.length > 0`
			`? allScores.reduce((a, b) => a + b, 0) / allScores.length`
			`: 0`
			`const overallStddev = allScores.length > 1`
			`? Math.sqrt(`
			`allScores.reduce((s, v) => s + (v - overallMean) ** 2, 0) / allScores.length,`
			`)`
			`: 0`

			`return { overallMean, overallStddev, jurorStats }`
			`}`

			`/**`
			`* Aggregate per-project raw + balanced averages from score points.`
			`*/`
			`export function computeBalancedProjectScores(`
			`points: ScorePoint[],`
			`ctx: BalanceContext,`
			`): Map<string, BalancedProjectResult> {`
			`const byProject = new Map<string, ScorePoint[]>()`
			`for (const p of points) {`
			`const arr = byProject.get(p.projectId) ?? []`
			`arr.push(p)`
			`byProject.set(p.projectId, arr)`
			`}`

			`const results = new Map<string, BalancedProjectResult>()`
			`for (const [projectId, projectPoints] of byProject.entries()) {`
			`const rawAverage = projectPoints.reduce((a, b) => a + b.rawScore, 0) / projectPoints.length`

			`let balancedAverage: number \| null = null`
			`if (ctx.overallStddev > 0) {`
			`const zValues: number[] = []`
			`for (const pt of projectPoints) {`
			`const stats = ctx.jurorStats.get(pt.userId)`
			`if (stats && stats.stddev > 0) {`
			`zValues.push((pt.rawScore - stats.mean) / stats.stddev)`
			`} else {`
			`zValues.push((pt.rawScore - ctx.overallMean) / ctx.overallStddev)`
			`}`
			`}`
			`const avgZ = zValues.reduce((a, b) => a + b, 0) / zValues.length`
			`balancedAverage = ctx.overallMean + avgZ * ctx.overallStddev`
			`}`

			`results.set(projectId, {`
			`projectId,`
			`rawAverage,`
			`balancedAverage,`
			`count: projectPoints.length,`
			`})`
			`}`

			`return results`
			`}`