feat: surface juror-balanced scores and AI calibration advisory
All checks were successful
Build and Push Docker Image / build (push) Successful in 7m27s
All checks were successful
Build and Push Docker Image / build (push) Successful in 7m27s
Adds a shared juror-balancing utility (z-score normalization per juror, rescaled back onto the raw 1-10 scale) and wires it into:

- Admin reports page: the Top-10 project table now shows "Raw Avg" and "Balanced" columns side by side, and the summary stats row shows a balanced-average tile. Sorting defaults to the balanced score so harsh and lenient graders no longer skew the ranking.
- Ranking dashboard: each project row shows a green/amber balanced-score chip next to the raw average when the two differ by ≥0.05, making it obvious when juror calibration moved a project's effective ranking.

Also adds an AI Juror Calibration Advisory — a mutation that takes anonymized per-juror stats, calls OpenAI, and produces a plain-language explanation of the cohort's grading patterns plus a per-juror severity (normal / notable / outlier) with a one-sentence narrative. The advisory describes the statistical balancing that already runs; it does not introduce a new weighting layer. It is rendered as a panel in the Juror Consistency tab when a specific round is selected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,11 +1,13 @@
|
||||
import { z } from 'zod'
|
||||
import { router, observerProcedure } from '../trpc'
|
||||
import { router, observerProcedure, adminProcedure } from '../trpc'
|
||||
import { normalizeCountryToCode } from '@/lib/countries'
|
||||
import { getUserAvatarUrl } from '../utils/avatar-url'
|
||||
import { getProjectLogoUrl } from '../utils/project-logo-url'
|
||||
import { aggregateVotes } from '../services/deliberation'
|
||||
import { validateRoundConfig } from '@/types/competition-configs'
|
||||
import type { LiveFinalConfig } from '@/types/competition-configs'
|
||||
import { computeBalanceContext, computeBalancedProjectScores, type ScorePoint } from '../services/juror-balance'
|
||||
import { generateJurorCalibration } from '../services/ai-juror-calibration'
|
||||
|
||||
const editionOrRoundInput = z.object({
|
||||
roundId: z.string().optional(),
|
||||
@@ -185,73 +187,70 @@ export const analyticsRouter = router({
|
||||
}),
|
||||
|
||||
/**
|
||||
* Get project rankings with average scores
|
||||
* Get project rankings with raw and balanced (juror-normalized) average scores.
|
||||
*
|
||||
* `averageScore` is the raw mean of per-evaluation criterion averages.
|
||||
* `balancedScore` rescales each juror's contributions via z-score (relative
|
||||
* to their own mean + stddev across projects they reviewed in this round),
|
||||
* then maps back onto the same 1-10 scale using the overall mean + stddev.
|
||||
* A harsh juror's scores are pulled up, a lenient juror's pulled down, so
|
||||
* rankings aren't skewed by a single outlier grader.
|
||||
*/
|
||||
getProjectRankings: observerProcedure
|
||||
.input(editionOrRoundInput.and(z.object({ limit: z.number().optional() })))
|
||||
.query(async ({ ctx, input }) => {
|
||||
const projects = await ctx.prisma.project.findMany({
|
||||
where: projectWhere(input),
|
||||
select: {
|
||||
id: true,
|
||||
title: true,
|
||||
teamName: true,
|
||||
status: true,
|
||||
assignments: {
|
||||
where: assignmentWhere(input),
|
||||
select: {
|
||||
evaluation: {
|
||||
select: { criterionScoresJson: true, status: true },
|
||||
},
|
||||
},
|
||||
const [projects, evaluations] = await Promise.all([
|
||||
ctx.prisma.project.findMany({
|
||||
where: projectWhere(input),
|
||||
select: {
|
||||
id: true,
|
||||
title: true,
|
||||
teamName: true,
|
||||
status: true,
|
||||
},
|
||||
},
|
||||
})
|
||||
}),
|
||||
ctx.prisma.evaluation.findMany({
|
||||
where: evalWhere(input, { status: 'SUBMITTED' }),
|
||||
select: {
|
||||
criterionScoresJson: true,
|
||||
assignment: { select: { userId: true, projectId: true } },
|
||||
},
|
||||
}),
|
||||
])
|
||||
|
||||
// Extract a single eval-level score (mean of numeric criterion scores) per evaluation.
|
||||
const points: ScorePoint[] = []
|
||||
for (const e of evaluations) {
|
||||
const scores = e.criterionScoresJson as Record<string, unknown> | null
|
||||
if (!scores) continue
|
||||
const vals = Object.values(scores).filter((s): s is number => typeof s === 'number')
|
||||
if (vals.length === 0) continue
|
||||
const rawScore = vals.reduce((a, b) => a + b, 0) / vals.length
|
||||
points.push({ projectId: e.assignment.projectId, userId: e.assignment.userId, rawScore })
|
||||
}
|
||||
|
||||
const balanceCtx = computeBalanceContext(points)
|
||||
const balancedByProject = computeBalancedProjectScores(points, balanceCtx)
|
||||
|
||||
// Calculate average scores
|
||||
const rankings = projects
|
||||
.map((project) => {
|
||||
const allScores: number[] = []
|
||||
|
||||
project.assignments.forEach((assignment) => {
|
||||
const evaluation = assignment.evaluation
|
||||
if (evaluation?.status === 'SUBMITTED') {
|
||||
const scores = evaluation.criterionScoresJson as Record<
|
||||
string,
|
||||
number
|
||||
> | null
|
||||
if (scores) {
|
||||
const scoreValues = Object.values(scores).filter(
|
||||
(s): s is number => typeof s === 'number'
|
||||
)
|
||||
if (scoreValues.length > 0) {
|
||||
const average =
|
||||
scoreValues.reduce((a, b) => a + b, 0) / scoreValues.length
|
||||
allScores.push(average)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
const averageScore =
|
||||
allScores.length > 0
|
||||
? allScores.reduce((a, b) => a + b, 0) / allScores.length
|
||||
: null
|
||||
|
||||
const result = balancedByProject.get(project.id)
|
||||
return {
|
||||
id: project.id,
|
||||
title: project.title,
|
||||
teamName: project.teamName,
|
||||
status: project.status,
|
||||
averageScore,
|
||||
evaluationCount: allScores.length,
|
||||
averageScore: result?.rawAverage ?? null,
|
||||
balancedScore: result?.balancedAverage ?? null,
|
||||
evaluationCount: result?.count ?? 0,
|
||||
}
|
||||
})
|
||||
.sort((a, b) => {
|
||||
// Evaluated projects first (sorted by score desc), unevaluated at bottom
|
||||
if (a.averageScore !== null && b.averageScore !== null) return b.averageScore - a.averageScore
|
||||
if (a.averageScore !== null) return -1
|
||||
if (b.averageScore !== null) return 1
|
||||
const aScore = a.balancedScore ?? a.averageScore
|
||||
const bScore = b.balancedScore ?? b.averageScore
|
||||
if (aScore !== null && bScore !== null) return bScore - aScore
|
||||
if (aScore !== null) return -1
|
||||
if (bScore !== null) return 1
|
||||
return 0
|
||||
})
|
||||
|
||||
@@ -2345,4 +2344,19 @@ export const analyticsRouter = router({
|
||||
standings,
|
||||
}
|
||||
}),
|
||||
|
||||
/**
|
||||
* AI-powered juror calibration analysis for an evaluation round.
|
||||
* Produces a plain-language explanation of the per-juror z-score balance
|
||||
* already applied in ranking — describes, does not prescribe.
|
||||
*
* Declared on `adminProcedure` as a mutation. Input is `{ roundId: string }`.
* The handler forwards the round id, the calling user's id (`ctx.user.id`)
* and the Prisma client (`ctx.prisma`) to the `generateJurorCalibration`
* service and returns that service's result as-is.
*/
|
||||
generateJurorCalibration: adminProcedure
|
||||
.input(z.object({ roundId: z.string() }))
|
||||
.mutation(async ({ ctx, input }) => {
|
||||
return generateJurorCalibration({
|
||||
roundId: input.roundId,
|
||||
userId: ctx.user.id,
|
||||
prisma: ctx.prisma,
|
||||
})
|
||||
}),
|
||||
})
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
} from '../services/ai-ranking'
|
||||
import { logAudit } from '../utils/audit'
|
||||
import type { EvaluationConfig } from '@/types/competition-configs'
|
||||
import { computeBalanceContext, computeBalancedProjectScores, type ScorePoint } from '../services/juror-balance'
|
||||
|
||||
// ─── Local Types ───────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -471,6 +472,7 @@ export const rankingRouter = router({
|
||||
evaluation: { status: 'SUBMITTED' },
|
||||
},
|
||||
select: {
|
||||
userId: true,
|
||||
projectId: true,
|
||||
user: { select: { name: true, email: true } },
|
||||
evaluation: {
|
||||
@@ -489,6 +491,8 @@ export const rankingRouter = router({
|
||||
decision: boolean | null
|
||||
}>> = {}
|
||||
|
||||
const balancePoints: ScorePoint[] = []
|
||||
|
||||
for (const a of assignments) {
|
||||
if (!a.evaluation) continue
|
||||
const list = byProject[a.projectId] ?? []
|
||||
@@ -511,8 +515,28 @@ export const rankingRouter = router({
|
||||
decision,
|
||||
})
|
||||
byProject[a.projectId] = list
|
||||
|
||||
if (a.evaluation.globalScore != null) {
|
||||
balancePoints.push({
|
||||
projectId: a.projectId,
|
||||
userId: a.userId,
|
||||
rawScore: a.evaluation.globalScore,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return byProject
|
||||
const balanceCtx = computeBalanceContext(balancePoints)
|
||||
const balancedByProject = computeBalancedProjectScores(balancePoints, balanceCtx)
|
||||
|
||||
// Per-project balanced average on the 1-10 scale, comparable to raw avgs.
|
||||
const balanced: Record<string, { rawAverage: number | null; balancedAverage: number | null }> = {}
|
||||
for (const [projectId, result] of balancedByProject.entries()) {
|
||||
balanced[projectId] = {
|
||||
rawAverage: result.rawAverage,
|
||||
balancedAverage: result.balancedAverage,
|
||||
}
|
||||
}
|
||||
|
||||
return { byProject, balanced }
|
||||
}),
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user