feat: surface juror-balanced scores and AI calibration advisory
All checks were successful
Build and Push Docker Image / build (push) Successful in 7m27s
All checks were successful
Build and Push Docker Image / build (push) Successful in 7m27s
Adds a shared juror-balancing utility (z-score normalization per juror, rescaled back onto the raw 1-10 scale) and wires it into:

- Admin reports page: the Top-10 project table now shows "Raw Avg" and "Balanced" columns side by side, and the summary stats row shows a balanced-average tile. Sort defaults to balanced so harsh and lenient graders no longer skew the ranking.
- Ranking dashboard: each project row shows a green/amber balanced-score chip next to the raw average when the two differ by ≥0.05, making it obvious when juror calibration moved a project's effective ranking.

Also adds the AI Juror Calibration Advisory — a mutation that takes anonymized per-juror stats, calls OpenAI, and produces a plain-language explanation of the cohort's grading patterns plus a per-juror severity (normal / notable / outlier) with a one-sentence narrative. The advisory describes the statistical balance that already runs; it does not introduce a new weighting layer. It is rendered as a panel in the Juror Consistency tab when a specific round is selected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,11 +1,13 @@
|
||||
import { z } from 'zod'
|
||||
import { router, observerProcedure } from '../trpc'
|
||||
import { router, observerProcedure, adminProcedure } from '../trpc'
|
||||
import { normalizeCountryToCode } from '@/lib/countries'
|
||||
import { getUserAvatarUrl } from '../utils/avatar-url'
|
||||
import { getProjectLogoUrl } from '../utils/project-logo-url'
|
||||
import { aggregateVotes } from '../services/deliberation'
|
||||
import { validateRoundConfig } from '@/types/competition-configs'
|
||||
import type { LiveFinalConfig } from '@/types/competition-configs'
|
||||
import { computeBalanceContext, computeBalancedProjectScores, type ScorePoint } from '../services/juror-balance'
|
||||
import { generateJurorCalibration } from '../services/ai-juror-calibration'
|
||||
|
||||
const editionOrRoundInput = z.object({
|
||||
roundId: z.string().optional(),
|
||||
@@ -185,73 +187,70 @@ export const analyticsRouter = router({
|
||||
}),
|
||||
|
||||
/**
|
||||
* Get project rankings with average scores
|
||||
* Get project rankings with raw and balanced (juror-normalized) average scores.
|
||||
*
|
||||
* `averageScore` is the raw mean of per-evaluation criterion averages.
|
||||
* `balancedScore` rescales each juror's contributions via z-score (relative
|
||||
* to their own mean + stddev across projects they reviewed in this round),
|
||||
* then maps back onto the same 1-10 scale using the overall mean + stddev.
|
||||
* A harsh juror's scores are pulled up, a lenient juror's pulled down, so
|
||||
* rankings aren't skewed by a single outlier grader.
|
||||
*/
|
||||
getProjectRankings: observerProcedure
|
||||
.input(editionOrRoundInput.and(z.object({ limit: z.number().optional() })))
|
||||
.query(async ({ ctx, input }) => {
|
||||
const projects = await ctx.prisma.project.findMany({
|
||||
where: projectWhere(input),
|
||||
select: {
|
||||
id: true,
|
||||
title: true,
|
||||
teamName: true,
|
||||
status: true,
|
||||
assignments: {
|
||||
where: assignmentWhere(input),
|
||||
select: {
|
||||
evaluation: {
|
||||
select: { criterionScoresJson: true, status: true },
|
||||
},
|
||||
},
|
||||
const [projects, evaluations] = await Promise.all([
|
||||
ctx.prisma.project.findMany({
|
||||
where: projectWhere(input),
|
||||
select: {
|
||||
id: true,
|
||||
title: true,
|
||||
teamName: true,
|
||||
status: true,
|
||||
},
|
||||
},
|
||||
})
|
||||
}),
|
||||
ctx.prisma.evaluation.findMany({
|
||||
where: evalWhere(input, { status: 'SUBMITTED' }),
|
||||
select: {
|
||||
criterionScoresJson: true,
|
||||
assignment: { select: { userId: true, projectId: true } },
|
||||
},
|
||||
}),
|
||||
])
|
||||
|
||||
// Extract a single eval-level score (mean of numeric criterion scores) per evaluation.
|
||||
const points: ScorePoint[] = []
|
||||
for (const e of evaluations) {
|
||||
const scores = e.criterionScoresJson as Record<string, unknown> | null
|
||||
if (!scores) continue
|
||||
const vals = Object.values(scores).filter((s): s is number => typeof s === 'number')
|
||||
if (vals.length === 0) continue
|
||||
const rawScore = vals.reduce((a, b) => a + b, 0) / vals.length
|
||||
points.push({ projectId: e.assignment.projectId, userId: e.assignment.userId, rawScore })
|
||||
}
|
||||
|
||||
const balanceCtx = computeBalanceContext(points)
|
||||
const balancedByProject = computeBalancedProjectScores(points, balanceCtx)
|
||||
|
||||
// Calculate average scores
|
||||
const rankings = projects
|
||||
.map((project) => {
|
||||
const allScores: number[] = []
|
||||
|
||||
project.assignments.forEach((assignment) => {
|
||||
const evaluation = assignment.evaluation
|
||||
if (evaluation?.status === 'SUBMITTED') {
|
||||
const scores = evaluation.criterionScoresJson as Record<
|
||||
string,
|
||||
number
|
||||
> | null
|
||||
if (scores) {
|
||||
const scoreValues = Object.values(scores).filter(
|
||||
(s): s is number => typeof s === 'number'
|
||||
)
|
||||
if (scoreValues.length > 0) {
|
||||
const average =
|
||||
scoreValues.reduce((a, b) => a + b, 0) / scoreValues.length
|
||||
allScores.push(average)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
const averageScore =
|
||||
allScores.length > 0
|
||||
? allScores.reduce((a, b) => a + b, 0) / allScores.length
|
||||
: null
|
||||
|
||||
const result = balancedByProject.get(project.id)
|
||||
return {
|
||||
id: project.id,
|
||||
title: project.title,
|
||||
teamName: project.teamName,
|
||||
status: project.status,
|
||||
averageScore,
|
||||
evaluationCount: allScores.length,
|
||||
averageScore: result?.rawAverage ?? null,
|
||||
balancedScore: result?.balancedAverage ?? null,
|
||||
evaluationCount: result?.count ?? 0,
|
||||
}
|
||||
})
|
||||
.sort((a, b) => {
|
||||
// Evaluated projects first (sorted by score desc), unevaluated at bottom
|
||||
if (a.averageScore !== null && b.averageScore !== null) return b.averageScore - a.averageScore
|
||||
if (a.averageScore !== null) return -1
|
||||
if (b.averageScore !== null) return 1
|
||||
const aScore = a.balancedScore ?? a.averageScore
|
||||
const bScore = b.balancedScore ?? b.averageScore
|
||||
if (aScore !== null && bScore !== null) return bScore - aScore
|
||||
if (aScore !== null) return -1
|
||||
if (bScore !== null) return 1
|
||||
return 0
|
||||
})
|
||||
|
||||
@@ -2345,4 +2344,19 @@ export const analyticsRouter = router({
|
||||
standings,
|
||||
}
|
||||
}),
|
||||
|
||||
/**
|
||||
* AI-powered juror calibration analysis for an evaluation round.
|
||||
* Produces a plain-language explanation of the per-juror z-score balance
|
||||
* already applied in ranking — describes, does not prescribe.
*
* Declared as a mutation on `adminProcedure`; the only input is `roundId`,
* a required string. The handler does no work of its own: it forwards
* `roundId`, the caller's `ctx.user.id` and the request's `ctx.prisma` to the
* `generateJurorCalibration` service and returns that service's result as-is.
*
* NOTE(review): the result shape, error behaviour and any external calls are
* defined by the service in '../services/ai-juror-calibration', which is not
* visible from this file — confirm there before relying on them.
|
||||
*/
|
||||
generateJurorCalibration: adminProcedure
|
||||
.input(z.object({ roundId: z.string() }))
|
||||
.mutation(async ({ ctx, input }) => {
|
||||
return generateJurorCalibration({
|
||||
roundId: input.roundId,
|
||||
userId: ctx.user.id,
|
||||
prisma: ctx.prisma,
|
||||
})
|
||||
}),
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user