fix: compute z-context per-round in edition-mode rankings rollup
Previously the edition-level branch of analytics.getProjectRankings (programId mode) pooled every juror's evaluations across every round into a single z-normalization context. A juror's mean and stddev are not stable across round types — quick intake screening produces a very different grading profile than a deep evaluation round, and mixing them yields a meaningless personal calibration. The rollup now groups points by roundId, computes one balance context per round, and aggregates per-project as the unweighted mean of the per-round balanced averages. roundId mode is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,7 +6,13 @@ import { getProjectLogoUrl } from '../utils/project-logo-url'
|
|||||||
import { aggregateVotes } from '../services/deliberation'
|
import { aggregateVotes } from '../services/deliberation'
|
||||||
import { validateRoundConfig } from '@/types/competition-configs'
|
import { validateRoundConfig } from '@/types/competition-configs'
|
||||||
import type { LiveFinalConfig } from '@/types/competition-configs'
|
import type { LiveFinalConfig } from '@/types/competition-configs'
|
||||||
import { computeBalanceContext, computeBalancedProjectScores, type ScorePoint } from '../services/juror-balance'
|
import {
|
||||||
|
computeBalanceContext,
|
||||||
|
computeBalancedProjectScores,
|
||||||
|
computePerRoundBalanced,
|
||||||
|
type ScorePoint,
|
||||||
|
type RoundScopedScorePoint,
|
||||||
|
} from '../services/juror-balance'
|
||||||
import { generateJurorCalibration } from '../services/ai-juror-calibration'
|
import { generateJurorCalibration } from '../services/ai-juror-calibration'
|
||||||
|
|
||||||
const editionOrRoundInput = z.object({
|
const editionOrRoundInput = z.object({
|
||||||
@@ -213,24 +219,39 @@ export const analyticsRouter = router({
|
|||||||
where: evalWhere(input, { status: 'SUBMITTED' }),
|
where: evalWhere(input, { status: 'SUBMITTED' }),
|
||||||
select: {
|
select: {
|
||||||
criterionScoresJson: true,
|
criterionScoresJson: true,
|
||||||
assignment: { select: { userId: true, projectId: true } },
|
assignment: { select: { userId: true, projectId: true, roundId: true } },
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
])
|
])
|
||||||
|
|
||||||
// Extract a single eval-level score (mean of numeric criterion scores) per evaluation.
|
// Extract a single eval-level score (mean of numeric criterion scores) per evaluation.
|
||||||
const points: ScorePoint[] = []
|
const rawPoints: RoundScopedScorePoint[] = []
|
||||||
for (const e of evaluations) {
|
for (const e of evaluations) {
|
||||||
const scores = e.criterionScoresJson as Record<string, unknown> | null
|
const scores = e.criterionScoresJson as Record<string, unknown> | null
|
||||||
if (!scores) continue
|
if (!scores) continue
|
||||||
const vals = Object.values(scores).filter((s): s is number => typeof s === 'number')
|
const vals = Object.values(scores).filter((s): s is number => typeof s === 'number')
|
||||||
if (vals.length === 0) continue
|
if (vals.length === 0) continue
|
||||||
const rawScore = vals.reduce((a, b) => a + b, 0) / vals.length
|
const rawScore = vals.reduce((a, b) => a + b, 0) / vals.length
|
||||||
points.push({ projectId: e.assignment.projectId, userId: e.assignment.userId, rawScore })
|
rawPoints.push({
|
||||||
|
projectId: e.assignment.projectId,
|
||||||
|
userId: e.assignment.userId,
|
||||||
|
roundId: e.assignment.roundId,
|
||||||
|
rawScore,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
const balanceCtx = computeBalanceContext(points)
|
// roundId mode: single-round z-context (existing behavior).
|
||||||
const balancedByProject = computeBalancedProjectScores(points, balanceCtx)
|
// programId mode: per-round z-contexts aggregated as the mean of per-round
|
||||||
|
// balanced averages — never pool z-contexts across rounds because a juror's
|
||||||
|
// grading profile differs by round type.
|
||||||
|
const balancedByProject: Map<string, { rawAverage: number | null; balancedAverage: number | null; count: number }> = (() => {
|
||||||
|
if (input.roundId) {
|
||||||
|
const flat: ScorePoint[] = rawPoints.map(({ projectId, userId, rawScore }) => ({ projectId, userId, rawScore }))
|
||||||
|
const ctx = computeBalanceContext(flat)
|
||||||
|
return computeBalancedProjectScores(flat, ctx)
|
||||||
|
}
|
||||||
|
return computePerRoundBalanced(rawPoints)
|
||||||
|
})()
|
||||||
|
|
||||||
const rankings = projects
|
const rankings = projects
|
||||||
.map((project) => {
|
.map((project) => {
|
||||||
|
|||||||
@@ -118,3 +118,71 @@ export function computeBalancedProjectScores(
|
|||||||
|
|
||||||
return results
|
return results
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * A score point tagged with the id of the round its evaluation belongs to.
 */
export type RoundScopedScorePoint = ScorePoint & { roundId: string }

/**
 * Edition-level rollup for a single project, as produced by
 * `computePerRoundBalanced`.
 *
 * - `rawAverage` / `balancedAverage`: unweighted mean of the per-round
 *   averages that were non-null, or `null` when no round supplied one.
 * - `count`: sum of the per-round `count` values.
 * - `roundCount`: number of rounds in which the project had a result.
 */
export type EditionRollupResult = {
  projectId: string
  rawAverage: number | null
  balancedAverage: number | null
  count: number
  roundCount: number
}

/**
 * Per-round balanced rollup: groups points by `roundId`, computes one balance
 * context per round, then averages the per-round results for each project.
 *
 * Use when surfacing edition-level rankings — never pool z-contexts across
 * rounds, because a juror's grading profile differs by round type.
 *
 * Every round contributes with equal weight regardless of how many
 * evaluations it holds; this applies to the raw average as well as the
 * balanced one.
 *
 * @param points Score points across any number of rounds.
 * @returns Map keyed by project id; projects with no points are absent.
 */
export function computePerRoundBalanced(
  points: RoundScopedScorePoint[],
): Map<string, EditionRollupResult> {
  // Bucket the points by round, dropping the roundId tag so each bucket can
  // be handed to the single-round helpers unchanged.
  const pointsByRound = new Map<string, ScorePoint[]>()
  for (const { roundId, projectId, userId, rawScore } of points) {
    let bucket = pointsByRound.get(roundId)
    if (bucket === undefined) {
      bucket = []
      pointsByRound.set(roundId, bucket)
    }
    bucket.push({ projectId, userId, rawScore })
  }

  // One independent balance context per round, in first-seen round order.
  const roundResults: Array<Map<string, BalancedProjectResult>> = Array.from(
    pointsByRound.values(),
    (roundPoints) => computeBalancedProjectScores(roundPoints, computeBalanceContext(roundPoints)),
  )

  type Tally = {
    rawSum: number
    rawCount: number
    balancedSum: number
    balancedCount: number
    count: number
    roundCount: number
  }

  const tallies = new Map<string, Tally>()
  for (const roundResult of roundResults) {
    for (const [projectId, perRound] of roundResult) {
      let tally = tallies.get(projectId)
      if (tally === undefined) {
        tally = { rawSum: 0, rawCount: 0, balancedSum: 0, balancedCount: 0, count: 0, roundCount: 0 }
        tallies.set(projectId, tally)
      }
      // A null per-round average is left out of the mean rather than counted as zero.
      if (perRound.rawAverage != null) {
        tally.rawSum += perRound.rawAverage
        tally.rawCount += 1
      }
      if (perRound.balancedAverage != null) {
        tally.balancedSum += perRound.balancedAverage
        tally.balancedCount += 1
      }
      tally.count += perRound.count
      tally.roundCount += 1
    }
  }

  const meanOrNull = (sum: number, n: number): number | null => (n > 0 ? sum / n : null)

  const rollup = new Map<string, EditionRollupResult>()
  tallies.forEach((tally, projectId) => {
    rollup.set(projectId, {
      projectId,
      rawAverage: meanOrNull(tally.rawSum, tally.rawCount),
      balancedAverage: meanOrNull(tally.balancedSum, tally.balancedCount),
      count: tally.count,
      roundCount: tally.roundCount,
    })
  })
  return rollup
}
|
||||||
|
|||||||
@@ -70,3 +70,88 @@ describe('analytics.getProjectDetail round scoping', () => {
|
|||||||
expect(result.stats!.totalEvaluations).toBe(5)
|
expect(result.stats!.totalEvaluations).toBe(5)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Edition-mode (programId) rankings must normalise each round on its own and
// then average the per-round balanced scores per project.
describe('analytics.getProjectRankings per-round z-context (edition mode)', () => {
  let programId: string
  let admin: { id: string; email: string; role: 'SUPER_ADMIN' }
  let projectXId: string
  let projectYId: string
  const userIds: string[] = []

  beforeAll(async () => {
    // Program -> competition -> two rounds, each with a one-criterion form.
    programId = (await createTestProgram({ name: `rank-edition-${uid()}` })).id
    const competition = await createTestCompetition(programId)
    const roundA = await createTestRound(competition.id, { name: 'A', sortOrder: 0 })
    const roundB = await createTestRound(competition.id, { name: 'B', sortOrder: 1 })
    const formA = await createTestEvaluationForm(roundA.id, [
      { id: 'c1', label: 'X', scale: '1-10', weight: 1 },
    ])
    const formB = await createTestEvaluationForm(roundB.id, [
      { id: 'c1', label: 'X', scale: '1-10', weight: 1 },
    ])

    // Two projects, both present in both rounds.
    const projX = await createTestProject(programId, { title: 'X' })
    const projY = await createTestProject(programId, { title: 'Y' })
    projectXId = projX.id
    projectYId = projY.id
    for (const round of [roundA, roundB]) {
      for (const project of [projX, projY]) {
        await createTestProjectRoundState(project.id, round.id)
      }
    }

    const lenient = await createTestUser('JURY_MEMBER')
    const harsh = await createTestUser('JURY_MEMBER')
    userIds.push(lenient.id, harsh.id)

    type Submission = {
      jurorId: string
      projectId: string
      roundId: string
      formId: string
      c1: number
    }
    const submissions: Submission[] = [
      // Round A
      { jurorId: lenient.id, projectId: projX.id, roundId: roundA.id, formId: formA.id, c1: 9 },
      { jurorId: lenient.id, projectId: projY.id, roundId: roundA.id, formId: formA.id, c1: 9 },
      { jurorId: harsh.id, projectId: projX.id, roundId: roundA.id, formId: formA.id, c1: 6 },
      { jurorId: harsh.id, projectId: projY.id, roundId: roundA.id, formId: formA.id, c1: 4 },
      // Round B (different scoring profile)
      { jurorId: lenient.id, projectId: projX.id, roundId: roundB.id, formId: formB.id, c1: 8 },
      { jurorId: lenient.id, projectId: projY.id, roundId: roundB.id, formId: formB.id, c1: 8 },
      { jurorId: harsh.id, projectId: projX.id, roundId: roundB.id, formId: formB.id, c1: 7 },
      { jurorId: harsh.id, projectId: projY.id, roundId: roundB.id, formId: formB.id, c1: 5 },
    ]
    // Written one at a time, in table order: an assignment, then its submitted evaluation.
    for (const { jurorId, projectId, roundId, formId, c1 } of submissions) {
      const assignment = await createTestAssignment(jurorId, projectId, roundId)
      await prisma.evaluation.create({
        data: {
          assignmentId: assignment.id,
          formId,
          status: 'SUBMITTED',
          submittedAt: new Date(),
          criterionScoresJson: { c1 },
        },
      })
    }

    const adminUser = await createTestUser('SUPER_ADMIN')
    userIds.push(adminUser.id)
    admin = { id: adminUser.id, email: adminUser.email, role: 'SUPER_ADMIN' }
  })

  afterAll(async () => {
    await cleanupTestData(programId, userIds)
  })

  it('aggregates per-project balanced score as the mean of per-round balanced averages', async () => {
    const caller = createCaller(analyticsRouter, admin)
    const rankings = await caller.getProjectRankings({ programId })
    const rankedX = rankings.find((p: { id: string }) => p.id === projectXId)!
    const rankedY = rankings.find((p: { id: string }) => p.id === projectYId)!

    // Expected values, worked by hand with the algorithm in juror-balance.ts:
    //   Round A: overall mean = 7, stddev = √4.5; lenient stddev = 0 (fallback), harsh stddev = 1
    //            X balanced ≈ 9.06, Y balanced ≈ 6.94
    //   Round B: overall mean = 7, stddev = √1.5; lenient stddev = 0 (fallback), harsh stddev = 1
    //            X balanced ≈ 8.11, Y balanced ≈ 6.89
    //   Edition rollup = mean of the per-round balanced averages:
    //            X ≈ 8.59, Y ≈ 6.91
    expect(rankedX.balancedScore!).toBeCloseTo(8.59, 1)
    expect(rankedY.balancedScore!).toBeCloseTo(6.91, 1)
    // Crucially, X must rank above Y after the per-round correction.
    expect(rankedX.balancedScore!).toBeGreaterThan(rankedY.balancedScore!)
  })
})
|
||||||
|
|||||||
Reference in New Issue
Block a user