fix: compute z-context per-round in edition-mode rankings rollup
Previously the edition-level branch of analytics.getProjectRankings (programId mode) pooled every juror's evaluations across every round into a single z-normalization context. A juror's mean and stddev are not stable across round types — quick intake screening produces a very different grading profile than a deep evaluation round, and mixing them yields a meaningless personal calibration. The rollup now groups points by roundId, computes one balance context per round, and aggregates per-project as the unweighted mean of the per-round balanced averages. roundId mode is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -70,3 +70,88 @@ describe('analytics.getProjectDetail round scoping', () => {
|
||||
expect(result.stats!.totalEvaluations).toBe(5)
|
||||
})
|
||||
})
|
||||
|
||||
describe('analytics.getProjectRankings per-round z-context (edition mode)', () => {
  // Fixture handles shared between setup, the test body and teardown.
  let programId: string
  let admin: { id: string; email: string; role: 'SUPER_ADMIN' }
  let projectXId: string
  let projectYId: string
  const userIds: string[] = []

  // Builds one program with two rounds (A, B), two projects (X, Y) present in
  // both rounds, and two jurors who each score both projects in both rounds.
  beforeAll(async () => {
    const program = await createTestProgram({ name: `rank-edition-${uid()}` })
    programId = program.id
    const competition = await createTestCompetition(programId)

    const firstRound = await createTestRound(competition.id, { name: 'A', sortOrder: 0 })
    const secondRound = await createTestRound(competition.id, { name: 'B', sortOrder: 1 })

    // Each round gets its own form carrying a single criterion `c1`.
    const firstForm = await createTestEvaluationForm(firstRound.id, [
      { id: 'c1', label: 'X', scale: '1-10', weight: 1 },
    ])
    const secondForm = await createTestEvaluationForm(secondRound.id, [
      { id: 'c1', label: 'X', scale: '1-10', weight: 1 },
    ])

    const projectX = await createTestProject(programId, { title: 'X' })
    const projectY = await createTestProject(programId, { title: 'Y' })
    projectXId = projectX.id
    projectYId = projectY.id

    // Register both projects in both rounds (round-major order: X/A, Y/A, X/B, Y/B).
    for (const round of [firstRound, secondRound]) {
      for (const project of [projectX, projectY]) {
        await createTestProjectRoundState(project.id, round.id)
      }
    }

    const lenientJuror = await createTestUser('JURY_MEMBER')
    const harshJuror = await createTestUser('JURY_MEMBER')
    userIds.push(lenientJuror.id, harshJuror.id)

    // One row per submitted evaluation. Rows are written strictly in this order.
    const scoreSheet = [
      // Round A
      { round: firstRound, form: firstForm, juror: lenientJuror, project: projectX, score: 9 },
      { round: firstRound, form: firstForm, juror: lenientJuror, project: projectY, score: 9 },
      { round: firstRound, form: firstForm, juror: harshJuror, project: projectX, score: 6 },
      { round: firstRound, form: firstForm, juror: harshJuror, project: projectY, score: 4 },
      // Round B (different scoring profile)
      { round: secondRound, form: secondForm, juror: lenientJuror, project: projectX, score: 8 },
      { round: secondRound, form: secondForm, juror: lenientJuror, project: projectY, score: 8 },
      { round: secondRound, form: secondForm, juror: harshJuror, project: projectX, score: 7 },
      { round: secondRound, form: secondForm, juror: harshJuror, project: projectY, score: 5 },
    ]

    for (const entry of scoreSheet) {
      // An evaluation hangs off an assignment, so create that first.
      const assignment = await createTestAssignment(
        entry.juror.id,
        entry.project.id,
        entry.round.id,
      )
      await prisma.evaluation.create({
        data: {
          assignmentId: assignment.id,
          formId: entry.form.id,
          status: 'SUBMITTED',
          submittedAt: new Date(),
          criterionScoresJson: { c1: entry.score },
        },
      })
    }

    // The caller identity used by the test below.
    const adminUser = await createTestUser('SUPER_ADMIN')
    userIds.push(adminUser.id)
    admin = { id: adminUser.id, email: adminUser.email, role: 'SUPER_ADMIN' }
  })

  // Hands the program and every user created above to the shared cleanup helper.
  afterAll(async () => {
    await cleanupTestData(programId, userIds)
  })

  it('aggregates per-project balanced score as the mean of per-round balanced averages', async () => {
    const caller = createCaller(analyticsRouter, admin)
    const result = await caller.getProjectRankings({ programId })

    const rankingFor = (projectId: string) =>
      result.find((p: { id: string }) => p.id === projectId)!
    const x = rankingFor(projectXId)
    const y = rankingFor(projectYId)

    // Expected values, worked out by hand with the algorithm in juror-balance.ts:
    //
    //   Round A: overall mean = 7, stddev = √4.5
    //            lenient stddev = 0 (fallback), harsh stddev = 1
    //            X balanced ≈ 9.06, Y balanced ≈ 6.94
    //   Round B: overall mean = 7, stddev = √1.5
    //            lenient stddev = 0 (fallback), harsh stddev = 1
    //            X balanced ≈ 8.11, Y balanced ≈ 6.89
    //
    // NOTE(review): these figures are consistent with the zero-stddev fallback
    // passing the juror's raw score through unchanged — confirm against
    // juror-balance.ts.
    //
    // Edition rollup = mean of the per-round balanced averages:
    //   X ≈ 8.59, Y ≈ 6.91
    expect(x.balancedScore!).toBeCloseTo(8.59, 1)
    expect(y.balancedScore!).toBeCloseTo(6.91, 1)

    // Crucially, X must rank above Y after the per-round correction.
    expect(x.balancedScore!).toBeGreaterThan(y.balancedScore!)
  })
})
|
||||
|
||||
Reference in New Issue
Block a user