feat: surface juror-balanced scores and AI calibration advisory
All checks were successful
Build and Push Docker Image / build (push) Successful in 7m27s
All checks were successful
Build and Push Docker Image / build (push) Successful in 7m27s
Adds a shared juror-balancing utility (z-score normalization per juror, rescaled back onto the raw 1-10 scale) and wires it into:
- Admin reports page: the Top-10 project table now shows "Raw Avg" and "Balanced" columns side by side, and the summary stats row shows a balanced-average tile. The sort defaults to the balanced score so harsh and lenient graders no longer skew the ranking.
- Ranking dashboard: each project row shows a green/amber balanced-score chip next to the raw average when the two differ by ≥ 0.05, making it obvious when juror calibration moved a project's effective ranking.

Also adds the AI Juror Calibration Advisory — a mutation that takes anonymized per-juror stats, calls OpenAI, and produces a plain-language explanation of the cohort's grading patterns plus a per-juror severity (normal / notable / outlier) with a one-sentence narrative. The advisory describes the statistical balancing that already runs; it does not introduce a new weighting layer. It is rendered as a panel in the Juror Consistency tab when a specific round is selected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -45,7 +45,11 @@ import {
|
|||||||
Trophy,
|
Trophy,
|
||||||
ArrowRight,
|
ArrowRight,
|
||||||
Hash,
|
Hash,
|
||||||
|
Sparkles,
|
||||||
|
Loader2,
|
||||||
|
AlertTriangle,
|
||||||
} from 'lucide-react'
|
} from 'lucide-react'
|
||||||
|
import { toast } from 'sonner'
|
||||||
import { formatDateOnly } from '@/lib/utils'
|
import { formatDateOnly } from '@/lib/utils'
|
||||||
import {
|
import {
|
||||||
ScoreDistributionChart,
|
ScoreDistributionChart,
|
||||||
@@ -271,6 +275,12 @@ function ReportsOverview() {
|
|||||||
const evaluated = projectRankings.filter(p => p.averageScore !== null)
|
const evaluated = projectRankings.filter(p => p.averageScore !== null)
|
||||||
const scores = evaluated.map(p => p.averageScore as number)
|
const scores = evaluated.map(p => p.averageScore as number)
|
||||||
const avgScore = scores.length ? scores.reduce((a, b) => a + b, 0) / scores.length : 0
|
const avgScore = scores.length ? scores.reduce((a, b) => a + b, 0) / scores.length : 0
|
||||||
|
const balancedScores = projectRankings
|
||||||
|
.map(p => p.balancedScore)
|
||||||
|
.filter((s): s is number => s != null)
|
||||||
|
const avgBalanced = balancedScores.length
|
||||||
|
? balancedScores.reduce((a, b) => a + b, 0) / balancedScores.length
|
||||||
|
: null
|
||||||
const minScore = scores.length ? Math.min(...scores) : 0
|
const minScore = scores.length ? Math.min(...scores) : 0
|
||||||
const maxScore = scores.length ? Math.max(...scores) : 0
|
const maxScore = scores.length ? Math.max(...scores) : 0
|
||||||
const evalPercent = projectRankings.length ? Math.round((evaluated.length / projectRankings.length) * 100) : 0
|
const evalPercent = projectRankings.length ? Math.round((evaluated.length / projectRankings.length) * 100) : 0
|
||||||
@@ -281,14 +291,28 @@ function ReportsOverview() {
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-4">
|
<div className="grid grid-cols-2 gap-3 sm:grid-cols-5">
|
||||||
<div className="rounded-lg border p-3 text-center">
|
<div className="rounded-lg border p-3 text-center">
|
||||||
<p className="text-xs text-muted-foreground">Total Projects</p>
|
<p className="text-xs text-muted-foreground">Total Projects</p>
|
||||||
<p className="text-xl font-bold tabular-nums">{projectRankings.length}</p>
|
<p className="text-xl font-bold tabular-nums">{projectRankings.length}</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="rounded-lg border p-3 text-center">
|
<div
|
||||||
<p className="text-xs text-muted-foreground">Avg Score</p>
|
className="rounded-lg border p-3 text-center"
|
||||||
<p className="text-xl font-bold tabular-nums">{avgScore ? avgScore.toFixed(1) : '-'}</p>
|
title="Unweighted mean of all submitted juror scores"
|
||||||
|
>
|
||||||
|
<p className="text-xs text-muted-foreground">Raw Avg</p>
|
||||||
|
<p className="text-xl font-bold tabular-nums text-muted-foreground">
|
||||||
|
{avgScore ? avgScore.toFixed(1) : '-'}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
className="rounded-lg border p-3 text-center"
|
||||||
|
title="Juror-balanced average: per-juror z-score normalization rescaled to the 1–10 range"
|
||||||
|
>
|
||||||
|
<p className="text-xs text-muted-foreground">Balanced Avg</p>
|
||||||
|
<p className="text-xl font-bold tabular-nums">
|
||||||
|
{avgBalanced == null ? '-' : avgBalanced.toFixed(1)}
|
||||||
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<div className="rounded-lg border p-3 text-center">
|
<div className="rounded-lg border p-3 text-center">
|
||||||
<p className="text-xs text-muted-foreground">Evaluated</p>
|
<p className="text-xs text-muted-foreground">Evaluated</p>
|
||||||
@@ -319,7 +343,7 @@ function ReportsOverview() {
|
|||||||
{/* Top 10 ranked table */}
|
{/* Top 10 ranked table */}
|
||||||
<div>
|
<div>
|
||||||
<p className="text-sm font-medium text-muted-foreground mb-2 flex items-center gap-1.5">
|
<p className="text-sm font-medium text-muted-foreground mb-2 flex items-center gap-1.5">
|
||||||
<Trophy className="h-3.5 w-3.5" /> Top 10 by Average Score
|
<Trophy className="h-3.5 w-3.5" /> Top 10 by Balanced Score
|
||||||
</p>
|
</p>
|
||||||
<div className="rounded-lg border">
|
<div className="rounded-lg border">
|
||||||
<Table>
|
<Table>
|
||||||
@@ -328,7 +352,18 @@ function ReportsOverview() {
|
|||||||
<TableHead className="w-10">#</TableHead>
|
<TableHead className="w-10">#</TableHead>
|
||||||
<TableHead>Project</TableHead>
|
<TableHead>Project</TableHead>
|
||||||
<TableHead className="hidden sm:table-cell">Team</TableHead>
|
<TableHead className="hidden sm:table-cell">Team</TableHead>
|
||||||
<TableHead className="text-right">Avg</TableHead>
|
<TableHead
|
||||||
|
className="text-right"
|
||||||
|
title="Raw average of juror scores — uncorrected for per-juror harshness"
|
||||||
|
>
|
||||||
|
Raw Avg
|
||||||
|
</TableHead>
|
||||||
|
<TableHead
|
||||||
|
className="text-right"
|
||||||
|
title="Juror-balanced average: each juror's contribution is z-score normalized against their own grading distribution, then rescaled to the 1–10 range. Harsh and lenient jurors contribute on equal footing."
|
||||||
|
>
|
||||||
|
Balanced
|
||||||
|
</TableHead>
|
||||||
<TableHead className="text-right">Evals</TableHead>
|
<TableHead className="text-right">Evals</TableHead>
|
||||||
<TableHead>Status</TableHead>
|
<TableHead>Status</TableHead>
|
||||||
</TableRow>
|
</TableRow>
|
||||||
@@ -345,9 +380,12 @@ function ReportsOverview() {
|
|||||||
<TableCell className="hidden sm:table-cell text-muted-foreground">
|
<TableCell className="hidden sm:table-cell text-muted-foreground">
|
||||||
{p.teamName || '-'}
|
{p.teamName || '-'}
|
||||||
</TableCell>
|
</TableCell>
|
||||||
<TableCell className="text-right tabular-nums">
|
<TableCell className="text-right tabular-nums text-muted-foreground">
|
||||||
{p.averageScore === null ? '-' : p.averageScore.toFixed(2)}
|
{p.averageScore === null ? '-' : p.averageScore.toFixed(2)}
|
||||||
</TableCell>
|
</TableCell>
|
||||||
|
<TableCell className="text-right tabular-nums font-semibold">
|
||||||
|
{p.balancedScore == null ? '-' : p.balancedScore.toFixed(2)}
|
||||||
|
</TableCell>
|
||||||
<TableCell className="text-right tabular-nums">{p.evaluationCount}</TableCell>
|
<TableCell className="text-right tabular-nums">{p.evaluationCount}</TableCell>
|
||||||
<TableCell>
|
<TableCell>
|
||||||
<Badge variant="outline">{formatStatusLabel(p.status)}</Badge>
|
<Badge variant="outline">{formatStatusLabel(p.status)}</Badge>
|
||||||
@@ -870,10 +908,150 @@ function JurorConsistencyTab() {
|
|||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{queryInput.roundId && (
|
||||||
|
<JurorCalibrationPanel roundId={queryInput.roundId} />
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * AI Juror Calibration Advisory panel for one evaluation round.
 *
 * Runs the `analytics.generateJurorCalibration` mutation on demand and renders
 * the returned cohort statistics, overall summary, key takeaways and a
 * per-juror table. Failures are surfaced as a toast.
 *
 * @param roundId - Round whose submitted evaluations should be analyzed.
 */
function JurorCalibrationPanel({ roundId }: { roundId: string }) {
  const mutation = trpc.analytics.generateJurorCalibration.useMutation({
    onError: (err) => toast.error(`Calibration analysis failed: ${err.message}`),
  })

  // The panel is rendered without a `key`, so it stays mounted while the round
  // selection changes and `mutation.data` may still hold the advisory of the
  // previously selected round. Only treat the data as the current result when
  // it was generated for the round being displayed.
  const result = mutation.data?.roundId === roundId ? mutation.data : undefined

  // Badge classes per AI-assigned severity label.
  const severityStyle: Record<string, string> = {
    outlier: 'bg-red-50 text-red-700 border-red-200',
    notable: 'bg-amber-50 text-amber-700 border-amber-200',
    normal: 'bg-muted text-muted-foreground',
  }

  return (
    <Card>
      <CardHeader>
        <div className="flex flex-wrap items-start justify-between gap-3">
          <div>
            <CardTitle className="flex items-center gap-2">
              <Sparkles className="h-5 w-5 text-[#de0f1e]" />
              AI Juror Calibration Advisory
            </CardTitle>
            <CardDescription>
              Plain-language explanation of the per-juror score balancing already applied to rankings.
              Describes, does not prescribe — the math runs regardless.
            </CardDescription>
          </div>
          <Button
            onClick={() => mutation.mutate({ roundId })}
            disabled={mutation.isPending}
            className="gap-2"
          >
            {mutation.isPending ? <Loader2 className="h-4 w-4 animate-spin" /> : <Sparkles className="h-4 w-4" />}
            {mutation.isPending ? 'Analyzing…' : result ? 'Regenerate' : 'Analyze jurors'}
          </Button>
        </div>
      </CardHeader>
      <CardContent className="space-y-5">
        {!result && !mutation.isPending && (
          <p className="text-sm text-muted-foreground">
            Run the analysis to see per-juror grading patterns, cohort stats, and the calibration
            narrative for the selected round.
          </p>
        )}

        {result && (
          <>
            {/* Cohort-level headline numbers */}
            <div className="grid grid-cols-2 gap-3 sm:grid-cols-4">
              <div className="rounded-lg border p-3 text-center">
                <p className="text-xs text-muted-foreground">Cohort Mean</p>
                <p className="text-xl font-bold tabular-nums">{result.cohortMean.toFixed(2)}</p>
              </div>
              <div className="rounded-lg border p-3 text-center">
                <p className="text-xs text-muted-foreground">Cohort Stddev</p>
                <p className="text-xl font-bold tabular-nums">{result.cohortStddev.toFixed(2)}</p>
              </div>
              <div className="rounded-lg border p-3 text-center">
                <p className="text-xs text-muted-foreground">Evaluations</p>
                <p className="text-xl font-bold tabular-nums">{result.totalEvaluations}</p>
              </div>
              <div className="rounded-lg border p-3 text-center">
                <p className="text-xs text-muted-foreground">Jurors</p>
                <p className="text-xl font-bold tabular-nums">{result.totalJurors}</p>
              </div>
            </div>

            {/* AI narrative: overall summary plus optional bullet takeaways */}
            <div className="rounded-lg border bg-muted/30 p-4">
              <p className="text-sm leading-relaxed">{result.overallSummary}</p>
              {result.keyTakeaways.length > 0 && (
                <ul className="mt-3 space-y-1.5 text-sm">
                  {result.keyTakeaways.map((t, i) => (
                    <li key={i} className="flex items-start gap-2">
                      <ArrowRight className="mt-1 h-3.5 w-3.5 flex-shrink-0 text-muted-foreground" />
                      <span>{t}</span>
                    </li>
                  ))}
                </ul>
              )}
            </div>

            {/* Per-juror breakdown */}
            <div className="rounded-lg border">
              <Table>
                <TableHeader>
                  <TableRow>
                    <TableHead>Juror</TableHead>
                    <TableHead className="text-right">Evals</TableHead>
                    <TableHead className="text-right">Mean</TableHead>
                    <TableHead className="text-right">Δ Cohort</TableHead>
                    <TableHead className="text-right" title="Juror's stddev / cohort stddev">
                      Influence
                    </TableHead>
                    <TableHead>Severity</TableHead>
                    <TableHead>Notes</TableHead>
                  </TableRow>
                </TableHeader>
                <TableBody>
                  {result.jurors.map((j) => (
                    <TableRow key={j.userId}>
                      <TableCell className="font-medium">{j.name}</TableCell>
                      <TableCell className="text-right tabular-nums">{j.evaluationCount}</TableCell>
                      <TableCell className="text-right tabular-nums">{j.rawMean.toFixed(2)}</TableCell>
                      {/* Red when the juror grades more than 0.5 below the cohort mean, green when more than 0.5 above */}
                      <TableCell
                        className={`text-right tabular-nums ${
                          j.deltaFromCohort < -0.5 ? 'text-red-600' : j.deltaFromCohort > 0.5 ? 'text-emerald-600' : ''
                        }`}
                      >
                        {j.deltaFromCohort > 0 ? '+' : ''}
                        {j.deltaFromCohort.toFixed(2)}
                      </TableCell>
                      <TableCell className="text-right tabular-nums">
                        {j.effectiveInfluence == null ? '-' : j.effectiveInfluence.toFixed(2)}
                      </TableCell>
                      <TableCell>
                        <Badge variant="outline" className={severityStyle[j.severity]}>
                          {j.severity === 'outlier' && <AlertTriangle className="mr-1 h-3 w-3" />}
                          {j.severity}
                        </Badge>
                      </TableCell>
                      <TableCell className="max-w-md text-sm text-muted-foreground">
                        {j.summary}
                      </TableCell>
                    </TableRow>
                  ))}
                </TableBody>
              </Table>
            </div>

            {/* Wrapping in `new Date` formats the timestamp whether it arrives as a Date
                or as a serialized string — TODO confirm which the tRPC transformer delivers. */}
            <p className="text-xs text-muted-foreground">
              Generated {new Date(result.generatedAt).toLocaleString()} · {result.tokensUsed} tokens · model {result.model}
            </p>
          </>
        )}
      </CardContent>
    </Card>
  )
}
|
||||||
|
|
||||||
function DiversityTab() {
|
function DiversityTab() {
|
||||||
const [selectedValue, setSelectedValue] = useState<string | null>(null)
|
const [selectedValue, setSelectedValue] = useState<string | null>(null)
|
||||||
|
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ type SortableProjectRowProps = {
|
|||||||
entry: (RankedProjectEntry & { originalIndex?: number }) | undefined
|
entry: (RankedProjectEntry & { originalIndex?: number }) | undefined
|
||||||
projectInfo: ProjectInfo | undefined
|
projectInfo: ProjectInfo | undefined
|
||||||
jurorScores: JurorScore[] | undefined
|
jurorScores: JurorScore[] | undefined
|
||||||
|
balancedScore: number | null
|
||||||
onSelect: () => void
|
onSelect: () => void
|
||||||
isSelected: boolean
|
isSelected: boolean
|
||||||
originalRank: number | undefined // from snapshotOrder — always in sync with localOrder
|
originalRank: number | undefined // from snapshotOrder — always in sync with localOrder
|
||||||
@@ -95,6 +96,7 @@ function SortableProjectRow({
|
|||||||
entry,
|
entry,
|
||||||
projectInfo,
|
projectInfo,
|
||||||
jurorScores,
|
jurorScores,
|
||||||
|
balancedScore,
|
||||||
onSelect,
|
onSelect,
|
||||||
isSelected,
|
isSelected,
|
||||||
originalRank,
|
originalRank,
|
||||||
@@ -199,11 +201,25 @@ function SortableProjectRow({
|
|||||||
</span>
|
</span>
|
||||||
) : null}
|
) : null}
|
||||||
|
|
||||||
{/* Average score */}
|
{/* Raw + balanced averages shown side by side */}
|
||||||
{entry?.avgGlobalScore !== null && entry?.avgGlobalScore !== undefined && jurorScores && jurorScores.length > 1 && (
|
{entry?.avgGlobalScore !== null && entry?.avgGlobalScore !== undefined && jurorScores && jurorScores.length > 1 && (
|
||||||
<span className="text-xs font-medium text-muted-foreground" title="Average score">
|
<div className="flex items-center gap-1.5 text-xs" title="Raw juror average vs. juror-balanced average (z-score normalized per juror, rescaled to 1-10)">
|
||||||
= {entry.avgGlobalScore.toFixed(1)}
|
<span className="font-medium text-muted-foreground">
|
||||||
</span>
|
{entry.avgGlobalScore.toFixed(1)}
|
||||||
|
</span>
|
||||||
|
{balancedScore != null && Math.abs(balancedScore - entry.avgGlobalScore) >= 0.05 && (
|
||||||
|
<span
|
||||||
|
className={cn(
|
||||||
|
'font-semibold tabular-nums rounded px-1.5 py-0.5 border',
|
||||||
|
balancedScore > entry.avgGlobalScore
|
||||||
|
? 'bg-emerald-50 text-emerald-700 border-emerald-200'
|
||||||
|
: 'bg-amber-50 text-amber-700 border-amber-200',
|
||||||
|
)}
|
||||||
|
>
|
||||||
|
⇢ {balancedScore.toFixed(1)}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Advance decision indicator */}
|
{/* Advance decision indicator */}
|
||||||
@@ -909,7 +925,8 @@ export function RankingDashboard({ competitionId: _competitionId, roundId }: Ran
|
|||||||
currentRank={index + 1}
|
currentRank={index + 1}
|
||||||
entry={rankingMap.get(projectId)}
|
entry={rankingMap.get(projectId)}
|
||||||
projectInfo={projectInfoMap.get(projectId)}
|
projectInfo={projectInfoMap.get(projectId)}
|
||||||
jurorScores={evalScores?.[projectId]}
|
jurorScores={evalScores?.byProject[projectId]}
|
||||||
|
balancedScore={evalScores?.balanced[projectId]?.balancedAverage ?? null}
|
||||||
onSelect={() => setSelectedProjectId(projectId)}
|
onSelect={() => setSelectedProjectId(projectId)}
|
||||||
isSelected={selectedProjectId === projectId}
|
isSelected={selectedProjectId === projectId}
|
||||||
originalRank={hasReorders ? snapshotOrder[projectId] : undefined}
|
originalRank={hasReorders ? snapshotOrder[projectId] : undefined}
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
import { z } from 'zod'
|
import { z } from 'zod'
|
||||||
import { router, observerProcedure } from '../trpc'
|
import { router, observerProcedure, adminProcedure } from '../trpc'
|
||||||
import { normalizeCountryToCode } from '@/lib/countries'
|
import { normalizeCountryToCode } from '@/lib/countries'
|
||||||
import { getUserAvatarUrl } from '../utils/avatar-url'
|
import { getUserAvatarUrl } from '../utils/avatar-url'
|
||||||
import { getProjectLogoUrl } from '../utils/project-logo-url'
|
import { getProjectLogoUrl } from '../utils/project-logo-url'
|
||||||
import { aggregateVotes } from '../services/deliberation'
|
import { aggregateVotes } from '../services/deliberation'
|
||||||
import { validateRoundConfig } from '@/types/competition-configs'
|
import { validateRoundConfig } from '@/types/competition-configs'
|
||||||
import type { LiveFinalConfig } from '@/types/competition-configs'
|
import type { LiveFinalConfig } from '@/types/competition-configs'
|
||||||
|
import { computeBalanceContext, computeBalancedProjectScores, type ScorePoint } from '../services/juror-balance'
|
||||||
|
import { generateJurorCalibration } from '../services/ai-juror-calibration'
|
||||||
|
|
||||||
const editionOrRoundInput = z.object({
|
const editionOrRoundInput = z.object({
|
||||||
roundId: z.string().optional(),
|
roundId: z.string().optional(),
|
||||||
@@ -185,73 +187,70 @@ export const analyticsRouter = router({
|
|||||||
}),
|
}),
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get project rankings with average scores
|
* Get project rankings with raw and balanced (juror-normalized) average scores.
|
||||||
|
*
|
||||||
|
* `averageScore` is the raw mean of per-evaluation criterion averages.
|
||||||
|
* `balancedScore` rescales each juror's contributions via z-score (relative
|
||||||
|
* to their own mean + stddev across projects they reviewed in this round),
|
||||||
|
* then maps back onto the same 1-10 scale using the overall mean + stddev.
|
||||||
|
* A harsh juror's scores are pulled up, a lenient juror's pulled down, so
|
||||||
|
* rankings aren't skewed by a single outlier grader.
|
||||||
*/
|
*/
|
||||||
getProjectRankings: observerProcedure
|
getProjectRankings: observerProcedure
|
||||||
.input(editionOrRoundInput.and(z.object({ limit: z.number().optional() })))
|
.input(editionOrRoundInput.and(z.object({ limit: z.number().optional() })))
|
||||||
.query(async ({ ctx, input }) => {
|
.query(async ({ ctx, input }) => {
|
||||||
const projects = await ctx.prisma.project.findMany({
|
const [projects, evaluations] = await Promise.all([
|
||||||
where: projectWhere(input),
|
ctx.prisma.project.findMany({
|
||||||
select: {
|
where: projectWhere(input),
|
||||||
id: true,
|
select: {
|
||||||
title: true,
|
id: true,
|
||||||
teamName: true,
|
title: true,
|
||||||
status: true,
|
teamName: true,
|
||||||
assignments: {
|
status: true,
|
||||||
where: assignmentWhere(input),
|
|
||||||
select: {
|
|
||||||
evaluation: {
|
|
||||||
select: { criterionScoresJson: true, status: true },
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
}),
|
||||||
})
|
ctx.prisma.evaluation.findMany({
|
||||||
|
where: evalWhere(input, { status: 'SUBMITTED' }),
|
||||||
|
select: {
|
||||||
|
criterionScoresJson: true,
|
||||||
|
assignment: { select: { userId: true, projectId: true } },
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
])
|
||||||
|
|
||||||
|
// Extract a single eval-level score (mean of numeric criterion scores) per evaluation.
|
||||||
|
const points: ScorePoint[] = []
|
||||||
|
for (const e of evaluations) {
|
||||||
|
const scores = e.criterionScoresJson as Record<string, unknown> | null
|
||||||
|
if (!scores) continue
|
||||||
|
const vals = Object.values(scores).filter((s): s is number => typeof s === 'number')
|
||||||
|
if (vals.length === 0) continue
|
||||||
|
const rawScore = vals.reduce((a, b) => a + b, 0) / vals.length
|
||||||
|
points.push({ projectId: e.assignment.projectId, userId: e.assignment.userId, rawScore })
|
||||||
|
}
|
||||||
|
|
||||||
|
const balanceCtx = computeBalanceContext(points)
|
||||||
|
const balancedByProject = computeBalancedProjectScores(points, balanceCtx)
|
||||||
|
|
||||||
// Calculate average scores
|
|
||||||
const rankings = projects
|
const rankings = projects
|
||||||
.map((project) => {
|
.map((project) => {
|
||||||
const allScores: number[] = []
|
const result = balancedByProject.get(project.id)
|
||||||
|
|
||||||
project.assignments.forEach((assignment) => {
|
|
||||||
const evaluation = assignment.evaluation
|
|
||||||
if (evaluation?.status === 'SUBMITTED') {
|
|
||||||
const scores = evaluation.criterionScoresJson as Record<
|
|
||||||
string,
|
|
||||||
number
|
|
||||||
> | null
|
|
||||||
if (scores) {
|
|
||||||
const scoreValues = Object.values(scores).filter(
|
|
||||||
(s): s is number => typeof s === 'number'
|
|
||||||
)
|
|
||||||
if (scoreValues.length > 0) {
|
|
||||||
const average =
|
|
||||||
scoreValues.reduce((a, b) => a + b, 0) / scoreValues.length
|
|
||||||
allScores.push(average)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
const averageScore =
|
|
||||||
allScores.length > 0
|
|
||||||
? allScores.reduce((a, b) => a + b, 0) / allScores.length
|
|
||||||
: null
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id: project.id,
|
id: project.id,
|
||||||
title: project.title,
|
title: project.title,
|
||||||
teamName: project.teamName,
|
teamName: project.teamName,
|
||||||
status: project.status,
|
status: project.status,
|
||||||
averageScore,
|
averageScore: result?.rawAverage ?? null,
|
||||||
evaluationCount: allScores.length,
|
balancedScore: result?.balancedAverage ?? null,
|
||||||
|
evaluationCount: result?.count ?? 0,
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.sort((a, b) => {
|
.sort((a, b) => {
|
||||||
// Evaluated projects first (sorted by score desc), unevaluated at bottom
|
const aScore = a.balancedScore ?? a.averageScore
|
||||||
if (a.averageScore !== null && b.averageScore !== null) return b.averageScore - a.averageScore
|
const bScore = b.balancedScore ?? b.averageScore
|
||||||
if (a.averageScore !== null) return -1
|
if (aScore !== null && bScore !== null) return bScore - aScore
|
||||||
if (b.averageScore !== null) return 1
|
if (aScore !== null) return -1
|
||||||
|
if (bScore !== null) return 1
|
||||||
return 0
|
return 0
|
||||||
})
|
})
|
||||||
|
|
||||||
@@ -2345,4 +2344,19 @@ export const analyticsRouter = router({
|
|||||||
standings,
|
standings,
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AI-powered juror calibration analysis for an evaluation round.
|
||||||
|
* Produces a plain-language explanation of the per-juror z-score balance
|
||||||
|
* already applied in ranking — describes, does not prescribe.
|
||||||
|
*/
|
||||||
|
generateJurorCalibration: adminProcedure
|
||||||
|
.input(z.object({ roundId: z.string() }))
|
||||||
|
.mutation(async ({ ctx, input }) => {
|
||||||
|
return generateJurorCalibration({
|
||||||
|
roundId: input.roundId,
|
||||||
|
userId: ctx.user.id,
|
||||||
|
prisma: ctx.prisma,
|
||||||
|
})
|
||||||
|
}),
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import {
|
|||||||
} from '../services/ai-ranking'
|
} from '../services/ai-ranking'
|
||||||
import { logAudit } from '../utils/audit'
|
import { logAudit } from '../utils/audit'
|
||||||
import type { EvaluationConfig } from '@/types/competition-configs'
|
import type { EvaluationConfig } from '@/types/competition-configs'
|
||||||
|
import { computeBalanceContext, computeBalancedProjectScores, type ScorePoint } from '../services/juror-balance'
|
||||||
|
|
||||||
// ─── Local Types ───────────────────────────────────────────────────────────────
|
// ─── Local Types ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -471,6 +472,7 @@ export const rankingRouter = router({
|
|||||||
evaluation: { status: 'SUBMITTED' },
|
evaluation: { status: 'SUBMITTED' },
|
||||||
},
|
},
|
||||||
select: {
|
select: {
|
||||||
|
userId: true,
|
||||||
projectId: true,
|
projectId: true,
|
||||||
user: { select: { name: true, email: true } },
|
user: { select: { name: true, email: true } },
|
||||||
evaluation: {
|
evaluation: {
|
||||||
@@ -489,6 +491,8 @@ export const rankingRouter = router({
|
|||||||
decision: boolean | null
|
decision: boolean | null
|
||||||
}>> = {}
|
}>> = {}
|
||||||
|
|
||||||
|
const balancePoints: ScorePoint[] = []
|
||||||
|
|
||||||
for (const a of assignments) {
|
for (const a of assignments) {
|
||||||
if (!a.evaluation) continue
|
if (!a.evaluation) continue
|
||||||
const list = byProject[a.projectId] ?? []
|
const list = byProject[a.projectId] ?? []
|
||||||
@@ -511,8 +515,28 @@ export const rankingRouter = router({
|
|||||||
decision,
|
decision,
|
||||||
})
|
})
|
||||||
byProject[a.projectId] = list
|
byProject[a.projectId] = list
|
||||||
|
|
||||||
|
if (a.evaluation.globalScore != null) {
|
||||||
|
balancePoints.push({
|
||||||
|
projectId: a.projectId,
|
||||||
|
userId: a.userId,
|
||||||
|
rawScore: a.evaluation.globalScore,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return byProject
|
const balanceCtx = computeBalanceContext(balancePoints)
|
||||||
|
const balancedByProject = computeBalancedProjectScores(balancePoints, balanceCtx)
|
||||||
|
|
||||||
|
// Per-project balanced average on the 1-10 scale, comparable to raw avgs.
|
||||||
|
const balanced: Record<string, { rawAverage: number | null; balancedAverage: number | null }> = {}
|
||||||
|
for (const [projectId, result] of balancedByProject.entries()) {
|
||||||
|
balanced[projectId] = {
|
||||||
|
rawAverage: result.rawAverage,
|
||||||
|
balancedAverage: result.balancedAverage,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { byProject, balanced }
|
||||||
}),
|
}),
|
||||||
})
|
})
|
||||||
|
|||||||
355
src/server/services/ai-juror-calibration.ts
Normal file
355
src/server/services/ai-juror-calibration.ts
Normal file
@@ -0,0 +1,355 @@
|
|||||||
|
/**
|
||||||
|
* AI-Powered Juror Calibration Advisory
|
||||||
|
*
|
||||||
|
* Analyzes per-juror grading statistics for an evaluation round and
|
||||||
|
* produces a human-readable explanation of how each juror's scores compare
|
||||||
|
* to the cohort. Describes the z-score balance that's already applied in
|
||||||
|
* ranking; does NOT introduce a new weighting layer — only explains the
|
||||||
|
* existing math in plain language so admins can justify results to jurors.
|
||||||
|
*
|
||||||
|
* GDPR: Juror identifiers are replaced with Juror-1, Juror-2, ... before any
|
||||||
|
* call to OpenAI. No names or emails leave the server.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { TRPCError } from '@trpc/server'
|
||||||
|
import { getOpenAI, getConfiguredModel, buildCompletionParams, AI_MODELS } from '@/lib/openai'
|
||||||
|
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
|
||||||
|
import { classifyAIError, createParseError, logAIError } from './ai-errors'
|
||||||
|
import type { PrismaClient } from '@prisma/client'
|
||||||
|
import { computeBalanceContext, type ScorePoint } from './juror-balance'
|
||||||
|
|
||||||
|
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * One juror's row in the calibration advisory: grading statistics for the
 * round plus the severity label and narrative attached to that juror.
 */
export type JurorCalibrationEntry = {
  /** Id of the juror (the assignment's `userId`). */
  userId: string
  /** Display name shown to admins. */
  name: string
  /** Number of scored evaluations attributed to this juror. */
  evaluationCount: number
  /** Mean of the juror's per-evaluation scores. */
  rawMean: number
  /** Standard deviation of the juror's per-evaluation scores. */
  stddev: number
  /** Juror mean minus cohort mean; negative means harsher than the cohort. */
  deltaFromCohort: number
  /** Juror stddev relative to cohort stddev, or `null` when it cannot be computed. */
  effectiveInfluence: number | null
  /** How far the juror's grading pattern departs from the cohort. */
  severity: 'normal' | 'notable' | 'outlier'
  /** Short narrative describing the juror's grading pattern. */
  summary: string
}
|
||||||
|
|
||||||
|
/**
 * Full calibration advisory for one round, as returned by
 * `generateJurorCalibration`: cohort statistics, narrative and per-juror rows,
 * plus metadata about the generation itself.
 */
export type JurorCalibrationResult = {
  /** Id of the analyzed round. */
  roundId: string
  /** Name of the analyzed round. */
  roundName: string
  /** Mean score across the whole cohort. */
  cohortMean: number
  /** Standard deviation of scores across the whole cohort. */
  cohortStddev: number
  /** Number of evaluations included in the analysis. */
  totalEvaluations: number
  /** Number of jurors included in the analysis. */
  totalJurors: number
  /** Plain-language summary of the cohort's grading patterns. */
  overallSummary: string
  /** Short bullet points accompanying the summary; may be empty. */
  keyTakeaways: string[]
  /** Per-juror rows. */
  jurors: JurorCalibrationEntry[]
  /** Tokens consumed by the AI call. */
  tokensUsed: number
  /** Identifier of the model that produced the narrative. */
  model: string
  /** When the advisory was generated. */
  generatedAt: Date
}
|
||||||
|
|
||||||
|
/**
 * Shape of the JSON the model is expected to return.
 * `jurorId` is presumably the anonymized "Juror-N" label rather than a real
 * user id — verify against the prompt builder.
 */
type AIResponsePayload = {
  /** Plain-language summary of the cohort's grading patterns. */
  overallSummary: string
  /** Short bullet points accompanying the summary. */
  keyTakeaways: string[]
  /** One assessment per juror. */
  jurors: {
    jurorId: string
    severity: 'normal' | 'notable' | 'outlier'
    summary: string
  }[]
}
|
||||||
|
|
||||||
|
/**
 * Server-side per-juror statistics computed from the balance context before
 * any AI-generated fields are attached. Contains real identifiers, so it must
 * not be sent to the model as-is.
 */
type InternalJurorRecord = {
  /** Juror's user id (key of the balance context's juror stats). */
  userId: string
  /** Juror's name, falling back to email, then to 'Unknown'. */
  name: string
  /** Number of score points contributed by this juror. */
  evaluationCount: number
  /** Mean of the juror's raw scores. */
  rawMean: number
  /** Standard deviation of the juror's raw scores. */
  stddev: number
  /** `rawMean` minus the overall cohort mean. */
  deltaFromCohort: number
  /**
   * Juror stddev divided by cohort stddev, capped at 2; `null` when either
   * stddev is not positive.
   */
  effectiveInfluence: number | null
}
|
||||||
|
|
||||||
|
// ─── Main Orchestrator ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Generate the AI Juror Calibration Advisory for one round.
 *
 * Flow:
 *  1. Load the round and its SUBMITTED evaluations.
 *  2. Reduce every evaluation to a single score (mean of its numeric criterion
 *     scores) and compute per-juror and cohort statistics.
 *  3. Send an anonymized ("Juror-N") statistics payload to OpenAI in JSON mode.
 *  4. Sanitize the model's reply and merge its narratives back onto the jurors.
 *
 * Takes a single options object:
 *  - `roundId`: round to analyze.
 *  - `userId`:  id recorded on the AI usage log entries.
 *  - `prisma`:  client used for the round and evaluation queries.
 *
 * @returns Cohort statistics, the AI summary/takeaways and one entry per juror.
 *   Jurors the model omitted (or described with an invalid severity) fall back
 *   to the statistical `classifySeverity` result and a placeholder summary.
 * @throws TRPCError `NOT_FOUND` when the round does not exist.
 * @throws TRPCError `BAD_REQUEST` when no submitted evaluation has numeric scores.
 * @throws TRPCError `PRECONDITION_FAILED` when OpenAI is not configured.
 * @throws TRPCError `INTERNAL_SERVER_ERROR` when the AI call fails or its reply
 *   is not a JSON object.
 */
export async function generateJurorCalibration({
  roundId,
  userId,
  prisma,
}: {
  roundId: string
  userId: string
  prisma: PrismaClient
}): Promise<JurorCalibrationResult> {
  // All figures leaving this function (prompt and result) use 2-decimal rounding.
  const round2 = (value: number): number => Math.round(value * 100) / 100

  const round = await prisma.round.findUnique({
    where: { id: roundId },
    select: { id: true, name: true, roundType: true },
  })

  if (!round) {
    throw new TRPCError({ code: 'NOT_FOUND', message: 'Round not found' })
  }

  const evaluations = await prisma.evaluation.findMany({
    where: {
      status: 'SUBMITTED',
      assignment: { roundId },
    },
    select: {
      globalScore: true,
      criterionScoresJson: true,
      assignment: {
        select: {
          userId: true,
          projectId: true,
          user: { select: { id: true, name: true, email: true } },
        },
      },
    },
  })

  // Build (project, juror, score) points using each eval's mean criterion score,
  // matching how the reports page reports raw + balanced averages per project.
  const points: ScorePoint[] = []
  const nameByUserId = new Map<string, string>()

  for (const e of evaluations) {
    const scores = e.criterionScoresJson as Record<string, unknown> | null
    if (!scores) continue
    // Non-numeric criterion values (text answers, booleans, ...) are ignored.
    const vals = Object.values(scores).filter((s): s is number => typeof s === 'number')
    if (vals.length === 0) continue
    const rawScore = vals.reduce((a, b) => a + b, 0) / vals.length
    points.push({
      projectId: e.assignment.projectId,
      userId: e.assignment.userId,
      rawScore,
    })
    nameByUserId.set(
      e.assignment.userId,
      e.assignment.user.name ?? e.assignment.user.email ?? 'Unknown',
    )
  }

  if (points.length === 0) {
    throw new TRPCError({
      code: 'BAD_REQUEST',
      message: 'No submitted evaluations with numeric scores in this round',
    })
  }

  const ctx = computeBalanceContext(points)

  // Build internal juror stats and a per-juror effective influence ratio
  // (scale of the juror's raw stddev relative to the cohort stddev).
  const internalJurors: InternalJurorRecord[] = []
  for (const [uid, stats] of ctx.jurorStats.entries()) {
    const effectiveInfluence = ctx.overallStddev > 0 && stats.stddev > 0
      ? Math.min(2, stats.stddev / ctx.overallStddev)
      : null
    internalJurors.push({
      userId: uid,
      name: nameByUserId.get(uid) ?? 'Unknown',
      evaluationCount: stats.count,
      rawMean: stats.mean,
      stddev: stats.stddev,
      deltaFromCohort: stats.mean - ctx.overallMean,
      effectiveInfluence,
    })
  }

  // Sort by absolute delta from cohort desc so largest outliers land first in prompts + UI.
  internalJurors.sort((a, b) => Math.abs(b.deltaFromCohort) - Math.abs(a.deltaFromCohort))

  // Build anonymized payload for the AI call: names and user ids never leave the server.
  const anonymizedMap = new Map<string, string>()
  const anonymizedJurors = internalJurors.map((j, i) => {
    const id = `Juror-${i + 1}`
    anonymizedMap.set(j.userId, id)
    return {
      jurorId: id,
      evaluationCount: j.evaluationCount,
      rawMean: round2(j.rawMean),
      stddev: round2(j.stddev),
      deltaFromCohort: round2(j.deltaFromCohort),
      effectiveInfluence: j.effectiveInfluence != null ? round2(j.effectiveInfluence) : null,
    }
  })

  const openai = await getOpenAI()
  if (!openai) {
    throw new TRPCError({
      code: 'PRECONDITION_FAILED',
      message: 'OpenAI is not configured. Please set up your API key in Settings.',
    })
  }

  const model = await getConfiguredModel(AI_MODELS.QUICK)

  const prompt = buildCalibrationPrompt({
    roundName: round.name,
    cohortMean: round2(ctx.overallMean),
    cohortStddev: round2(ctx.overallStddev),
    totalEvaluations: points.length,
    jurors: anonymizedJurors,
  })

  let aiResponse: SanitizedCalibrationResponse
  let tokensUsed = 0

  try {
    const params = buildCompletionParams(model, {
      messages: [{ role: 'user', content: prompt }],
      jsonMode: true,
      temperature: 0.2,
      maxTokens: 2000,
    })

    const response = await openai.chat.completions.create(params)
    tokensUsed = extractTokenUsage(response).totalTokens
    const content = response.choices[0]?.message?.content
    if (!content) throw new Error('Empty response from AI')

    let parsed: unknown
    try {
      parsed = JSON.parse(content)
    } catch (parseError) {
      const reason = parseError instanceof Error ? parseError.message : String(parseError)
      const err = createParseError(reason)
      logAIError('JurorCalibration', 'generate', err)
      throw new TRPCError({
        code: 'INTERNAL_SERVER_ERROR',
        message: 'Failed to parse AI response. Please try again.',
      })
    }

    // Valid JSON is not necessarily the requested shape: validate before use so a
    // malformed reply cannot crash the merge step below with a raw TypeError.
    const sanitized = sanitizeCalibrationResponse(parsed)
    if (!sanitized) {
      const err = createParseError('AI response is not a JSON object')
      logAIError('JurorCalibration', 'generate', err)
      throw new TRPCError({
        code: 'INTERNAL_SERVER_ERROR',
        message: 'Failed to parse AI response. Please try again.',
      })
    }
    aiResponse = sanitized
  } catch (error) {
    // Parse failures were already logged and converted above; pass them through.
    if (error instanceof TRPCError) throw error
    const classified = classifyAIError(error)
    logAIError('JurorCalibration', 'generate', classified)
    await logAIUsage({
      userId,
      action: 'JUROR_CALIBRATION',
      entityType: 'Round',
      entityId: roundId,
      model,
      promptTokens: 0,
      completionTokens: 0,
      totalTokens: tokensUsed,
      itemsProcessed: 0,
      status: 'ERROR',
      errorMessage: classified.message,
    })
    throw new TRPCError({
      code: 'INTERNAL_SERVER_ERROR',
      message: classified.message,
    })
  }

  await logAIUsage({
    userId,
    action: 'JUROR_CALIBRATION',
    entityType: 'Round',
    entityId: roundId,
    model,
    promptTokens: 0,
    completionTokens: 0,
    totalTokens: tokensUsed,
    itemsProcessed: internalJurors.length,
    status: 'SUCCESS',
  })

  // Merge AI narrative back with internal stats by anonymized id.
  // On duplicate ids the last entry wins.
  const narrativeById = new Map<string, SanitizedJurorNarrative>()
  for (const entry of aiResponse.jurors) {
    narrativeById.set(entry.jurorId, entry)
  }

  const jurors: JurorCalibrationEntry[] = internalJurors.map((j) => {
    const anonId = anonymizedMap.get(j.userId)
    const narrative = anonId !== undefined ? narrativeById.get(anonId) : undefined
    return {
      userId: j.userId,
      name: j.name,
      evaluationCount: j.evaluationCount,
      rawMean: round2(j.rawMean),
      stddev: round2(j.stddev),
      deltaFromCohort: round2(j.deltaFromCohort),
      effectiveInfluence: j.effectiveInfluence != null ? round2(j.effectiveInfluence) : null,
      // Fall back to the deterministic classification when the model skipped
      // this juror or returned a severity outside the allowed set.
      severity: narrative?.severity ?? classifySeverity(j, ctx.overallStddev),
      summary: narrative?.summary ?? 'No AI narrative available.',
    }
  })

  return {
    roundId: round.id,
    roundName: round.name,
    cohortMean: round2(ctx.overallMean),
    cohortStddev: round2(ctx.overallStddev),
    totalEvaluations: points.length,
    totalJurors: internalJurors.length,
    overallSummary: aiResponse.overallSummary,
    keyTakeaways: aiResponse.keyTakeaways,
    jurors,
    tokensUsed,
    model,
    generatedAt: new Date(),
  }
}

/** One juror narrative from the model after validation; only the id is guaranteed. */
type SanitizedJurorNarrative = Pick<AIResponsePayload['jurors'][number], 'jurorId'> &
  Partial<AIResponsePayload['jurors'][number]>

/** Model reply after validation: every top-level field is present and well-typed. */
type SanitizedCalibrationResponse = {
  overallSummary: string
  keyTakeaways: string[]
  jurors: SanitizedJurorNarrative[]
}

/**
 * Validate the parsed model reply without trusting its shape.
 *
 * Returns `null` when the value is not a plain JSON object. Otherwise returns
 * a best-effort copy: missing or ill-typed top-level fields get defaults, juror
 * entries without a string `jurorId` are dropped, and an unknown `severity` or
 * blank `summary` is left undefined so the caller can apply its own fallback.
 */
function sanitizeCalibrationResponse(raw: unknown): SanitizedCalibrationResponse | null {
  if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) return null
  const obj = raw as Record<string, unknown>

  const overallSummary =
    typeof obj.overallSummary === 'string' && obj.overallSummary.trim() !== ''
      ? obj.overallSummary
      : 'No AI summary available.'

  const keyTakeaways = Array.isArray(obj.keyTakeaways)
    ? (obj.keyTakeaways as unknown[]).filter((t): t is string => typeof t === 'string')
    : []

  const jurors: SanitizedJurorNarrative[] = []
  if (Array.isArray(obj.jurors)) {
    for (const entry of obj.jurors as unknown[]) {
      if (typeof entry !== 'object' || entry === null) continue
      const { jurorId, severity, summary } = entry as Record<string, unknown>
      if (typeof jurorId !== 'string') continue

      const narrative: SanitizedJurorNarrative = { jurorId }
      if (severity === 'normal' || severity === 'notable' || severity === 'outlier') {
        narrative.severity = severity
      }
      if (typeof summary === 'string' && summary.trim() !== '') {
        narrative.summary = summary
      }
      jurors.push(narrative)
    }
  }

  return { overallSummary, keyTakeaways, jurors }
}
|
||||||
|
|
||||||
|
// ─── Helpers ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Deterministic severity used when the AI reply has no usable severity for a juror.
 *
 * Measures how far the juror's mean sits from the cohort mean, in units of the
 * cohort standard deviation, using the same thresholds the prompt gives the model:
 * >= 1.5 is an outlier, >= 0.75 is notable, anything else is normal.
 *
 * @param juror - Juror statistics; only `deltaFromCohort` and `evaluationCount` are read.
 * @param cohortStddev - Standard deviation of all scores in the round.
 * @returns 'normal' when the cohort has no spread or the juror has fewer than
 *   3 evaluations (too little data to classify, matching the prompt guideline).
 */
function classifySeverity(
  juror: InternalJurorRecord,
  cohortStddev: number,
): 'normal' | 'notable' | 'outlier' {
  // Below this many evaluations a juror's mean is too noisy to flag.
  const minEvaluationsToClassify = 3

  // No spread (or a degenerate value) means there is nothing to measure against.
  if (!(cohortStddev > 0)) return 'normal'
  if (juror.evaluationCount < minEvaluationsToClassify) return 'normal'

  const zDelta = Math.abs(juror.deltaFromCohort) / cohortStddev
  if (zDelta >= 1.5) return 'outlier'
  if (zDelta >= 0.75) return 'notable'
  return 'normal'
}
|
||||||
|
|
||||||
|
/**
 * Build the single user-message prompt for the juror calibration advisory.
 *
 * The prompt gives the model the cohort figures, a short explanation of how
 * balancing works, the anonymized per-juror statistics as pretty-printed JSON,
 * the exact JSON shape to return and the severity thresholds to apply.
 *
 * @param payload - Round name, cohort statistics and anonymized juror rows
 *   (expected to be pre-rounded and pre-sorted by the caller).
 * @returns The prompt text.
 */
function buildCalibrationPrompt(payload: {
  roundName: string
  cohortMean: number
  cohortStddev: number
  totalEvaluations: number
  jurors: Array<{
    jurorId: string
    evaluationCount: number
    rawMean: number
    stddev: number
    deltaFromCohort: number
    effectiveInfluence: number | null
  }>
}): string {
  // Round names are free text. JSON-encoding keeps the surrounding quotes but
  // escapes embedded quotes/newlines so a name cannot break out of its line.
  const quotedRoundName = JSON.stringify(payload.roundName)

  return `You are analyzing juror grading patterns for a competition evaluation round. Your job is to EXPLAIN the statistical normalization that has already been applied; you are NOT introducing a new weighting scheme or prescribing changes.

CONTEXT:
- Round: ${quotedRoundName}
- Cohort mean: ${payload.cohortMean} (scale 1-10)
- Cohort stddev: ${payload.cohortStddev}
- Total submitted evaluations: ${payload.totalEvaluations}

HOW BALANCING WORKS:
Each juror's scores are z-score normalized against their own mean and stddev, then rescaled back onto the 1-10 range. A juror who averages 2 points below the cohort won't drag projects down more than their peers; a lenient juror won't inflate projects. "effectiveInfluence" is roughly the juror's stddev divided by the cohort stddev — a value near 1.0 means they spread their scores similarly to the cohort; values well under 1 mean compressed scoring, well over 1 mean wide spread.

JUROR DATA (anonymized, sorted by |deltaFromCohort| desc):
${JSON.stringify(payload.jurors, null, 2)}

Return a JSON object with this exact shape:
{
  "overallSummary": "2-3 sentences summarizing grading dispersion across the cohort — is the panel tightly aligned or widely divergent?",
  "keyTakeaways": ["up to 4 bullets: notable patterns, risks, what to watch"],
  "jurors": [
    {
      "jurorId": "Juror-N (matching the input)",
      "severity": "normal" | "notable" | "outlier",
      "summary": "One short sentence about this juror's grading pattern, referring to their rawMean, deltaFromCohort, and stddev. Example: 'Scored on average 2.1 points below cohort across 8 evaluations — consistently harsh, low internal variance.'"
    }
  ]
}

Guidelines:
- "outlier" = delta from cohort >= 1.5 cohort-stddev in either direction
- "notable" = delta from cohort 0.75-1.5 cohort-stddev
- "normal" = delta from cohort < 0.75 cohort-stddev
- A juror with very few evaluations (< 3) can't be classified confidently — note this in their summary and prefer "normal".
- Be factual and specific. Reference the numbers. No speculation about intent.
- Do not include juror names — only the anonymized jurorId.
- Include every juror from the input in the jurors array. Order matches input.`
}
|
||||||
120
src/server/services/juror-balance.ts
Normal file
120
src/server/services/juror-balance.ts
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
/**
|
||||||
|
* Juror balancing: z-score normalization to correct for per-juror grading harshness.
|
||||||
|
*
|
||||||
|
* A juror who grades 1 standard deviation below their peers on shared projects
|
||||||
|
* shouldn't punish those projects more than a juror who grades at the mean.
|
||||||
|
* We compute per-juror mean + stddev across their scores in a round, z-normalize
|
||||||
|
* each score, then rescale back onto the same 1-10 scale using the overall
|
||||||
|
* round-level mean + stddev so the balanced number is directly comparable to
|
||||||
|
* the raw average.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** A single score one juror gave one project; the unit of input for balancing. */
export type ScorePoint = {
  /** Project that was scored. */
  projectId: string
  /** Juror who gave the score. */
  userId: string
  /** The juror's score for the project, before any balancing. */
  rawScore: number
}
|
||||||
|
|
||||||
|
/** Raw and juror-balanced averages for one project. */
export type BalancedProjectResult = {
  /** Project the averages belong to. */
  projectId: string
  /** Plain mean of the project's raw scores. */
  rawAverage: number | null
  /**
   * Mean after per-juror normalization, expressed on the raw-score scale.
   * `null` when balancing is not possible because the overall stddev is 0.
   */
  balancedAverage: number | null
  /** Number of score points that went into the averages. */
  count: number
}
|
||||||
|
|
||||||
|
/** Grading statistics for one juror across the supplied score points. */
export type JurorBalance = {
  /** Juror the statistics describe. */
  userId: string
  /** Mean of the juror's raw scores. */
  mean: number
  /** Standard deviation of the juror's raw scores; 0 when they have a single score. */
  stddev: number
  /** How many scores the juror contributed. */
  count: number
}
|
||||||
|
|
||||||
|
/** Everything needed to balance scores: cohort-wide figures plus per-juror statistics. */
export type BalanceContext = {
  /** Mean of every raw score in the input. */
  overallMean: number
  /** Standard deviation of every raw score in the input. */
  overallStddev: number
  /** Per-juror statistics keyed by juror user id. */
  jurorStats: Map<string, JurorBalance>
}
|
||||||
|
|
||||||
|
/**
 * Derive grading statistics from a flat list of (project, juror, score) points:
 * one mean / standard deviation / count per juror, plus the mean and standard
 * deviation of all scores taken together.
 *
 * Standard deviations are population values (sum of squared deviations divided
 * by n). Fewer than two scores yields a stddev of 0, and an empty list yields
 * 0 for both overall figures and an empty juror map.
 *
 * @param points - Score points to analyze; not mutated.
 * @returns Overall mean/stddev and per-juror statistics keyed by user id.
 */
export function computeBalanceContext(points: ScorePoint[]): BalanceContext {
  // Mean and population stddev of a list, with the small-sample defaults above.
  const summarize = (values: number[]): { mean: number; stddev: number } => {
    const n = values.length
    if (n === 0) return { mean: 0, stddev: 0 }

    let total = 0
    for (const value of values) total += value
    const mean = total / n
    if (n === 1) return { mean, stddev: 0 }

    let squaredDeviations = 0
    for (const value of values) squaredDeviations += (value - mean) ** 2
    return { mean, stddev: Math.sqrt(squaredDeviations / n) }
  }

  // Bucket raw scores by juror, keeping first-seen juror order.
  const scoresByJuror = new Map<string, number[]>()
  for (const { userId, rawScore } of points) {
    const bucket = scoresByJuror.get(userId)
    if (bucket) {
      bucket.push(rawScore)
    } else {
      scoresByJuror.set(userId, [rawScore])
    }
  }

  const jurorStats = new Map<string, JurorBalance>()
  scoresByJuror.forEach((scores, userId) => {
    const { mean, stddev } = summarize(scores)
    jurorStats.set(userId, { userId, mean, stddev, count: scores.length })
  })

  const overall = summarize(points.map((point) => point.rawScore))

  return {
    overallMean: overall.mean,
    overallStddev: overall.stddev,
    jurorStats,
  }
}
|
||||||
|
|
||||||
|
/**
 * Roll score points up into one raw and one juror-balanced average per project.
 *
 * Each score is turned into a z-score against its juror's own mean and stddev.
 * When the juror is unknown to `ctx` or has a stddev of 0, the overall mean and
 * stddev are used instead. The project's mean z-score is then mapped back onto
 * the raw scale as `overallMean + meanZ * overallStddev`.
 *
 * @param points - Score points to aggregate; not mutated.
 * @param ctx - Statistics produced for the same set of points.
 * @returns Results keyed by project id, in first-seen project order.
 *   `balancedAverage` is `null` for every project when `ctx.overallStddev` is 0.
 */
export function computeBalancedProjectScores(
  points: ScorePoint[],
  ctx: BalanceContext,
): Map<string, BalancedProjectResult> {
  const { overallMean, overallStddev, jurorStats } = ctx
  const canBalance = overallStddev > 0

  // z-score of one point, preferring the juror's own distribution.
  const zScoreOf = (point: ScorePoint): number => {
    const juror = jurorStats.get(point.userId)
    return juror && juror.stddev > 0
      ? (point.rawScore - juror.mean) / juror.stddev
      : (point.rawScore - overallMean) / overallStddev
  }

  const pointsByProject = new Map<string, ScorePoint[]>()
  for (const point of points) {
    const bucket = pointsByProject.get(point.projectId)
    if (bucket) {
      bucket.push(point)
    } else {
      pointsByProject.set(point.projectId, [point])
    }
  }

  const results = new Map<string, BalancedProjectResult>()
  pointsByProject.forEach((projectPoints, projectId) => {
    const count = projectPoints.length

    let rawTotal = 0
    for (const point of projectPoints) rawTotal += point.rawScore
    const rawAverage = rawTotal / count

    let balancedAverage: number | null = null
    if (canBalance) {
      let zTotal = 0
      for (const point of projectPoints) zTotal += zScoreOf(point)
      const meanZ = zTotal / count
      balancedAverage = overallMean + meanZ * overallStddev
    }

    results.set(projectId, { projectId, rawAverage, balancedAverage, count })
  })

  return results
}
|
||||||
@@ -21,6 +21,7 @@ export type AIAction =
|
|||||||
| 'ROUTING'
|
| 'ROUTING'
|
||||||
| 'SHORTLIST'
|
| 'SHORTLIST'
|
||||||
| 'RANKING'
|
| 'RANKING'
|
||||||
|
| 'JUROR_CALIBRATION'
|
||||||
|
|
||||||
export type AIStatus = 'SUCCESS' | 'PARTIAL' | 'ERROR'
|
export type AIStatus = 'SUCCESS' | 'PARTIAL' | 'ERROR'
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user