feat: surface juror-balanced scores and AI calibration advisory
All checks were successful
Build and Push Docker Image / build (push) Successful in 7m27s
All checks were successful
Build and Push Docker Image / build (push) Successful in 7m27s
Adds a shared juror-balancing utility (z-score normalization per juror, rescaled back onto the raw 1–10 scale) and wires it into:

- Admin reports page: the Top-10 project table now shows "Raw Avg" and "Balanced" columns side by side, and the summary stats row shows a balanced-average tile. Sorting defaults to the balanced score so that harsh and lenient graders no longer skew the ranking.
- Ranking dashboard: each project row shows a green/amber balanced-score chip next to the raw average when the two differ by ≥0.05, making it obvious when juror calibration moved a project's effective ranking.

Also adds the AI Juror Calibration Advisory — a mutation that takes anonymized per-juror stats, calls OpenAI, and produces a plain-language explanation of the cohort's grading patterns plus a per-juror severity (normal / notable / outlier) with a one-sentence narrative. The advisory describes the statistical balancing that already runs; it does not introduce a new weighting layer. It is rendered as a panel in the Juror Consistency tab when a specific round is selected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -45,7 +45,11 @@ import {
|
||||
Trophy,
|
||||
ArrowRight,
|
||||
Hash,
|
||||
Sparkles,
|
||||
Loader2,
|
||||
AlertTriangle,
|
||||
} from 'lucide-react'
|
||||
import { toast } from 'sonner'
|
||||
import { formatDateOnly } from '@/lib/utils'
|
||||
import {
|
||||
ScoreDistributionChart,
|
||||
@@ -271,6 +275,12 @@ function ReportsOverview() {
|
||||
const evaluated = projectRankings.filter(p => p.averageScore !== null)
|
||||
const scores = evaluated.map(p => p.averageScore as number)
|
||||
const avgScore = scores.length ? scores.reduce((a, b) => a + b, 0) / scores.length : 0
|
||||
const balancedScores = projectRankings
|
||||
.map(p => p.balancedScore)
|
||||
.filter((s): s is number => s != null)
|
||||
const avgBalanced = balancedScores.length
|
||||
? balancedScores.reduce((a, b) => a + b, 0) / balancedScores.length
|
||||
: null
|
||||
const minScore = scores.length ? Math.min(...scores) : 0
|
||||
const maxScore = scores.length ? Math.max(...scores) : 0
|
||||
const evalPercent = projectRankings.length ? Math.round((evaluated.length / projectRankings.length) * 100) : 0
|
||||
@@ -281,14 +291,28 @@ function ReportsOverview() {
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-4">
|
||||
<div className="grid grid-cols-2 gap-3 sm:grid-cols-5">
|
||||
<div className="rounded-lg border p-3 text-center">
|
||||
<p className="text-xs text-muted-foreground">Total Projects</p>
|
||||
<p className="text-xl font-bold tabular-nums">{projectRankings.length}</p>
|
||||
</div>
|
||||
<div className="rounded-lg border p-3 text-center">
|
||||
<p className="text-xs text-muted-foreground">Avg Score</p>
|
||||
<p className="text-xl font-bold tabular-nums">{avgScore ? avgScore.toFixed(1) : '-'}</p>
|
||||
<div
|
||||
className="rounded-lg border p-3 text-center"
|
||||
title="Unweighted mean of all submitted juror scores"
|
||||
>
|
||||
<p className="text-xs text-muted-foreground">Raw Avg</p>
|
||||
<p className="text-xl font-bold tabular-nums text-muted-foreground">
|
||||
{avgScore ? avgScore.toFixed(1) : '-'}
|
||||
</p>
|
||||
</div>
|
||||
<div
|
||||
className="rounded-lg border p-3 text-center"
|
||||
title="Juror-balanced average: per-juror z-score normalization rescaled to the 1–10 range"
|
||||
>
|
||||
<p className="text-xs text-muted-foreground">Balanced Avg</p>
|
||||
<p className="text-xl font-bold tabular-nums">
|
||||
{avgBalanced == null ? '-' : avgBalanced.toFixed(1)}
|
||||
</p>
|
||||
</div>
|
||||
<div className="rounded-lg border p-3 text-center">
|
||||
<p className="text-xs text-muted-foreground">Evaluated</p>
|
||||
@@ -319,7 +343,7 @@ function ReportsOverview() {
|
||||
{/* Top 10 ranked table */}
|
||||
<div>
|
||||
<p className="text-sm font-medium text-muted-foreground mb-2 flex items-center gap-1.5">
|
||||
<Trophy className="h-3.5 w-3.5" /> Top 10 by Average Score
|
||||
<Trophy className="h-3.5 w-3.5" /> Top 10 by Balanced Score
|
||||
</p>
|
||||
<div className="rounded-lg border">
|
||||
<Table>
|
||||
@@ -328,7 +352,18 @@ function ReportsOverview() {
|
||||
<TableHead className="w-10">#</TableHead>
|
||||
<TableHead>Project</TableHead>
|
||||
<TableHead className="hidden sm:table-cell">Team</TableHead>
|
||||
<TableHead className="text-right">Avg</TableHead>
|
||||
<TableHead
|
||||
className="text-right"
|
||||
title="Raw average of juror scores — uncorrected for per-juror harshness"
|
||||
>
|
||||
Raw Avg
|
||||
</TableHead>
|
||||
<TableHead
|
||||
className="text-right"
|
||||
title="Juror-balanced average: each juror's contribution is z-score normalized against their own grading distribution, then rescaled to the 1–10 range. Harsh and lenient jurors contribute on equal footing."
|
||||
>
|
||||
Balanced
|
||||
</TableHead>
|
||||
<TableHead className="text-right">Evals</TableHead>
|
||||
<TableHead>Status</TableHead>
|
||||
</TableRow>
|
||||
@@ -345,9 +380,12 @@ function ReportsOverview() {
|
||||
<TableCell className="hidden sm:table-cell text-muted-foreground">
|
||||
{p.teamName || '-'}
|
||||
</TableCell>
|
||||
<TableCell className="text-right tabular-nums">
|
||||
<TableCell className="text-right tabular-nums text-muted-foreground">
|
||||
{p.averageScore === null ? '-' : p.averageScore.toFixed(2)}
|
||||
</TableCell>
|
||||
<TableCell className="text-right tabular-nums font-semibold">
|
||||
{p.balancedScore == null ? '-' : p.balancedScore.toFixed(2)}
|
||||
</TableCell>
|
||||
<TableCell className="text-right tabular-nums">{p.evaluationCount}</TableCell>
|
||||
<TableCell>
|
||||
<Badge variant="outline">{formatStatusLabel(p.status)}</Badge>
|
||||
@@ -870,10 +908,150 @@ function JurorConsistencyTab() {
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
|
||||
{queryInput.roundId && (
|
||||
<JurorCalibrationPanel roundId={queryInput.roundId} />
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Panel that runs the `analytics.generateJurorCalibration` mutation on demand
 * for one round and renders its result: cohort summary tiles, the overall
 * narrative with key takeaways, and a per-juror table.
 *
 * Nothing is fetched on mount; the analysis only runs when the button is
 * clicked. Failures are reported through a toast.
 *
 * @param roundId - Round to analyze; sent as the mutation input.
 */
function JurorCalibrationPanel({ roundId }: { roundId: string }) {
  const mutation = trpc.analytics.generateJurorCalibration.useMutation({
    onError: (err) => toast.error(`Calibration analysis failed: ${err.message}`),
  })

  // The panel is mounted without a `key`, so mutation state survives a change
  // of the `roundId` prop. Only surface a result that was generated for the
  // round currently selected; otherwise the previous round's advisory would be
  // displayed under the new round.
  const result = mutation.variables?.roundId === roundId ? mutation.data : undefined

  // Badge colour classes per severity label; an unknown label yields
  // `undefined`, which leaves the default outline badge styling in place.
  const severityStyle: Record<string, string> = {
    outlier: 'bg-red-50 text-red-700 border-red-200',
    notable: 'bg-amber-50 text-amber-700 border-amber-200',
    normal: 'bg-muted text-muted-foreground',
  }

  return (
    <Card>
      <CardHeader>
        <div className="flex flex-wrap items-start justify-between gap-3">
          <div>
            <CardTitle className="flex items-center gap-2">
              <Sparkles className="h-5 w-5 text-[#de0f1e]" />
              AI Juror Calibration Advisory
            </CardTitle>
            <CardDescription>
              Plain-language explanation of the per-juror score balancing already applied to rankings.
              Describes, does not prescribe — the math runs regardless.
            </CardDescription>
          </div>
          <Button
            onClick={() => mutation.mutate({ roundId })}
            disabled={mutation.isPending}
            className="gap-2"
          >
            {mutation.isPending ? <Loader2 className="h-4 w-4 animate-spin" /> : <Sparkles className="h-4 w-4" />}
            {mutation.isPending ? 'Analyzing…' : result ? 'Regenerate' : 'Analyze jurors'}
          </Button>
        </div>
      </CardHeader>
      <CardContent className="space-y-5">
        {/* Empty state: shown until an analysis for this round exists. */}
        {!result && !mutation.isPending && (
          <p className="text-sm text-muted-foreground">
            Run the analysis to see per-juror grading patterns, cohort stats, and the calibration
            narrative for the selected round.
          </p>
        )}

        {result && (
          <>
            {/* Cohort-level summary tiles */}
            <div className="grid grid-cols-2 gap-3 sm:grid-cols-4">
              <div className="rounded-lg border p-3 text-center">
                <p className="text-xs text-muted-foreground">Cohort Mean</p>
                <p className="text-xl font-bold tabular-nums">{result.cohortMean.toFixed(2)}</p>
              </div>
              <div className="rounded-lg border p-3 text-center">
                <p className="text-xs text-muted-foreground">Cohort Stddev</p>
                <p className="text-xl font-bold tabular-nums">{result.cohortStddev.toFixed(2)}</p>
              </div>
              <div className="rounded-lg border p-3 text-center">
                <p className="text-xs text-muted-foreground">Evaluations</p>
                <p className="text-xl font-bold tabular-nums">{result.totalEvaluations}</p>
              </div>
              <div className="rounded-lg border p-3 text-center">
                <p className="text-xs text-muted-foreground">Jurors</p>
                <p className="text-xl font-bold tabular-nums">{result.totalJurors}</p>
              </div>
            </div>

            {/* Overall narrative plus optional bullet takeaways */}
            <div className="rounded-lg border bg-muted/30 p-4">
              <p className="text-sm leading-relaxed">{result.overallSummary}</p>
              {result.keyTakeaways.length > 0 && (
                <ul className="mt-3 space-y-1.5 text-sm">
                  {result.keyTakeaways.map((t, i) => (
                    <li key={i} className="flex items-start gap-2">
                      <ArrowRight className="mt-1 h-3.5 w-3.5 flex-shrink-0 text-muted-foreground" />
                      <span>{t}</span>
                    </li>
                  ))}
                </ul>
              )}
            </div>

            {/* Per-juror breakdown */}
            <div className="rounded-lg border">
              <Table>
                <TableHeader>
                  <TableRow>
                    <TableHead>Juror</TableHead>
                    <TableHead className="text-right">Evals</TableHead>
                    <TableHead className="text-right">Mean</TableHead>
                    <TableHead className="text-right">Δ Cohort</TableHead>
                    <TableHead className="text-right" title="Juror's stddev / cohort stddev">
                      Influence
                    </TableHead>
                    <TableHead>Severity</TableHead>
                    <TableHead>Notes</TableHead>
                  </TableRow>
                </TableHeader>
                <TableBody>
                  {result.jurors.map((j) => (
                    <TableRow key={j.userId}>
                      <TableCell className="font-medium">{j.name}</TableCell>
                      <TableCell className="text-right tabular-nums">{j.evaluationCount}</TableCell>
                      <TableCell className="text-right tabular-nums">{j.rawMean.toFixed(2)}</TableCell>
                      {/* Colour only deltas beyond ±0.5: red below the cohort, green above. */}
                      <TableCell
                        className={`text-right tabular-nums ${
                          j.deltaFromCohort < -0.5 ? 'text-red-600' : j.deltaFromCohort > 0.5 ? 'text-emerald-600' : ''
                        }`}
                      >
                        {j.deltaFromCohort > 0 ? '+' : ''}
                        {j.deltaFromCohort.toFixed(2)}
                      </TableCell>
                      <TableCell className="text-right tabular-nums">
                        {j.effectiveInfluence == null ? '-' : j.effectiveInfluence.toFixed(2)}
                      </TableCell>
                      <TableCell>
                        <Badge variant="outline" className={severityStyle[j.severity]}>
                          {j.severity === 'outlier' && <AlertTriangle className="mr-1 h-3 w-3" />}
                          {j.severity}
                        </Badge>
                      </TableCell>
                      <TableCell className="max-w-md text-sm text-muted-foreground">
                        {j.summary}
                      </TableCell>
                    </TableRow>
                  ))}
                </TableBody>
              </Table>
            </div>

            <p className="text-xs text-muted-foreground">
              Generated {result.generatedAt.toLocaleString()} · {result.tokensUsed} tokens · model {result.model}
            </p>
          </>
        )}
      </CardContent>
    </Card>
  )
}
|
||||
|
||||
function DiversityTab() {
|
||||
const [selectedValue, setSelectedValue] = useState<string | null>(null)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user