diff --git a/src/components/shared/score-explainer-dialog.tsx b/src/components/shared/score-explainer-dialog.tsx new file mode 100644 index 0000000..3b7d635 --- /dev/null +++ b/src/components/shared/score-explainer-dialog.tsx @@ -0,0 +1,109 @@ +'use client' + +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogTrigger, +} from '@/components/ui/dialog' +import { Button } from '@/components/ui/button' +import { Info } from 'lucide-react' +import type { ReactNode } from 'react' + +export function ScoreExplainerDialog({ trigger }: { trigger?: ReactNode }) { + return ( + + + {trigger ?? ( + + )} + + + + How scores are calculated + + +
+

Different jurors have different grading styles. Some grade harshly, some leniently. Balanced scoring corrects for that so a project isn't punished for drawing harsh jurors or rewarded for drawing lenient ones.

+ +
+

How it works

+
    +
  1. For each juror, calculate their personal average and spread across all the projects they scored in this round.
  2. Convert each individual score into "how many standard deviations above or below this juror's typical" — a 6 from a juror who averages 5 reads the same as a 9 from a juror who averages 8 (given a similar spread).
  3. Average those normalized values across the project's jurors.
  4. Rescale back onto the same 1–10 scale using the round's overall average and spread.
  5. The result is directly comparable to the raw average — same scale, but corrected for grading style.
+
+ +
+

Worked example

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Juror | Their typical avg | Score for "Project X" | What that means |
| --- | --- | --- | --- |
| Juror A (lenient) | 8.20 | 9.00 | Just above their typical (+0.4σ) |
| Juror B (harsh) | 5.80 | 7.50 | Well above their typical (+1.5σ) |
| Juror C (typical) | 7.00 | 8.00 | Slightly above their typical (+0.7σ) |
+

Raw average: (9.00 + 7.50 + 8.00) / 3 = 8.17. Balanced average rescales each juror's enthusiasm to the round's overall scale and lands at roughly 8.40 — Juror B's strong endorsement (well above their harsh baseline) carries more weight than the raw 7.50 suggests.

+
+ +
+

When it kicks in

+
    +
  • Needs at least 2 evaluations from the round to compute a juror's spread; otherwise that juror falls back to the round-wide average.
  • Needs at least one juror with non-zero spread; if every juror gave identical scores, balanced equals raw.
  • Computed within a single round only — a juror's grading style in an intake screening doesn't affect their balance in a deep evaluation.
+
+ +
+

Why we still show "Raw"

+

Both numbers are always shown so you can sanity-check the correction. The toggle at the top of the side panel decides which one is used for ranking.

+
+
+
+
+ ) +}