From 70f1f64ea33682a43730f10bbfa881ff03e5b36d Mon Sep 17 00:00:00 2001
From: Matt <matt@Matt-Surface.local>
Date: Mon, 27 Apr 2026 14:28:49 +0200
Subject: [PATCH] feat: factor balanced pass rate into composite rankings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dashboard now computes its own composite ranking score on the
client, blending (balanced-or-raw) average score with (balanced-or-raw)
advance pass rate via the existing scoreWeight / passRateWeight
sliders. Both inputs are toggled independently:

- 'Balance juror grading style (score)' — existing useBalancedRanking
- 'Balance juror approval rate (advance vote)' — new useBalancedPassRate

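Both default to true and persist per-round. The pass rate is balanced
the same way scores are: each juror's personal yes-rate p gives them a
Bernoulli stddev sqrt(p * (1 - p)), each vote is z-normalized against
that, and the project's mean z is rescaled onto the round's overall
yes rate and stddev, then clamped to [0, 1]. Jurors with no variance
(all yes or all no) are normalized against the round-wide rate
instead, and when the round itself has no variance the balanced rate
is null and the dashboard falls back to the raw pass rate. A 'yes'
from a juror who rarely says yes counts more than a 'yes' from a
lenient juror.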

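List rows now show two chips — score (Bal/Raw X.XX) and pass rate
(Bal Yes% / Yes% N%) — so admins can see what's driving the order.
The threshold cutoff and live re-sort effect both use the same
composite formula; for the cutoff, the 0-1 composite is mapped back
onto the 1-10 scale so it can be compared with the score threshold.
A project with no score or no advance votes contributes 0 for that
component.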

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../admin/round/ranking-dashboard.tsx         | 145 ++++++++++++++----
 src/server/routers/ranking.ts                 |  48 +++++-
 src/server/services/juror-balance.ts          | 105 +++++++++++++
 src/types/competition-configs.ts              |   6 +
 4 files changed, 268 insertions(+), 36 deletions(-)
diff --git a/src/components/admin/round/ranking-dashboard.tsx b/src/components/admin/round/ranking-dashboard.tsx
index 2528bab..403fc47 100644
--- a/src/components/admin/round/ranking-dashboard.tsx
+++ b/src/components/admin/round/ranking-dashboard.tsx
@@ -86,7 +86,10 @@ type SortableProjectRowProps = {
   jurorScores: JurorScore[] | undefined
   rawAverage: number | null
   balancedAverage: number | null
+  rawPassRate: number | null
+  balancedPassRate: number | null
   useBalanced: boolean
+  useBalancedPassRate: boolean
   onSelect: () => void
   isSelected: boolean
   originalRank: number | undefined // from snapshotOrder — always in sync with localOrder
@@ -102,7 +105,10 @@ function SortableProjectRow({
   jurorScores,
   rawAverage,
   balancedAverage,
+  rawPassRate,
+  balancedPassRate,
   useBalanced,
+  useBalancedPassRate,
   onSelect,
   isSelected,
   originalRank,
@@ -212,7 +218,7 @@ function SortableProjectRow({
           return (
             <span
               className="inline-flex items-baseline gap-1 rounded-md border bg-muted/50 px-2 py-0.5 text-xs tabular-nums"
-              title={`${label === 'Bal' ? 'Juror-balanced average' : 'Raw juror average'} (used for ranking)`}
+              title={`${label === 'Bal' ? 'Juror-balanced average' : 'Raw juror average'} (factored into rank)`}
             >
               <span className="text-[10px] uppercase tracking-wide text-muted-foreground">{label}</span>
               <span className="font-semibold">{active.toFixed(2)}</span>
@@ -220,6 +226,22 @@ function SortableProjectRow({
           )
         })()}
 
+        {/* Active pass rate chip */}
+        {(() => {
+          const active = useBalancedPassRate && balancedPassRate != null ? balancedPassRate : rawPassRate
+          if (active == null) return null
+          const label = useBalancedPassRate && balancedPassRate != null ? 'Bal Yes%' : 'Yes%'
+          return (
+            <span
+              className="inline-flex items-baseline gap-1 rounded-md border bg-muted/50 px-2 py-0.5 text-xs tabular-nums"
+              title={`${useBalancedPassRate && balancedPassRate != null ? 'Harshness-corrected approval rate' : 'Raw approval rate'} (factored into rank)`}
+            >
+              <span className="text-[10px] uppercase tracking-wide text-muted-foreground">{label}</span>
+              <span className="font-semibold">{Math.round(active * 100)}%</span>
+            </span>
+          )
+        })()}
+
         {/* Advance decision indicator */}
         <div className={cn(
           'inline-flex items-center gap-1 rounded-full px-2 py-0.5 text-xs font-medium',
@@ -270,6 +292,7 @@ export function RankingDashboard({ competitionId: _competitionId, roundId }: Ran
   const [localScoreWeight, setLocalScoreWeight] = useState(5)
   const [localPassRateWeight, setLocalPassRateWeight] = useState(5)
   const [useBalanced, setUseBalanced] = useState(true)
+  const [useBalancedPassRate, setUseBalancedPassRate] = useState(true)
   const weightsInitialized = useRef(false)
 
   // ─── Sensors ──────────────────────────────────────────────────────────────
@@ -409,20 +432,30 @@ export function RankingDashboard({ competitionId: _competitionId, roundId }: Ran
       const dedupedStartup = dedup(startup)
       const dedupedConcept = dedup(concept)
 
-      // Sort by balanced (juror-corrected) score descending when the toggle is
-      // on, otherwise by raw. compositeScore is the final tiebreaker. The
-      // threshold cutoff line uses the same metric so the cutoff lands in the
-      // right spot regardless of which score type is used.
-      const scoreFor = (projectId: string, raw: number | null | undefined) => {
-        const balanced = evalScores.balanced[projectId]?.balancedAverage
-        if (useBalanced && balanced != null) return balanced
-        return raw ?? 0
+      // Composite ranking score combining (balanced-or-raw) average with the
+      // (balanced-or-raw) advance pass rate via the round's scoreWeight /
+      // passRateWeight sliders. Same formula used by the live re-sort effect
+      // and the threshold cutoff so all three stay in lock-step.
+      const compositeFor = (projectId: string, rawScoreFallback: number | null | undefined): number => {
+        const b = evalScores.balanced[projectId]  // balanced stats for this project, if any
+        const score = useBalanced && b?.balancedAverage != null ? b.balancedAverage : (rawScoreFallback ?? null)
+        const scoreUnit = score != null ? Math.max(0, Math.min(1, (score - 1) / 9)) : 0  // maps 1..10 onto 0..1 (clamped); no score counts as 0
+        const passRate =
+          useBalancedPassRate && b?.balancedPassRate != null ? b.balancedPassRate
+          : b?.rawPassRate != null ? b.rawPassRate
+          : null
+        const passUnit = passRate ?? 0  // a missing pass rate counts as 0
+        const sW = localScoreWeight
+        const pW = localPassRateWeight
+        const totalW = sW + pW
+        if (totalW <= 0) return scoreUnit  // non-positive total weight: rank by score alone (avoids dividing by 0)
+        return (sW * scoreUnit + pW * passUnit) / totalW  // weighted mean of the two components
+      }
       dedupedStartup.sort((a, b) =>
-        scoreFor(b.projectId, b.avgGlobalScore) - scoreFor(a.projectId, a.avgGlobalScore)
+        compositeFor(b.projectId, b.avgGlobalScore) - compositeFor(a.projectId, a.avgGlobalScore)
         || b.compositeScore - a.compositeScore)
       dedupedConcept.sort((a, b) =>
-        scoreFor(b.projectId, b.avgGlobalScore) - scoreFor(a.projectId, a.avgGlobalScore)
+        compositeFor(b.projectId, b.avgGlobalScore) - compositeFor(a.projectId, a.avgGlobalScore)
         || b.compositeScore - a.compositeScore)
 
       // Track original order for override detection (same effect = always in sync)
@@ -492,22 +525,32 @@ export function RankingDashboard({ competitionId: _competitionId, roundId }: Ran
         return true
       })
     }
-    const scoreFor = (projectId: string, raw: number | null | undefined) => {
-      const balanced = evalScores.balanced[projectId]?.balancedAverage
-      if (useBalanced && balanced != null) return balanced
-      return raw ?? 0
+    const compositeFor = (projectId: string, rawScoreFallback: number | null | undefined): number => {
+      const b = evalScores.balanced[projectId]  // balanced stats for this project, if any
+      const score = useBalanced && b?.balancedAverage != null ? b.balancedAverage : (rawScoreFallback ?? null)
+      const scoreUnit = score != null ? Math.max(0, Math.min(1, (score - 1) / 9)) : 0  // maps 1..10 onto 0..1 (clamped); no score counts as 0
+      const passRate =
+        useBalancedPassRate && b?.balancedPassRate != null ? b.balancedPassRate
+        : b?.rawPassRate != null ? b.rawPassRate
+        : null
+      const passUnit = passRate ?? 0  // a missing pass rate counts as 0
+      const sW = localScoreWeight
+      const pW = localPassRateWeight
+      const totalW = sW + pW
+      if (totalW <= 0) return scoreUnit  // non-positive total weight: rank by score alone (avoids dividing by 0)
+      return (sW * scoreUnit + pW * passUnit) / totalW  // weighted mean of the two components
+    }
     const sortedStartup = dedup(startup).sort((a, b) =>
-      scoreFor(b.projectId, b.avgGlobalScore) - scoreFor(a.projectId, a.avgGlobalScore)
+      compositeFor(b.projectId, b.avgGlobalScore) - compositeFor(a.projectId, a.avgGlobalScore)
       || b.compositeScore - a.compositeScore)
     const sortedConcept = dedup(concept).sort((a, b) =>
-      scoreFor(b.projectId, b.avgGlobalScore) - scoreFor(a.projectId, a.avgGlobalScore)
+      compositeFor(b.projectId, b.avgGlobalScore) - compositeFor(a.projectId, a.avgGlobalScore)
       || b.compositeScore - a.compositeScore)
     setLocalOrder({
       STARTUP: sortedStartup.map((r) => r.projectId),
       BUSINESS_CONCEPT: sortedConcept.map((r) => r.projectId),
     })
-  }, [useBalanced, evalScores, snapshot])
+  }, [useBalanced, useBalancedPassRate, evalScores, snapshot, localScoreWeight, localPassRateWeight])
 
   // ─── numericCriteria from eval form ─────────────────────────────────────
   const numericCriteria = useMemo(() => {
@@ -523,6 +566,7 @@ export function RankingDashboard({ competitionId: _competitionId, roundId }: Ran
     if (!roundData?.configJson) return
     const cfg = roundData.configJson as Record<string, unknown>
     setUseBalanced((cfg.useBalancedRanking as boolean | undefined) ?? true)
+    setUseBalancedPassRate((cfg.useBalancedPassRate as boolean | undefined) ?? true)
     if (weightsInitialized.current) return
     const saved = (cfg.criteriaWeights ?? {}) as Record<string, number>
     setLocalWeights(saved)
@@ -543,6 +587,16 @@ export function RankingDashboard({ competitionId: _competitionId, roundId }: Ran
     })
   }
 
+  const persistUseBalancedPassRate = (next: boolean) => {  // toggle handler: update local state, then save the flag on the round config
+    setUseBalancedPassRate(next)  // local state first, so the switch and the re-sort react before the save completes
+    if (!roundData?.configJson) return  // no loaded config to merge into: the change stays local only
+    const cfg = roundData.configJson as Record<string, unknown>
+    updateRoundMutation.mutate({
+      id: roundId,
+      configJson: { ...cfg, useBalancedPassRate: next },  // NOTE(review): spreads the last-fetched config; confirm a just-saved sibling toggle cannot be overwritten by stale data
+    })
+  }
+
   // ─── Save weights + criteria text to round config ─────────────────────────
   const saveRankingConfig = () => {
     if (!roundData?.configJson) return
@@ -930,15 +984,26 @@ export function RankingDashboard({ competitionId: _competitionId, roundId }: Ran
                   : (evalConfig?.conceptAdvanceCount ?? 0))
                 const threshold = evalConfig?.advanceScoreThreshold ?? 0
 
-                // Effective ranking score respects the per-round
-                // useBalancedRanking toggle. Both the sort and the threshold
-                // check read from the same helper so the cutoff lands in the
-                // right spot.
+                // Effective ranking score for the threshold cutoff. Mirrors
+                // the composite formula used by the sort: weighted blend of
+                // (balanced-or-raw) avg score and (balanced-or-raw) pass rate.
+                // To compare against the visible 1-10 threshold, the whole 0-1
+                // composite (score and pass rate blended) is mapped back to 1-10.
                 const effectiveScore = (id: string) => {
                   const e = rankingMap.get(id)
-                  const balanced = evalScores?.balanced[id]?.balancedAverage
-                  if (useBalanced && balanced != null) return balanced
-                  return e?.avgGlobalScore ?? 0
+                  const b = evalScores?.balanced[id]
+                  const score = useBalanced && b?.balancedAverage != null ? b.balancedAverage : (e?.avgGlobalScore ?? null)
+                  const scoreUnit = score != null ? Math.max(0, Math.min(1, (score - 1) / 9)) : 0
+                  const passRate =
+                    useBalancedPassRate && b?.balancedPassRate != null ? b.balancedPassRate
+                    : b?.rawPassRate != null ? b.rawPassRate
+                    : null
+                  const passUnit = passRate ?? 0
+                  const sW = localScoreWeight
+                  const pW = localPassRateWeight
+                  const totalW = sW + pW
+                  const composite = totalW <= 0 ? scoreUnit : (sW * scoreUnit + pW * passUnit) / totalW
+                  return composite * 9 + 1
                 }
 
                 let cutoffIndex = -1
@@ -1000,7 +1065,10 @@ export function RankingDashboard({ competitionId: _competitionId, roundId }: Ran
                                   jurorScores={evalScores?.byProject[projectId]}
                                   rawAverage={evalScores?.balanced[projectId]?.rawAverage ?? null}
                                   balancedAverage={evalScores?.balanced[projectId]?.balancedAverage ?? null}
+                                  rawPassRate={evalScores?.balanced[projectId]?.rawPassRate ?? null}
+                                  balancedPassRate={evalScores?.balanced[projectId]?.balancedPassRate ?? null}
                                   useBalanced={useBalanced}
+                                  useBalancedPassRate={useBalancedPassRate}
                                   onSelect={() => setSelectedProjectId(projectId)}
                                   isSelected={selectedProjectId === projectId}
                                   originalRank={hasReorders ? snapshotOrder[projectId] : undefined}
@@ -1065,15 +1133,26 @@ export function RankingDashboard({ competitionId: _competitionId, roundId }: Ran
             </div>
           ) : projectDetail ? (
             <div className="mt-6 space-y-6">
-              {/* Balanced-ranking toggle (per-round; persists across viewers) */}
-              <div className="flex items-center justify-between rounded-lg border p-3">
-                <div className="flex flex-col">
-                  <span className="text-sm font-medium">Use balanced scoring for ranking</span>
-                  <span className="text-xs text-muted-foreground">
-                    Corrects for per-juror grading style. Off uses raw averages.
-                  </span>
+              {/* Balanced-ranking toggles (per-round; persist across viewers) */}
+              <div className="space-y-2">
+                <div className="flex items-center justify-between rounded-lg border p-3">
+                  <div className="flex flex-col">
+                    <span className="text-sm font-medium">Balance juror grading style (score)</span>
+                    <span className="text-xs text-muted-foreground">
+                      Corrects for harshness on average scores. Off uses raw averages.
+                    </span>
+                  </div>
+                  <Switch checked={useBalanced} onCheckedChange={persistUseBalanced} />
+                </div>
+                <div className="flex items-center justify-between rounded-lg border p-3">
+                  <div className="flex flex-col">
+                    <span className="text-sm font-medium">Balance juror approval rate (advance vote)</span>
+                    <span className="text-xs text-muted-foreground">
+                      Weights yes/no votes by how often each juror says yes. Off uses raw pass rate.
+                    </span>
+                  </div>
+                  <Switch checked={useBalancedPassRate} onCheckedChange={persistUseBalancedPassRate} />
                 </div>
-                <Switch checked={useBalanced} onCheckedChange={persistUseBalanced} />
               </div>
               {/* Stats summary: combined Avg card with Raw + Balanced side-by-side */}
               {projectDetail.stats && (() => {
diff --git a/src/server/routers/ranking.ts b/src/server/routers/ranking.ts
index 5757c9a..a2cb1ab 100644
--- a/src/server/routers/ranking.ts
+++ b/src/server/routers/ranking.ts
@@ -12,7 +12,14 @@ import {
 } from '../services/ai-ranking'
 import { logAudit } from '../utils/audit'
 import type { EvaluationConfig } from '@/types/competition-configs'
-import { computeBalanceContext, computeBalancedProjectScores, type ScorePoint } from '../services/juror-balance'
+import {
+  computeBalanceContext,
+  computeBalancedProjectScores,
+  computePassRateContext,
+  computeBalancedPassRates,
+  type ScorePoint,
+  type VotePoint,
+} from '../services/juror-balance'
 
 // ─── Local Types ───────────────────────────────────────────────────────────────
 
@@ -492,6 +499,7 @@ export const rankingRouter = router({
       }>> = {}
 
       const balancePoints: ScorePoint[] = []
+      const votePoints: VotePoint[] = []
 
       for (const a of assignments) {
         if (!a.evaluation) continue
@@ -523,19 +531,45 @@ export const rankingRouter = router({
             rawScore: a.evaluation.globalScore,
           })
         }
+
+        if (decision !== null) {
+          votePoints.push({
+            projectId: a.projectId,
+            userId: a.userId,
+            vote: decision,
+          })
+        }
       }
 
       const balanceCtx = computeBalanceContext(balancePoints)
       const balancedByProject = computeBalancedProjectScores(balancePoints, balanceCtx)
 
-      // Per-project balanced average on the 1-10 scale, comparable to raw avgs.
-      const balanced: Record<string, { rawAverage: number | null; balancedAverage: number | null }> = {}
+      const passRateCtx = computePassRateContext(votePoints)
+      const balancedPassRateByProject = computeBalancedPassRates(votePoints, passRateCtx)
+
+      // Per-project: balanced score (1-10) + balanced pass rate (0-1).
+      const balanced: Record<string, {
+        rawAverage: number | null
+        balancedAverage: number | null
+        rawPassRate: number | null
+        balancedPassRate: number | null
+      }> = {}
       for (const [projectId, result] of balancedByProject.entries()) {
         balanced[projectId] = {
           rawAverage: result.rawAverage,
           balancedAverage: result.balancedAverage,
+          rawPassRate: null,
+          balancedPassRate: null,
         }
       }
+      for (const [projectId, result] of balancedPassRateByProject.entries()) {
+        const existing = balanced[projectId] ?? {
+          rawAverage: null, balancedAverage: null, rawPassRate: null, balancedPassRate: null,
+        }
+        existing.rawPassRate = result.rawPassRate
+        existing.balancedPassRate = result.balancedPassRate
+        balanced[projectId] = existing
+      }
 
       // Per-juror grading stats so the side panel can render each juror's
       // personal baseline and rescaled contribution.
@@ -544,12 +578,20 @@ export const rankingRouter = router({
         jurorStats[userId] = { mean: s.mean, stddev: s.stddev, count: s.count }
       }
 
+      const jurorYesRates: Record<string, { yesRate: number; stddev: number; count: number }> = {}
+      for (const [userId, s] of passRateCtx.jurorYesRates.entries()) {
+        jurorYesRates[userId] = { yesRate: s.yesRate, stddev: s.stddev, count: s.count }
+      }
+
       return {
         byProject,
         balanced,
         jurorStats,
         overallMean: balanceCtx.overallMean,
         overallStddev: balanceCtx.overallStddev,
+        jurorYesRates,
+        overallYesRate: passRateCtx.overallYesRate,
+        overallYesStddev: passRateCtx.overallStddev,
       }
     }),
 })
diff --git a/src/server/services/juror-balance.ts b/src/server/services/juror-balance.ts
index 05e6b29..3c9c001 100644
--- a/src/server/services/juror-balance.ts
+++ b/src/server/services/juror-balance.ts
@@ -186,3 +186,108 @@ export function computePerRoundBalanced(
   }
   return out
 }
+
+/**
+ * Juror balancing for binary advance votes (yes/no).
+ *
+ * A "yes" from a juror who rarely says yes carries more weight than a "yes"
+ * from a juror who routinely advances projects. We z-normalize each vote
+ * against the juror's personal yes-rate distribution, then rescale the
+ * project-level mean back onto the round's overall yes-rate scale so the
+ * balanced number is directly comparable to the raw pass rate.
+ */
+export type VotePoint = {  // one juror's advance vote on one project
+  projectId: string
+  userId: string  // juror who cast the vote
+  vote: boolean  // true = "yes" (counted as 1), false = "no" (counted as 0)
+}
+
+export type JurorYesRate = {  // one juror's voting habit across the supplied votes
+  userId: string
+  yesRate: number  // share of this juror's votes that were "yes", 0..1
+  stddev: number  // sqrt(yesRate * (1 - yesRate)); 0 if the juror always voted the same way
+  count: number  // number of votes this juror cast
+}
+
+export type BalancedPassRateResult = {  // per-project output of computeBalancedPassRates
+  projectId: string
+  rawPassRate: number | null  // "yes" votes / all votes on the project
+  balancedPassRate: number | null  // juror-balanced rate clamped to 0..1; null when the round has no vote variance
+  count: number  // number of votes on the project
+}
+
+export type PassRateContext = {  // round-wide stats produced by computePassRateContext
+  overallYesRate: number  // "yes" votes / all votes; 0 when there are no votes
+  overallStddev: number  // sqrt(overallYesRate * (1 - overallYesRate))
+  jurorYesRates: Map<string, JurorYesRate>  // keyed by userId
+}
+
+export function computePassRateContext(votes: VotePoint[]): PassRateContext {  // builds round-wide and per-juror yes-rate stats
+  const byJuror = new Map<string, boolean[]>()  // userId -> every vote that juror cast
+  for (const v of votes) {
+    const arr = byJuror.get(v.userId) ?? []
+    arr.push(v.vote)
+    byJuror.set(v.userId, arr)
+  }
+
+  const jurorYesRates = new Map<string, JurorYesRate>()
+  for (const [userId, jurorVotes] of byJuror.entries()) {
+    const yesCount = jurorVotes.filter(Boolean).length
+    const yesRate = yesCount / jurorVotes.length  // length >= 1: a juror only appears here after casting a vote
+    // Bernoulli stddev: sqrt(p * (1 - p)); 0 when the juror voted all-yes or all-no
+    const stddev = Math.sqrt(yesRate * (1 - yesRate))
+    jurorYesRates.set(userId, { userId, yesRate, stddev, count: jurorVotes.length })
+  }
+
+  const totalYes = votes.filter((v) => v.vote).length
+  const overallYesRate = votes.length > 0 ? totalYes / votes.length : 0  // empty input gives 0 rather than NaN
+  const overallStddev = Math.sqrt(overallYesRate * (1 - overallYesRate))  // 0 when all votes agree or there are none
+
+  return { overallYesRate, overallStddev, jurorYesRates }
+}
+
+export function computeBalancedPassRates(  // per-project raw and juror-balanced pass rates
+  votes: VotePoint[],
+  ctx: PassRateContext,  // round-wide and per-juror stats, as produced by computePassRateContext
+): Map<string, BalancedPassRateResult> {
+  const byProject = new Map<string, VotePoint[]>()  // projectId -> votes cast on that project
+  for (const v of votes) {
+    const arr = byProject.get(v.projectId) ?? []
+    arr.push(v)
+    byProject.set(v.projectId, arr)
+  }
+
+  const results = new Map<string, BalancedPassRateResult>()
+  for (const [projectId, projectVotes] of byProject.entries()) {
+    const yesCount = projectVotes.filter((v) => v.vote).length
+    const rawPassRate = yesCount / projectVotes.length  // length >= 1, so never NaN
+
+    let balancedPassRate: number | null = null  // stays null when the round has no vote variance
+    if (ctx.overallStddev > 0) {
+      const zValues: number[] = []
+      for (const v of projectVotes) {
+        const stats = ctx.jurorYesRates.get(v.userId)
+        const voteVal = v.vote ? 1 : 0
+        if (stats && stats.stddev > 0) {
+          zValues.push((voteVal - stats.yesRate) / stats.stddev)  // z-score against this juror's own yes-rate
+        } else {
+          zValues.push((voteVal - ctx.overallYesRate) / ctx.overallStddev)  // unknown or zero-variance juror: use round-wide stats
+        }
+      }
+      const avgZ = zValues.reduce((a, b) => a + b, 0) / zValues.length
+      // Rescale and clamp to [0, 1] — a vote that goes against a juror's usual
+      // habit has a large |z|, so the rescaled mean can land well outside [0, 1].
+      const rescaled = ctx.overallYesRate + avgZ * ctx.overallStddev
+      balancedPassRate = Math.max(0, Math.min(1, rescaled))
+    }
+
+    results.set(projectId, {
+      projectId,
+      rawPassRate,
+      balancedPassRate,
+      count: projectVotes.length,
+    })
+  }
+
+  return results
+}
diff --git a/src/types/competition-configs.ts b/src/types/competition-configs.ts
index cdf0318..95bd107 100644
--- a/src/types/competition-configs.ts
+++ b/src/types/competition-configs.ts
@@ -147,6 +147,12 @@ export const EvaluationConfigSchema = z.object({
   // from the dashboard side panel.
   useBalancedRanking: z.boolean().default(true),
 
+  // Whether the project pass rate (yes/no advance vote) is harshness-corrected
+  // before being fed into the composite ranking formula. When true, a "yes" from
+  // a juror who rarely says yes weighs more than a "yes" from a lenient juror.
+  // Toggled separately from useBalancedRanking; both default to true.
+  useBalancedPassRate: z.boolean().default(true),
+
   // Ranking (Phase 1)
   rankingEnabled: z.boolean().default(false),
   rankingCriteria: z.string().optional(),