feat: weighted criteria in AI ranking, z-score normalization, threshold advancement, CSV export
All checks were successful
Build and Push Docker Image / build (push) Successful in 9m16s

- Add criteriaWeights to EvaluationConfig for per-criterion weight assignment (0-10)
- Rewrite ai-ranking service: fetch eval form criteria, compute per-criterion averages,
  z-score normalize juror scores to correct grading bias, send weighted criteria to AI
- Update AI prompts with criteria_definitions and per-project criteria_scores
- compositeScore uses weighted criteria when configured, falls back to globalScore
- Add collapsible ranking config section to dashboard (criteria text + weight sliders)
- Move rankingCriteria textarea from eval config tab to ranking dashboard
- Store criteriaWeights in ranking snapshot parsedRulesJson for audit
- Enhance projectScores CSV export with per-criterion averages, category, country
- Add Export CSV button to ranking dashboard header
- Add threshold-based advancement mode (decimal score threshold, e.g. 6.5)
  alongside existing top-N mode in advance dialog

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-02 11:24:14 +01:00
parent c6ebd169dd
commit 19b58e4434
6 changed files with 674 additions and 107 deletions

View File

@@ -98,20 +98,36 @@ export const exportRouter = router({
}),
/**
* Export project scores summary
* Export project scores summary with per-criterion averages
*/
projectScores: adminProcedure
.input(z.object({ roundId: z.string() }))
.query(async ({ ctx, input }) => {
// Fetch evaluation form to get criteria labels
const evalForm = await ctx.prisma.evaluationForm.findFirst({
where: { roundId: input.roundId, isActive: true },
select: { criteriaJson: true },
})
const criteria = (evalForm?.criteriaJson as Array<{
id: string; label: string; type?: string
}> | null) ?? []
const numericCriteria = criteria.filter((c) => !c.type || c.type === 'numeric')
const projects = await ctx.prisma.project.findMany({
where: {
assignments: { some: { roundId: input.roundId } },
},
include: {
assignments: {
where: { roundId: input.roundId },
include: {
evaluation: {
where: { status: 'SUBMITTED' },
select: {
globalScore: true,
binaryDecision: true,
criterionScoresJson: true,
},
},
},
},
@@ -132,9 +148,24 @@ export const exportRouter = router({
(e) => e?.binaryDecision === true
).length
// Per-criterion averages
const criterionAvgs: Record<string, string | null> = {}
for (const c of numericCriteria) {
const values: number[] = []
for (const e of evaluations) {
const scores = e?.criterionScoresJson as Record<string, number> | null
if (scores && typeof scores[c.id] === 'number') values.push(scores[c.id])
}
criterionAvgs[c.label] = values.length > 0
? (values.reduce((a, b) => a + b, 0) / values.length).toFixed(2)
: null
}
return {
title: p.title,
teamName: p.teamName,
category: p.competitionCategory ?? '',
country: p.country ?? '',
status: p.status,
tags: p.tags.join(', '),
totalEvaluations: evaluations.length,
@@ -146,6 +177,7 @@ export const exportRouter = router({
: null,
minScore: globalScores.length > 0 ? Math.min(...globalScores) : null,
maxScore: globalScores.length > 0 ? Math.max(...globalScores) : null,
...criterionAvgs,
yesVotes,
noVotes: evaluations.length - yesVotes,
yesPercentage:
@@ -171,12 +203,15 @@ export const exportRouter = router({
columns: [
'title',
'teamName',
'category',
'country',
'status',
'tags',
'totalEvaluations',
'averageScore',
'minScore',
'maxScore',
...numericCriteria.map((c) => c.label),
'yesVotes',
'noVotes',
'yesPercentage',

View File

@@ -85,7 +85,16 @@ export const rankingRouter = router({
fetchAndRankCategory('BUSINESS_CONCEPT', rules, input.roundId, ctx.prisma, ctx.user.id),
])
// Persist snapshot
// Read criteria weights for snapshot audit trail
const round = await ctx.prisma.round.findUniqueOrThrow({
where: { id: input.roundId },
select: { configJson: true },
})
const evalConfig = (round.configJson as EvaluationConfig | null) ?? ({} as EvaluationConfig)
const criteriaWeights = evalConfig.criteriaWeights ?? undefined
// Persist snapshot — embed weights alongside rules for audit
const parsedRulesWithWeights = { rules, weights: criteriaWeights } as unknown as Prisma.InputJsonValue
const snapshot = await ctx.prisma.rankingSnapshot.create({
data: {
roundId: input.roundId,
@@ -94,7 +103,7 @@ export const rankingRouter = router({
mode: 'CONFIRMED',
status: 'COMPLETED',
criteriaText: input.criteriaText,
parsedRulesJson: rules as unknown as Prisma.InputJsonValue,
parsedRulesJson: parsedRulesWithWeights,
startupRankingJson: startup.rankedProjects as unknown as Prisma.InputJsonValue,
conceptRankingJson: concept.rankedProjects as unknown as Prisma.InputJsonValue,
},
@@ -271,13 +280,16 @@ export const rankingRouter = router({
const result = await aiQuickRank(criteriaText, roundId, ctx.prisma, ctx.user.id)
// Embed weights alongside rules for audit
const criteriaWeights = config.criteriaWeights ?? undefined
const parsedRulesWithWeights = { rules: result.parsedRules, weights: criteriaWeights } as unknown as Prisma.InputJsonValue
const snapshot = await ctx.prisma.rankingSnapshot.create({
data: {
roundId,
triggeredById: ctx.user.id,
triggerType: 'MANUAL',
criteriaText,
parsedRulesJson: result.parsedRules as unknown as Prisma.InputJsonValue,
parsedRulesJson: parsedRulesWithWeights,
startupRankingJson: result.startup.rankedProjects as unknown as Prisma.InputJsonValue,
conceptRankingJson: result.concept.rankedProjects as unknown as Prisma.InputJsonValue,
mode: 'QUICK',

View File

@@ -11,7 +11,8 @@
* Design decisions:
* - Per-category processing (STARTUP / BUSINESS_CONCEPT) — two parallel AI calls
* - Projects with zero submitted evaluations are excluded (not ranked last)
* - compositeScore = 50% normalised avgGlobalScore + 50% passRate + tiny tiebreak
* - compositeScore uses weighted criteria when available, falls back to avgGlobalScore
* - Z-score normalization corrects for juror grading bias
*/
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
@@ -20,25 +21,48 @@ import { classifyAIError, logAIError } from './ai-errors'
import { sanitizeUserInput } from '@/server/services/ai-prompt-guard'
import { TRPCError } from '@trpc/server'
import type { CompetitionCategory, PrismaClient } from '@prisma/client'
import type { EvaluationConfig } from '@/types/competition-configs'
// ─── Types ────────────────────────────────────────────────────────────────────
// Criterion definition as stored in EvaluationForm.criteriaJson.
interface CriterionDef {
  id: string // stable criterion ID — keys into criterionScoresJson and criteriaWeights
  label: string // human-readable name; sent to the AI in place of the ID
  type?: string // e.g. 'numeric'; callers treat a missing type as numeric
  scale?: number | string // max score (e.g. 10) or range string (e.g. '1-10'); callers default to 1–10 when absent
  weight?: number // weight from the form itself; admin criteriaWeights config takes precedence
}
// Internal shape of a project before anonymization (real IDs, never sent to the AI).
interface ProjectForRanking {
  id: string // real Prisma project ID
  competitionCategory: CompetitionCategory
  avgGlobalScore: number | null // average of submitted Evaluation.globalScore (null when none scored)
  normalizedAvgScore: number | null // z-score normalized average (null when normalization impossible)
  passRate: number // proportion of binaryDecision=true among SUBMITTED evaluations
  evaluatorCount: number // count of SUBMITTED evaluations
  criterionAverages: Record<string, number> // criterionId → raw average score
  normalizedCriterionAverages: Record<string, number> // criterionId → z-score normalized average
}
// Anonymized shape sent to OpenAI — no real project or criterion IDs leave the server.
interface AnonymizedProjectForRanking {
  project_id: string // "P001", "P002", etc. — never real IDs
  avg_score: number | null // rounded to 2 decimals
  normalized_avg_score: number | null // bias-corrected average, rounded to 2 decimals
  pass_rate: number // 0–1
  evaluator_count: number
  category: string
  criteria_scores: Record<string, number> // keyed by criterion LABEL, not ID
  normalized_criteria_scores: Record<string, number> // keyed by criterion LABEL, not ID
}
// Criterion definition sent to OpenAI alongside the projects.
interface CriterionDefForAI {
  name: string // criterion label (IDs are never sent)
  weight: number // effective weight: admin criteriaWeights override, else form weight, else 1
  scale: string // rendered as a range string, e.g. '1-10'
}
// A single parsed rule returned by the criteria parser
@@ -58,6 +82,7 @@ export interface RankedProjectEntry {
rank: number // 1-indexed
compositeScore: number // 01 floating point
avgGlobalScore: number | null
normalizedAvgScore: number | null
passRate: number
evaluatorCount: number
aiRationale?: string // Optional: AI explanation for this project's rank
@@ -79,8 +104,11 @@ Admin will describe how they want projects ranked in natural language. Parse thi
Available data fields for ranking:
- avg_score: average jury evaluation score (110 scale, null if not scored)
- normalized_avg_score: bias-corrected average (z-score normalized across jurors)
- pass_rate: proportion of jury members who voted to advance the project (01)
- evaluator_count: number of jury members who submitted evaluations (tiebreak)
- criteria_scores: per-criterion averages (keyed by criterion name)
- normalized_criteria_scores: bias-corrected per-criterion averages
Return JSON only:
{
@@ -103,7 +131,15 @@ Order rules so filters come first, sorts next, limits last.`
const RANKING_SYSTEM_PROMPT = `You are a project ranking engine for an ocean conservation competition.
You will receive a list of anonymized projects with numeric scores and a set of parsed ranking rules.
You will receive:
1. A list of anonymized projects with numeric scores (including per-criterion averages and bias-corrected scores)
2. A set of parsed ranking rules
3. Optional: criteria_definitions with weights indicating the relative importance of each evaluation criterion
When criteria_definitions with weights are provided, use the weighted criteria scores as a PRIMARY ranking factor.
The weighted score is: sum(criterion_avg * weight) / sum(weights).
Use normalized (bias-corrected) scores when available — they account for differences in juror grading harshness.
Apply the rules in order and return the final ranked list.
Return JSON only:
@@ -126,34 +162,85 @@ Rules:
// ─── Helpers ──────────────────────────────────────────────────────────────────
/**
* Compute composite score using weighted criteria if available,
* falling back to avgGlobalScore otherwise.
*/
function computeCompositeScore(
avgGlobalScore: number | null,
passRate: number,
evaluatorCount: number,
project: ProjectForRanking,
maxEvaluatorCount: number,
criteriaWeights: Record<string, number> | undefined,
criterionDefs: CriterionDef[],
): number {
const normalizedScore = avgGlobalScore != null ? (avgGlobalScore - 1) / 9 : 0.5
const composite = normalizedScore * 0.5 + passRate * 0.5
let scoreComponent: number
// Try weighted criteria first
if (criteriaWeights && Object.keys(criteriaWeights).length > 0) {
let weightedSum = 0
let totalWeight = 0
for (const [criterionId, weight] of Object.entries(criteriaWeights)) {
if (weight <= 0) continue
// Use normalized scores if available, otherwise raw
const score = project.normalizedCriterionAverages[criterionId]
?? project.criterionAverages[criterionId]
if (score == null) continue
// Normalize to 01 based on criterion scale
const def = criterionDefs.find((d) => d.id === criterionId)
const maxScale = typeof def?.scale === 'number' ? def.scale
: typeof def?.scale === 'string' ? parseInt(def.scale.split('-').pop() ?? '10', 10)
: 10
const normalizedScore = maxScale > 1 ? (score - 1) / (maxScale - 1) : score
weightedSum += normalizedScore * weight
totalWeight += weight
}
scoreComponent = totalWeight > 0 ? weightedSum / totalWeight : 0.5
} else {
// Fallback: use avgGlobalScore normalized to 01
const avg = project.normalizedAvgScore ?? project.avgGlobalScore
scoreComponent = avg != null ? (avg - 1) / 9 : 0.5
}
const composite = scoreComponent * 0.5 + project.passRate * 0.5
// Tiebreak: tiny bonus for more evaluators (won't change rank unless composite is equal)
const tiebreakBonus = maxEvaluatorCount > 0
? (evaluatorCount / maxEvaluatorCount) * 0.0001
? (project.evaluatorCount / maxEvaluatorCount) * 0.0001
: 0
return composite + tiebreakBonus
}
function anonymizeProjectsForRanking(
projects: ProjectForRanking[],
criterionDefs: CriterionDef[],
): { anonymized: AnonymizedProjectForRanking[]; idMap: Map<string, string> } {
// Build id → label map for criterion names (anonymize IDs)
const idToLabel = new Map(criterionDefs.map((d) => [d.id, d.label]))
const idMap = new Map<string, string>()
const anonymized = projects.map((p, i) => {
const anonId = `P${String(i + 1).padStart(3, '0')}`
idMap.set(anonId, p.id)
// Convert criterion ID keys to human-readable labels
const criteriaScores: Record<string, number> = {}
for (const [id, score] of Object.entries(p.criterionAverages)) {
const label = idToLabel.get(id) ?? id
criteriaScores[label] = Math.round(score * 100) / 100
}
const normalizedCriteriaScores: Record<string, number> = {}
for (const [id, score] of Object.entries(p.normalizedCriterionAverages)) {
const label = idToLabel.get(id) ?? id
normalizedCriteriaScores[label] = Math.round(score * 100) / 100
}
return {
project_id: anonId,
avg_score: p.avgGlobalScore,
avg_score: p.avgGlobalScore != null ? Math.round(p.avgGlobalScore * 100) / 100 : null,
normalized_avg_score: p.normalizedAvgScore != null ? Math.round(p.normalizedAvgScore * 100) / 100 : null,
pass_rate: p.passRate,
evaluator_count: p.evaluatorCount,
category: p.competitionCategory,
criteria_scores: criteriaScores,
normalized_criteria_scores: normalizedCriteriaScores,
}
})
return { anonymized, idMap }
@@ -206,6 +293,70 @@ function computePassRate(evaluations: Array<{ resolvedDecision: boolean | null }
return passCount / evaluations.length
}
// ─── Z-Score Normalization ──────────────────────────────────────────────────
// Per-juror grading statistics used to correct for grading harshness.
interface JurorStats {
  mean: number // mean of all numeric scores this juror submitted
  stddev: number // population standard deviation (0 when fewer than 2 samples)
  count: number // number of numeric score samples observed for this juror
}
/**
 * Compute per-juror grading statistics (mean and population stddev) for z-score
 * normalization. Pools each juror's globalScore together with their numeric
 * criterion scores (filtered by numericCriterionIds) across all SUBMITTED
 * evaluations. Jurors with fewer than two samples get { mean: 0, stddev: 0 }
 * so downstream normalization can detect and skip them.
 */
function computeJurorStats(
  assignments: Array<{
    userId: string
    evaluation: {
      globalScore: number | null
      criterionScoresJson: Record<string, unknown> | null
    } | null
  }>,
  numericCriterionIds: Set<string>,
): Map<string, JurorStats> {
  // Gather every numeric score each juror has submitted
  const scoresByJuror = new Map<string, number[]>()
  for (const assignment of assignments) {
    const evaluation = assignment.evaluation
    if (evaluation == null) continue
    const bucket = scoresByJuror.get(assignment.userId) ?? []
    if (evaluation.globalScore != null) bucket.push(evaluation.globalScore)
    for (const [criterionId, value] of Object.entries(evaluation.criterionScoresJson ?? {})) {
      if (typeof value === 'number' && numericCriterionIds.has(criterionId)) {
        bucket.push(value)
      }
    }
    scoresByJuror.set(assignment.userId, bucket)
  }
  // Reduce each juror's samples to mean / population stddev
  const result = new Map<string, JurorStats>()
  for (const [jurorId, samples] of scoresByJuror) {
    if (samples.length < 2) {
      // Not enough data for meaningful normalization — sentinel stats
      result.set(jurorId, { mean: 0, stddev: 0, count: samples.length })
      continue
    }
    const mean = samples.reduce((acc, s) => acc + s, 0) / samples.length
    const variance = samples.reduce((acc, s) => acc + (s - mean) ** 2, 0) / samples.length
    result.set(jurorId, { mean, stddev: Math.sqrt(variance), count: samples.length })
  }
  return result
}
/**
 * Z-score normalize a raw score against one juror's grading statistics.
 * Returns (raw - mean) / stddev, or null when normalization isn't possible
 * (fewer than two samples, or zero spread in the juror's scores).
 */
function zScoreNormalize(raw: number, stats: JurorStats): number | null {
  const normalizable = stats.count >= 2 && stats.stddev !== 0
  return normalizable ? (raw - stats.mean) / stats.stddev : null
}
// ─── Exported Functions ───────────────────────────────────────────────────────
/**
@@ -275,11 +426,15 @@ export async function parseRankingCriteria(
*
* projects: raw data queried from Prisma, already filtered to one category
* parsedRules: from parseRankingCriteria()
* criteriaWeights: optional admin-configured weights from round config
* criterionDefs: criterion definitions from the evaluation form
*/
export async function executeAIRanking(
parsedRules: ParsedRankingRule[],
projects: ProjectForRanking[],
category: CompetitionCategory,
criteriaWeights: Record<string, number> | undefined,
criterionDefs: CriterionDef[],
userId?: string,
entityId?: string,
): Promise<RankingResult> {
@@ -288,7 +443,7 @@ export async function executeAIRanking(
}
const maxEvaluatorCount = Math.max(...projects.map((p) => p.evaluatorCount))
const { anonymized, idMap } = anonymizeProjectsForRanking(projects)
const { anonymized, idMap } = anonymizeProjectsForRanking(projects, criterionDefs)
const openai = await getOpenAI()
if (!openai) {
@@ -297,10 +452,23 @@ export async function executeAIRanking(
const model = await getConfiguredModel()
const userPrompt = JSON.stringify({
// Build criteria_definitions for the AI prompt (only numeric criteria)
const numericDefs = criterionDefs.filter((d) => !d.type || d.type === 'numeric')
const criteriaDefsForAI: CriterionDefForAI[] = numericDefs.map((d) => {
const adminWeight = criteriaWeights?.[d.id] ?? d.weight ?? 1
const scale = typeof d.scale === 'number' ? `1-${d.scale}` : typeof d.scale === 'string' ? d.scale : '1-10'
return { name: d.label, weight: adminWeight, scale }
})
const promptData: Record<string, unknown> = {
rules: parsedRules.filter((r) => r.dataAvailable),
projects: anonymized,
})
}
if (criteriaDefsForAI.length > 0) {
promptData.criteria_definitions = criteriaDefsForAI
}
const userPrompt = JSON.stringify(promptData)
const params = buildCompletionParams(model, {
messages: [
@@ -359,13 +527,9 @@ export async function executeAIRanking(
return {
projectId: realId,
rank: entry.rank,
compositeScore: computeCompositeScore(
proj.avgGlobalScore,
proj.passRate,
proj.evaluatorCount,
maxEvaluatorCount,
),
compositeScore: computeCompositeScore(proj, maxEvaluatorCount, criteriaWeights, criterionDefs),
avgGlobalScore: proj.avgGlobalScore,
normalizedAvgScore: proj.normalizedAvgScore,
passRate: proj.passRate,
evaluatorCount: proj.evaluatorCount,
aiRationale: entry.rationale,
@@ -404,6 +568,9 @@ export async function quickRank(
* Internal helper: fetch eligible projects for one category and execute ranking.
* Excluded: withdrawn projects and projects with zero submitted evaluations (locked decision).
*
* Fetches evaluation form criteria, computes per-criterion averages, applies z-score
* normalization to correct for juror bias, and passes weighted criteria to the AI.
*
* Exported so the tRPC router can call it separately when executing pre-parsed rules.
*/
export async function fetchAndRankCategory(
@@ -413,12 +580,32 @@ export async function fetchAndRankCategory(
prisma: PrismaClient,
userId?: string,
): Promise<RankingResult> {
// Fetch the round config to find the boolean criterion ID (legacy fallback)
const round = await prisma.round.findUniqueOrThrow({
where: { id: roundId },
select: { configJson: true },
})
const boolCriterionId = findBooleanCriterionId(round.configJson as Record<string, unknown> | null)
// Fetch the round config and evaluation form in parallel
const [round, evalForm] = await Promise.all([
prisma.round.findUniqueOrThrow({
where: { id: roundId },
select: { configJson: true },
}),
prisma.evaluationForm.findFirst({
where: { roundId, isActive: true },
select: { criteriaJson: true },
}),
])
const roundConfig = round.configJson as Record<string, unknown> | null
const boolCriterionId = findBooleanCriterionId(roundConfig)
// Parse evaluation config for criteria weights
const evalConfig = roundConfig as EvaluationConfig | null
const criteriaWeights = evalConfig?.criteriaWeights ?? undefined
// Parse criterion definitions from the evaluation form
const criterionDefs: CriterionDef[] = evalForm?.criteriaJson
? (evalForm.criteriaJson as unknown as CriterionDef[])
: []
const numericCriterionIds = new Set(
criterionDefs.filter((d) => !d.type || d.type === 'numeric').map((d) => d.id),
)
// Query submitted evaluations grouped by projectId for this category
const assignments = await prisma.assignment.findMany({
@@ -446,8 +633,26 @@ export async function fetchAndRankCategory(
},
})
// Group by projectId, resolving binaryDecision from column or criterionScoresJson fallback
const byProject = new Map<string, Array<{ globalScore: number | null; resolvedDecision: boolean | null }>>()
// Compute per-juror stats for z-score normalization
const jurorStats = computeJurorStats(
assignments.map((a) => ({
userId: a.userId,
evaluation: a.evaluation ? {
globalScore: a.evaluation.globalScore,
criterionScoresJson: a.evaluation.criterionScoresJson as Record<string, unknown> | null,
} : null,
})),
numericCriterionIds,
)
// Group by projectId, collect per-juror scores for aggregation
type EvalData = {
globalScore: number | null
resolvedDecision: boolean | null
criterionScores: Record<string, unknown> | null
userId: string
}
const byProject = new Map<string, EvalData[]>()
for (const a of assignments) {
if (!a.evaluation) continue
const resolved = resolveBinaryDecision(
@@ -456,21 +661,83 @@ export async function fetchAndRankCategory(
boolCriterionId,
)
const list = byProject.get(a.project.id) ?? []
list.push({ globalScore: a.evaluation.globalScore, resolvedDecision: resolved })
list.push({
globalScore: a.evaluation.globalScore,
resolvedDecision: resolved,
criterionScores: a.evaluation.criterionScoresJson as Record<string, unknown> | null,
userId: a.userId,
})
byProject.set(a.project.id, list)
}
// Build ProjectForRanking, excluding projects with zero submitted evaluations
const projects: ProjectForRanking[] = []
for (const [projectId, evals] of byProject.entries()) {
if (evals.length === 0) continue // Exclude: no submitted evaluations
if (evals.length === 0) continue
// Raw avg global score
const avgGlobalScore = evals.some((e) => e.globalScore != null)
? evals.filter((e) => e.globalScore != null).reduce((sum, e) => sum + e.globalScore!, 0) /
evals.filter((e) => e.globalScore != null).length
: null
// Z-score normalized avg global score
const normalizedGlobalScores: number[] = []
for (const e of evals) {
if (e.globalScore == null) continue
const stats = jurorStats.get(e.userId)
if (!stats) continue
const z = zScoreNormalize(e.globalScore, stats)
if (z != null) normalizedGlobalScores.push(z)
}
const normalizedAvgScore = normalizedGlobalScores.length > 0
? normalizedGlobalScores.reduce((a, b) => a + b, 0) / normalizedGlobalScores.length
: null
// Per-criterion raw averages (numeric criteria only)
const criterionAverages: Record<string, number> = {}
for (const criterionId of numericCriterionIds) {
const values: number[] = []
for (const e of evals) {
if (!e.criterionScores) continue
const val = e.criterionScores[criterionId]
if (typeof val === 'number') values.push(val)
}
if (values.length > 0) {
criterionAverages[criterionId] = values.reduce((a, b) => a + b, 0) / values.length
}
}
// Per-criterion z-score normalized averages
const normalizedCriterionAverages: Record<string, number> = {}
for (const criterionId of numericCriterionIds) {
const zScores: number[] = []
for (const e of evals) {
if (!e.criterionScores) continue
const val = e.criterionScores[criterionId]
if (typeof val !== 'number') continue
const stats = jurorStats.get(e.userId)
if (!stats) continue
const z = zScoreNormalize(val, stats)
if (z != null) zScores.push(z)
}
if (zScores.length > 0) {
normalizedCriterionAverages[criterionId] = zScores.reduce((a, b) => a + b, 0) / zScores.length
}
}
const passRate = computePassRate(evals)
projects.push({ id: projectId, competitionCategory: category, avgGlobalScore, passRate, evaluatorCount: evals.length })
projects.push({
id: projectId,
competitionCategory: category,
avgGlobalScore,
normalizedAvgScore,
passRate,
evaluatorCount: evals.length,
criterionAverages,
normalizedCriterionAverages,
})
}
return executeAIRanking(parsedRules, projects, category, userId, roundId)
return executeAIRanking(parsedRules, projects, category, criteriaWeights, criterionDefs, userId, roundId)
}