Files
MOPC-Portal/src/server/services/smart-assignment.ts

791 lines
25 KiB
TypeScript
Raw Normal View History

/**
* Smart Assignment Scoring Service
*
* Calculates scores for jury/mentor-project matching based on:
* - Tag overlap (expertise match)
* - Bio/description match (text similarity)
* - Workload balance (respects preferredWorkload and maxAssignments)
* - Country match (mentors only)
* - Geographic diversity penalty (prevents clustering by country)
* - Previous round familiarity bonus (continuity across rounds)
* - COI penalty (conflict of interest hard-block)
* - Availability window check (F2: penalizes jurors unavailable during voting)
* - Category quota balance (penalizes exceeding a category max, rewards filling a category min)
*
* Score Breakdown:
* - Tag overlap: 0-40 points (weighted by confidence)
* - Bio match: 0-15 points (if bio exists)
* - Workload balance: 0-25 points (uses preferredWorkload as soft target)
* - Country match: 0-15 points (mentors only)
* - Geo diversity: -15 per excess same-country assignment (threshold: 2)
* - Previous round familiarity: +10 if reviewed in earlier round
* - COI: juror skipped entirely if conflict declared
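* - Availability: -30 if unavailable during voting window
* - Category quota: -25 if the juror is at/over the category max, +10 if below the category min while another category has met its min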
*/
import { prisma } from '@/lib/prisma'
// ─── Types ──────────────────────────────────────────────────────────────────
/**
 * Per-factor contributions behind a single suggestion's score.
 * Positive values are bonuses, negative values are penalties.
 */
export interface ScoreBreakdown {
/** Expertise-tag overlap points (capped at MAX_TAG_OVERLAP_SCORE). */
tagOverlap: number
/** Bio/description keyword similarity points (capped at MAX_BIO_MATCH_SCORE). */
bioMatch: number
/** Workload balance points (0 up to MAX_WORKLOAD_SCORE). */
workloadBalance: number
/** MAX_COUNTRY_SCORE when user and project countries match, otherwise 0. */
countryMatch: number
/** 0, or a negative multiple of GEO_DIVERSITY_PENALTY_PER_EXCESS. */
geoDiversityPenalty: number
/** PREVIOUS_ROUND_FAMILIARITY_BONUS when the pair was assigned in an earlier stage, otherwise 0. */
previousRoundFamiliarity: number
/** Set to 0 by the producers visible here: pairs with a declared COI are skipped, not scored. */
coiPenalty: number
/** 0 when available (or unknown), AVAILABILITY_PENALTY when unavailable during the voting window. */
availabilityPenalty: number
/** CATEGORY_QUOTA_PENALTY (negative), CATEGORY_QUOTA_BONUS (positive) or 0. */
categoryQuotaPenalty: number
}
/**
 * One scored (user, project) suggestion returned by the suggestion functions.
 */
export interface AssignmentScore {
/** Id of the suggested juror or mentor. */
userId: string
/** Display name of the user; the producers here fall back to 'Unknown' when the name is empty. */
userName: string
userEmail: string
/** Id of the project the user is suggested for. */
projectId: string
projectTitle: string
/** Total score: the sum of the scoring factors recorded in `breakdown`. */
score: number
/** The individual factor values that make up `score`. */
breakdown: ScoreBreakdown
/** Human-readable notes explaining which factors contributed. */
reasoning: string[]
/** Names of the project tags that matched the user's expertise tags. */
matchingTags: string[]
}
/**
 * A tag attached to a project, as consumed by calculateTagOverlapScore.
 */
export interface ProjectTagData {
tagId: string
/** Tag name; compared case-insensitively against the user's expertise tags. */
tagName: string
/** Multiplier applied to POINTS_PER_TAG_MATCH for a match (presumably in the range 0..1 — confirm). */
confidence: number
}
// ─── Constants ───────────────────────────────────────────────────────────────
// Upper bounds for the positive scoring factors
const MAX_TAG_OVERLAP_SCORE = 40
const MAX_BIO_MATCH_SCORE = 15
const MAX_WORKLOAD_SCORE = 25
const MAX_COUNTRY_SCORE = 15
// Points for one matching tag, before weighting by the tag's confidence
const POINTS_PER_TAG_MATCH = 8
// New scoring factors
// The geo penalty applies once a user already holds this many assignments from the project's country
const GEO_DIVERSITY_THRESHOLD = 2
// Penalty per same-country assignment counted from the threshold onwards
const GEO_DIVERSITY_PENALTY_PER_EXCESS = -15
// Bonus when the same user was assigned to the same project in an earlier stage
const PREVIOUS_ROUND_FAMILIARITY_BONUS = 10
// COI jurors are skipped entirely rather than penalized (effectively -Infinity)
const AVAILABILITY_PENALTY = -30 // Heavy penalty for unavailable jurors
const CATEGORY_QUOTA_PENALTY = -25 // Heavy penalty when juror is at or over the category max
const CATEGORY_QUOTA_BONUS = 10 // Bonus when juror is below category min
// Common English words excluded from bio matching.
// extractKeywords lowercases its input before looking words up here, so all entries are lowercase.
const STOP_WORDS = new Set([
'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
'by', 'from', 'as', 'is', 'was', 'are', 'were', 'been', 'be', 'have', 'has', 'had',
'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must',
'that', 'which', 'who', 'whom', 'this', 'these', 'those', 'it', 'its', 'i', 'we',
'you', 'he', 'she', 'they', 'them', 'their', 'our', 'my', 'your', 'his', 'her',
'am', 'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below',
'between', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when',
'where', 'why', 'how', 'all', 'each', 'few', 'more', 'most', 'other', 'some',
'such', 'no', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'can',
'just', 'being', 'over', 'both', 'up', 'down', 'out', 'also', 'new', 'any',
])
// ─── Scoring Functions ───────────────────────────────────────────────────────
/**
 * Reduce free text to its set of distinct keywords.
 *
 * The text is lowercased, every character that is neither a word character
 * nor whitespace is replaced by a space, and the result is split on
 * whitespace. Tokens shorter than three characters and tokens listed in
 * STOP_WORDS are dropped.
 *
 * @param text - Text to analyse; null, undefined or empty yields an empty set.
 * @returns The distinct remaining tokens, in order of first appearance.
 */
function extractKeywords(text: string | null | undefined): Set<string> {
  const keywords = new Set<string>()
  if (!text) return keywords

  const normalized = text.toLowerCase().replace(/[^\w\s]/g, ' ')
  for (const token of normalized.split(/\s+/)) {
    if (token.length < 3) continue
    if (STOP_WORDS.has(token)) continue
    keywords.add(token)
  }
  return keywords
}
/**
 * Score how well a user's bio matches a project description.
 *
 * Both texts are reduced to keyword sets via extractKeywords and compared
 * with a Jaccard-style ratio: shared keywords / (bio keywords + project
 * keywords - shared keywords). The ratio is expressed as a rounded
 * percentage and capped at MAX_BIO_MATCH_SCORE, so any similarity at or
 * above the cap's percentage earns the maximum.
 *
 * A missing or blank bio or description, or no shared keywords, scores 0.
 * This is a missing bonus, not a penalty.
 *
 * @param userBio - The user's bio text.
 * @param projectDescription - The project's description text.
 * @returns The score and the shared keywords, in bio order.
 */
export function calculateBioMatchScore(
  userBio: string | null | undefined,
  projectDescription: string | null | undefined
): { score: number; matchingKeywords: string[] } {
  const noMatch = (): { score: number; matchingKeywords: string[] } => ({
    score: 0,
    matchingKeywords: [],
  })
  const isBlank = (value: string | null | undefined): boolean =>
    !value || value.trim().length === 0

  if (isBlank(userBio) || isBlank(projectDescription)) {
    return noMatch()
  }

  const bioKeywords = extractKeywords(userBio)
  const projectKeywords = extractKeywords(projectDescription)
  if (bioKeywords.size === 0 || projectKeywords.size === 0) {
    return noMatch()
  }

  const matchingKeywords = [...bioKeywords].filter((keyword) => projectKeywords.has(keyword))
  if (matchingKeywords.length === 0) {
    return noMatch()
  }

  // Jaccard-style similarity: |intersection| / |union|
  const unionSize = bioKeywords.size + projectKeywords.size - matchingKeywords.length
  const similarity = matchingKeywords.length / unionSize
  const percentage = Math.round(similarity * 100)

  return { score: Math.min(MAX_BIO_MATCH_SCORE, percentage), matchingKeywords }
}
/**
 * Score the overlap between a user's expertise tags and a project's tags.
 *
 * Tag names are compared case-insensitively. Each matching project tag
 * contributes POINTS_PER_TAG_MATCH multiplied by that tag's confidence. The
 * sum is rounded and capped at MAX_TAG_OVERLAP_SCORE.
 *
 * @param userTagNames - Expertise tag names held by the user.
 * @param projectTags - Tags attached to the project, with confidences.
 * @returns The score and the names of the matching project tags, in project-tag order.
 */
export function calculateTagOverlapScore(
  userTagNames: string[],
  projectTags: ProjectTagData[]
): { score: number; matchingTags: string[] } {
  if (userTagNames.length === 0 || projectTags.length === 0) {
    return { score: 0, matchingTags: [] }
  }

  const expertise = new Set(userTagNames.map((name) => name.toLowerCase()))
  const matched = projectTags.filter((tag) => expertise.has(tag.tagName.toLowerCase()))

  // Higher-confidence tags are worth more points
  const weightedTotal = matched.reduce(
    (sum, tag) => sum + POINTS_PER_TAG_MATCH * tag.confidence,
    0
  )

  return {
    score: Math.min(MAX_TAG_OVERLAP_SCORE, Math.round(weightedTotal)),
    matchingTags: matched.map((tag) => tag.tagName),
  }
}
/**
 * Score a user's remaining capacity.
 *
 * - At or above the personal cap (when one is set): 0.
 * - Below the target: MAX_WORKLOAD_SCORE.
 * - At or above the target: MAX_WORKLOAD_SCORE minus 5 points per assignment
 *   over the target, never below 0.
 *
 * @param currentAssignments - Assignments the user already holds.
 * @param targetAssignments - Soft target for the user.
 * @param maxAssignments - Optional personal hard cap; null or undefined means no cap.
 * @returns The workload score, between 0 and MAX_WORKLOAD_SCORE.
 */
export function calculateWorkloadScore(
  currentAssignments: number,
  targetAssignments: number,
  maxAssignments?: number | null
): number {
  const atPersonalCap = maxAssignments != null && currentAssignments >= maxAssignments
  if (atPersonalCap) {
    return 0
  }

  const belowTarget = currentAssignments < targetAssignments
  if (belowTarget) {
    return MAX_WORKLOAD_SCORE
  }

  // At or past the target: lose 5 points for every assignment beyond it
  const excess = currentAssignments - targetAssignments
  return Math.max(0, MAX_WORKLOAD_SCORE - excess * 5)
}
/**
 * Score a country match between a user and a project (used for mentors).
 *
 * Countries are compared after lowercasing and trimming.
 *
 * @param userCountry - The user's country.
 * @param projectCountry - The project's country.
 * @returns MAX_COUNTRY_SCORE when both are present and equal, otherwise 0.
 */
export function calculateCountryMatchScore(
  userCountry: string | null | undefined,
  projectCountry: string | null | undefined
): number {
  if (!userCountry || !projectCountry) {
    return 0
  }

  const normalize = (country: string): string => country.toLowerCase().trim()
  return normalize(userCountry) === normalize(projectCountry) ? MAX_COUNTRY_SCORE : 0
}
/**
 * Penalize a user who is not available during the voting window.
 *
 * `availabilityJson` is expected to be an array of `{ start, end }` date-range
 * objects describing when the user IS available. Entries that are not
 * objects, or whose dates do not parse, are ignored.
 *
 * @param availabilityJson - Raw availability data; anything other than a non-empty array means "always available".
 * @param votingStartAt - Start of the voting window.
 * @param votingEndAt - End of the voting window.
 * @returns 0 when no availability data or no voting window is given, or when
 *   at least one availability range overlaps the voting window (bounds
 *   inclusive); otherwise AVAILABILITY_PENALTY.
 */
export function calculateAvailabilityPenalty(
  availabilityJson: unknown,
  votingStartAt: Date | null | undefined,
  votingEndAt: Date | null | undefined
): number {
  // No declared windows: treat the user as always available
  if (!Array.isArray(availabilityJson) || availabilityJson.length === 0) {
    return 0
  }
  // Without a complete voting window there is nothing to compare against
  if (!votingStartAt || !votingEndAt) {
    return 0
  }

  const votingFrom = votingStartAt
  const votingUntil = votingEndAt

  const overlapsVotingWindow = availabilityJson.some((slot) => {
    if (!slot || typeof slot !== 'object') return false

    const range = slot as { start: string; end: string }
    const availableFrom = new Date(range.start)
    const availableUntil = new Date(range.end)
    if (Number.isNaN(availableFrom.getTime()) || Number.isNaN(availableUntil.getTime())) {
      return false
    }

    // Two ranges overlap when each one starts before the other ends
    return availableFrom <= votingUntil && availableUntil >= votingFrom
  })

  return overlapsVotingWindow ? 0 : AVAILABILITY_PENALTY
}
/**
 * Calculate the category quota penalty/bonus for a juror-project pair.
 *
 * Category names are compared case-insensitively and ignoring surrounding
 * whitespace, for the project category, the quota keys AND the keys of
 * `jurorCategoryCounts`. Count keys that normalize to the same category are
 * summed.
 *
 * - No project category, or no quota configured for it: 0.
 * - Juror's count for the project's category is at or over the quota max:
 *   CATEGORY_QUOTA_PENALTY.
 * - Juror's count is below the quota min and at least one OTHER configured
 *   category has a count at or above its own min: CATEGORY_QUOTA_BONUS.
 * - Otherwise: 0.
 *
 * @param categoryQuotas - Min/max assignments per category.
 * @param jurorCategoryCounts - The juror's current assignment count per category.
 * @param projectCategory - Category of the project being considered.
 * @returns The penalty (negative), bonus (positive) or 0.
 */
export function calculateCategoryQuotaPenalty(
  categoryQuotas: Record<string, { min: number; max: number }>,
  jurorCategoryCounts: Record<string, number>,
  projectCategory: string | null | undefined
): number {
  if (!projectCategory) return 0

  const normalize = (value: string): string => value.toLowerCase().trim()
  const normalizedCategory = normalize(projectCategory)

  const quotaEntries = Object.entries(categoryQuotas)
  const quota = quotaEntries.find(([key]) => normalize(key) === normalizedCategory)
  if (!quota) return 0
  const [, { min, max }] = quota

  // Index the counts by normalized category so lookups agree with the quota
  // matching above regardless of how the caller cased its keys. A Map also
  // keeps inherited object properties out of the lookup.
  const normalizedCounts = new Map<string, number>()
  for (const [key, count] of Object.entries(jurorCategoryCounts)) {
    const normalizedKey = normalize(key)
    normalizedCounts.set(normalizedKey, (normalizedCounts.get(normalizedKey) ?? 0) + (count || 0))
  }
  const countFor = (category: string): number => normalizedCounts.get(category) ?? 0

  const currentCount = countFor(normalizedCategory)

  // At or over max: heavy penalty
  if (currentCount >= max) {
    return CATEGORY_QUOTA_PENALTY
  }

  // Below min while some other category has already met its min: bonus
  if (currentCount < min) {
    const otherCategoryMetMin = quotaEntries.some(([key, otherQuota]) => {
      const otherCategory = normalize(key)
      if (otherCategory === normalizedCategory) return false
      return countFor(otherCategory) >= otherQuota.min
    })
    if (otherCategoryMetMin) {
      return CATEGORY_QUOTA_BONUS
    }
  }

  return 0
}
// ─── Main Scoring Function ───────────────────────────────────────────────────
/**
 * Get smart assignment suggestions for a stage.
 *
 * Scores every (user, project) pair where the user is an ACTIVE juror or
 * mentor, the project belongs to the stage and is not REJECTED, the pair is
 * not already assigned in this stage, and no conflict of interest has been
 * declared for it. Users who already hold `aiMaxPerJudge` assignments in the
 * stage, or who have reached their personal `maxAssignments`, are left out
 * entirely.
 *
 * The score is the sum of: tag overlap, bio match, workload balance, country
 * match (mentors only), geographic diversity penalty, previous-stage
 * familiarity bonus, availability penalty and category quota penalty/bonus.
 * `breakdown.coiPenalty` is always 0 because conflicted pairs are skipped
 * rather than scored.
 *
 * @param options.stageId - Stage to generate suggestions for.
 * @param options.type - 'jury' selects JURY_MEMBER users, 'mentor' selects MENTOR users.
 * @param options.limit - Maximum number of suggestions returned (default 50).
 * @param options.aiMaxPerJudge - Users with at least this many assignments in the stage are skipped (default 20).
 * @param options.categoryQuotas - Optional per-category min/max; enables the category quota factor.
 * @returns Up to `limit` suggestions, sorted by score in descending order.
 */
export async function getSmartSuggestions(options: {
  stageId: string
  type: 'jury' | 'mentor'
  limit?: number
  aiMaxPerJudge?: number
  categoryQuotas?: Record<string, { min: number; max: number }>
}): Promise<AssignmentScore[]> {
  const { stageId, type, limit = 50, aiMaxPerJudge = 20, categoryQuotas } = options

  // ── Candidate projects: in this stage and not rejected ────────────────────
  const projectStageStates = await prisma.projectStageState.findMany({
    where: { stageId },
    select: { projectId: true },
  })
  const projectIds = projectStageStates.map((pss) => pss.projectId)
  const projects = await prisma.project.findMany({
    where: {
      id: { in: projectIds },
      status: { not: 'REJECTED' },
    },
    select: {
      id: true,
      title: true,
      teamName: true,
      description: true,
      country: true,
      competitionCategory: true,
      status: true,
      projectTags: {
        include: { tag: true },
      },
    },
  })
  if (projects.length === 0) {
    return []
  }

  // ── Candidate users: active jurors or mentors ─────────────────────────────
  const role = type === 'jury' ? 'JURY_MEMBER' : 'MENTOR'
  const users = await prisma.user.findMany({
    where: {
      role,
      status: 'ACTIVE',
    },
    select: {
      id: true,
      name: true,
      email: true,
      bio: true,
      expertiseTags: true,
      maxAssignments: true,
      country: true,
      availabilityJson: true,
      preferredWorkload: true,
      _count: {
        select: {
          assignments: {
            where: { stageId },
          },
        },
      },
    },
  })
  if (users.length === 0) {
    return []
  }

  // ── Stage metadata, existing assignments and declared conflicts ───────────
  // These lookups are independent of each other, so they run concurrently.
  // The stage row and the stage's assignments are each fetched once and
  // reused for every factor that needs them.
  const [stage, stageAssignments, coiRecords] = await Promise.all([
    prisma.stage.findUnique({
      where: { id: stageId },
      select: { windowOpenAt: true, windowCloseAt: true, trackId: true, sortOrder: true },
    }),
    prisma.assignment.findMany({
      where: { stageId },
      select: {
        userId: true,
        projectId: true,
        project: { select: { country: true, competitionCategory: true } },
      },
    }),
    prisma.conflictOfInterest.findMany({
      where: {
        assignment: { stageId },
        hasConflict: true,
      },
      select: { userId: true, projectId: true },
    }),
  ])

  // One pass over the stage's assignments builds:
  //   assignedPairs             "userId:projectId" already assigned
  //   userCountryDistribution   userId -> { country -> count }
  //   userCategoryDistribution  userId -> { category -> count } (only with quotas)
  const assignedPairs = new Set<string>()
  const userCountryDistribution = new Map<string, Map<string, number>>()
  const userCategoryDistribution = new Map<string, Record<string, number>>()
  for (const a of stageAssignments) {
    assignedPairs.add(`${a.userId}:${a.projectId}`)

    const country = a.project.country?.toLowerCase().trim()
    if (country) {
      let countryMap = userCountryDistribution.get(a.userId)
      if (!countryMap) {
        countryMap = new Map()
        userCountryDistribution.set(a.userId, countryMap)
      }
      countryMap.set(country, (countryMap.get(country) || 0) + 1)
    }

    if (categoryQuotas) {
      const category = a.project.competitionCategory?.toLowerCase().trim()
      if (category) {
        let categoryMap = userCategoryDistribution.get(a.userId)
        if (!categoryMap) {
          categoryMap = {}
          userCategoryDistribution.set(a.userId, categoryMap)
        }
        categoryMap[category] = (categoryMap[category] || 0) + 1
      }
    }
  }

  const coiPairs = new Set(coiRecords.map((c) => `${c.userId}:${c.projectId}`))

  // ── Pairs assigned in earlier stages of the same track ────────────────────
  const previousStageAssignmentPairs = new Set<string>()
  if (stage) {
    const earlierStages = await prisma.stage.findMany({
      where: {
        trackId: stage.trackId,
        sortOrder: { lt: stage.sortOrder },
      },
      select: { id: true },
    })
    const earlierStageIds = earlierStages.map((s) => s.id)
    if (earlierStageIds.length > 0) {
      const previousAssignments = await prisma.assignment.findMany({
        where: {
          stageId: { in: earlierStageIds },
        },
        select: { userId: true, projectId: true },
      })
      for (const pa of previousAssignments) {
        previousStageAssignmentPairs.add(`${pa.userId}:${pa.projectId}`)
      }
    }
  }

  // ── Per-project data that does not depend on the user ─────────────────────
  const projectContexts = projects.map((project) => ({
    project,
    projectTags: project.projectTags.map(
      (pt): ProjectTagData => ({
        tagId: pt.tagId,
        tagName: pt.tag.name,
        confidence: pt.confidence,
      })
    ),
    projectCountry: project.country?.toLowerCase().trim(),
  }))

  // ── Calculate target assignments per user ─────────────────────────────────
  const targetPerUser = Math.ceil(projects.length / users.length)

  // ── Calculate scores for all user-project pairs ───────────────────────────
  const suggestions: AssignmentScore[] = []
  for (const user of users) {
    const currentCount = user._count.assignments

    // Skip users at AI max (they won't appear in suggestions)
    if (currentCount >= aiMaxPerJudge) {
      continue
    }
    // Per-juror hard block: skip entirely if at personal maxAssignments limit
    if (user.maxAssignments !== null && user.maxAssignments !== undefined) {
      if (currentCount >= user.maxAssignments) {
        continue
      }
    }

    // Factors that depend only on the user are computed once per user.
    // preferredWorkload is a soft target; fall back to the even split.
    const effectiveTarget = user.preferredWorkload ?? targetPerUser
    const workloadScore = calculateWorkloadScore(
      currentCount,
      effectiveTarget,
      user.maxAssignments
    )
    const availabilityPenalty = calculateAvailabilityPenalty(
      user.availabilityJson,
      stage?.windowOpenAt,
      stage?.windowCloseAt
    )
    const countryCounts = userCountryDistribution.get(user.id)
    const jurorCategoryCounts = userCategoryDistribution.get(user.id) || {}

    for (const { project, projectTags, projectCountry } of projectContexts) {
      const pairKey = `${user.id}:${project.id}`
      // Skip pairs that are already assigned or have a declared COI
      if (assignedPairs.has(pairKey) || coiPairs.has(pairKey)) {
        continue
      }

      const { score: tagScore, matchingTags } = calculateTagOverlapScore(
        user.expertiseTags,
        projectTags
      )
      const { score: bioScore, matchingKeywords } = calculateBioMatchScore(
        user.bio,
        project.description
      )
      const countryScore =
        type === 'mentor' ? calculateCountryMatchScore(user.country, project.country) : 0

      // Category quota penalty/bonus (only when quotas were supplied)
      const categoryQuotaPenalty = categoryQuotas
        ? calculateCategoryQuotaPenalty(
            categoryQuotas,
            jurorCategoryCounts,
            project.competitionCategory
          )
        : 0

      // Geographic diversity penalty: grows with each same-country
      // assignment the user already holds at or beyond the threshold
      let geoDiversityPenalty = 0
      if (projectCountry) {
        const sameCountryCount = countryCounts?.get(projectCountry) || 0
        if (sameCountryCount >= GEO_DIVERSITY_THRESHOLD) {
          geoDiversityPenalty =
            GEO_DIVERSITY_PENALTY_PER_EXCESS * (sameCountryCount - GEO_DIVERSITY_THRESHOLD + 1)
        }
      }

      const previousRoundFamiliarity = previousStageAssignmentPairs.has(pairKey)
        ? PREVIOUS_ROUND_FAMILIARITY_BONUS
        : 0

      const totalScore =
        tagScore +
        bioScore +
        workloadScore +
        countryScore +
        geoDiversityPenalty +
        previousRoundFamiliarity +
        availabilityPenalty +
        categoryQuotaPenalty

      // Build reasoning
      const reasoning: string[] = []
      if (matchingTags.length > 0) {
        reasoning.push(`Expertise match: ${matchingTags.length} tag(s)`)
      }
      if (bioScore > 0) {
        reasoning.push(`Bio match: ${matchingKeywords.length} keyword(s)`)
      }
      if (workloadScore === MAX_WORKLOAD_SCORE) {
        reasoning.push('Available capacity')
      } else if (workloadScore > 0) {
        reasoning.push('Moderate workload')
      }
      if (user.preferredWorkload) {
        reasoning.push(`Preferred workload: ${user.preferredWorkload}`)
      }
      if (countryScore > 0) {
        reasoning.push('Same country')
      }
      if (geoDiversityPenalty < 0) {
        reasoning.push(`Geo diversity penalty (${geoDiversityPenalty})`)
      }
      if (previousRoundFamiliarity > 0) {
        reasoning.push(`Reviewed in previous round (+${previousRoundFamiliarity})`)
      }
      if (availabilityPenalty < 0) {
        reasoning.push(`Unavailable during voting window (${availabilityPenalty})`)
      }
      if (categoryQuotaPenalty < 0) {
        reasoning.push(`Category quota exceeded (${categoryQuotaPenalty})`)
      } else if (categoryQuotaPenalty > 0) {
        reasoning.push(`Category quota bonus (+${categoryQuotaPenalty})`)
      }

      suggestions.push({
        userId: user.id,
        userName: user.name || 'Unknown',
        userEmail: user.email,
        projectId: project.id,
        projectTitle: project.title,
        score: totalScore,
        breakdown: {
          tagOverlap: tagScore,
          bioMatch: bioScore,
          workloadBalance: workloadScore,
          countryMatch: countryScore,
          geoDiversityPenalty,
          previousRoundFamiliarity,
          coiPenalty: 0, // COI jurors are skipped entirely
          availabilityPenalty,
          categoryQuotaPenalty,
        },
        reasoning,
        matchingTags,
      })
    }
  }

  // Sort by score descending and limit
  return suggestions.sort((a, b) => b.score - a.score).slice(0, limit)
}
/**
 * Rank active mentors for a single project.
 *
 * Each candidate's score is the sum of tag overlap, bio match, workload
 * balance (against a fixed target of 5 projects per mentor) and country
 * match. The mentor already assigned to the project, and mentors who have
 * reached their personal `maxAssignments`, are excluded.
 *
 * @param projectId - Project to find mentors for.
 * @param limit - Maximum number of suggestions returned (default 10).
 * @returns Up to `limit` suggestions, sorted by score in descending order.
 * @throws Error when no project with `projectId` exists.
 */
export async function getMentorSuggestionsForProject(
  projectId: string,
  limit: number = 10
): Promise<AssignmentScore[]> {
  const project = await prisma.project.findUnique({
    where: { id: projectId },
    include: {
      projectTags: {
        include: { tag: true },
      },
      mentorAssignment: true,
    },
  })
  if (!project) {
    throw new Error(`Project not found: ${projectId}`)
  }

  // All active mentors, including the fields each scoring factor needs
  const mentors = await prisma.user.findMany({
    where: {
      role: 'MENTOR',
      status: 'ACTIVE',
    },
    select: {
      id: true,
      name: true,
      email: true,
      bio: true,
      expertiseTags: true,
      maxAssignments: true,
      country: true,
      _count: {
        select: { mentorAssignments: true },
      },
    },
  })
  if (mentors.length === 0) {
    return []
  }

  const projectTags: ProjectTagData[] = project.projectTags.map((pt) => ({
    tagId: pt.tagId,
    tagName: pt.tag.name,
    confidence: pt.confidence,
  }))

  // Soft workload target: 5 projects per mentor
  const projectsPerMentorTarget = 5

  const eligibleMentors = mentors.filter((mentor) => {
    // Already mentoring this project
    if (project.mentorAssignment?.mentorId === mentor.id) return false
    // Hard block: personal assignment cap reached
    const cap = mentor.maxAssignments
    if (cap !== null && cap !== undefined && mentor._count.mentorAssignments >= cap) {
      return false
    }
    return true
  })

  const suggestions = eligibleMentors.map((mentor): AssignmentScore => {
    const tagResult = calculateTagOverlapScore(mentor.expertiseTags, projectTags)
    // Bio match contributes only when the mentor has a bio
    const bioResult = calculateBioMatchScore(mentor.bio, project.description)
    const workloadScore = calculateWorkloadScore(
      mentor._count.mentorAssignments,
      projectsPerMentorTarget,
      mentor.maxAssignments
    )
    const countryScore = calculateCountryMatchScore(mentor.country, project.country)

    const reasoning: string[] = []
    if (tagResult.matchingTags.length > 0) {
      reasoning.push(`${tagResult.matchingTags.length} matching expertise tag(s)`)
    }
    if (bioResult.score > 0) {
      reasoning.push(`Bio match: ${bioResult.matchingKeywords.length} keyword(s)`)
    }
    if (countryScore > 0) {
      reasoning.push('Same country of origin')
    }
    if (workloadScore === MAX_WORKLOAD_SCORE) {
      reasoning.push('Available capacity')
    }

    return {
      userId: mentor.id,
      userName: mentor.name || 'Unknown',
      userEmail: mentor.email,
      projectId: project.id,
      projectTitle: project.title,
      score: tagResult.score + bioResult.score + workloadScore + countryScore,
      breakdown: {
        tagOverlap: tagResult.score,
        bioMatch: bioResult.score,
        workloadBalance: workloadScore,
        countryMatch: countryScore,
        // Factors below are not evaluated for single-project mentor suggestions
        geoDiversityPenalty: 0,
        previousRoundFamiliarity: 0,
        coiPenalty: 0,
        availabilityPenalty: 0,
        categoryQuotaPenalty: 0,
      },
      reasoning,
      matchingTags: tagResult.matchingTags,
    }
  })

  return suggestions.sort((a, b) => b.score - a.score).slice(0, limit)
}