Optimize AI system with batching, token tracking, and GDPR compliance
- Add AIUsageLog model for persistent token/cost tracking - Implement batched processing for all AI services: - Assignment: 15 projects/batch - Filtering: 20 projects/batch - Award eligibility: 20 projects/batch - Mentor matching: 15 projects/batch - Create unified error classification (ai-errors.ts) - Enhance anonymization with comprehensive project data - Add AI usage dashboard to Settings page - Add usage stats endpoints to settings router - Create AI system documentation (5 files) - Create GDPR compliance documentation (2 files) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -3,17 +3,41 @@
|
||||
*
|
||||
* Uses GPT to analyze juror expertise and project requirements
|
||||
* to generate optimal assignment suggestions.
|
||||
*
|
||||
* Optimization:
|
||||
* - Batched processing (15 projects per batch)
|
||||
* - Description truncation (300 chars)
|
||||
* - Token tracking and cost logging
|
||||
*
|
||||
* GDPR Compliance:
|
||||
* - All data anonymized before AI processing
|
||||
* - IDs replaced with sequential identifiers
|
||||
* - No personal information sent to OpenAI
|
||||
*/
|
||||
|
||||
import { getOpenAI, getConfiguredModel } from '@/lib/openai'
|
||||
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
|
||||
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
|
||||
import { classifyAIError, createParseError, logAIError } from './ai-errors'
|
||||
import {
|
||||
anonymizeForAI,
|
||||
deanonymizeResults,
|
||||
validateAnonymization,
|
||||
DESCRIPTION_LIMITS,
|
||||
truncateAndSanitize,
|
||||
type AnonymizationResult,
|
||||
} from './anonymization'
|
||||
|
||||
// Types for AI assignment
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
const ASSIGNMENT_BATCH_SIZE = 15
|
||||
|
||||
// Optimized system prompt
|
||||
const ASSIGNMENT_SYSTEM_PROMPT = `Match jurors to projects by expertise. Return JSON assignments.
|
||||
Each: {juror_id, project_id, confidence_score: 0-1, expertise_match_score: 0-1, reasoning: str (1-2 sentences)}
|
||||
Distribute workload fairly. Avoid assigning jurors at capacity.`
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||
|
||||
export interface AIAssignmentSuggestion {
|
||||
jurorId: string
|
||||
projectId: string
|
||||
@@ -61,153 +85,144 @@ interface AssignmentConstraints {
|
||||
}>
|
||||
}
|
||||
|
||||
/**
|
||||
* System prompt for AI assignment
|
||||
*/
|
||||
const ASSIGNMENT_SYSTEM_PROMPT = `You are an expert at matching jury members to projects based on expertise alignment.
|
||||
|
||||
Your task is to suggest optimal juror-project assignments that:
|
||||
1. Match juror expertise tags with project tags and content
|
||||
2. Distribute workload fairly among jurors
|
||||
3. Ensure each project gets the required number of reviews
|
||||
4. Avoid assigning jurors who are already at their limit
|
||||
|
||||
For each suggestion, provide:
|
||||
- A confidence score (0-1) based on how well the juror's expertise matches the project
|
||||
- An expertise match score (0-1) based purely on tag/content alignment
|
||||
- A brief reasoning explaining why this is a good match
|
||||
|
||||
Return your response as a JSON array of assignments.`
|
||||
// ─── AI Processing ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Generate AI-powered assignment suggestions
|
||||
* Process a batch of projects for assignment suggestions
|
||||
*/
|
||||
export async function generateAIAssignments(
|
||||
jurors: JurorForAssignment[],
|
||||
projects: ProjectForAssignment[],
|
||||
constraints: AssignmentConstraints
|
||||
): Promise<AIAssignmentResult> {
|
||||
// Anonymize data before sending to AI
|
||||
const anonymizedData = anonymizeForAI(jurors, projects)
|
||||
async function processAssignmentBatch(
|
||||
openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
|
||||
model: string,
|
||||
anonymizedData: AnonymizationResult,
|
||||
batchProjects: typeof anonymizedData.projects,
|
||||
batchMappings: typeof anonymizedData.projectMappings,
|
||||
constraints: AssignmentConstraints,
|
||||
userId?: string,
|
||||
entityId?: string
|
||||
): Promise<{
|
||||
suggestions: AIAssignmentSuggestion[]
|
||||
tokensUsed: number
|
||||
}> {
|
||||
const suggestions: AIAssignmentSuggestion[] = []
|
||||
let tokensUsed = 0
|
||||
|
||||
// Validate anonymization
|
||||
if (!validateAnonymization(anonymizedData)) {
|
||||
console.error('Anonymization validation failed, falling back to algorithm')
|
||||
return generateFallbackAssignments(jurors, projects, constraints)
|
||||
}
|
||||
// Build prompt with batch-specific data
|
||||
const userPrompt = buildBatchPrompt(
|
||||
anonymizedData.jurors,
|
||||
batchProjects,
|
||||
constraints,
|
||||
anonymizedData.jurorMappings,
|
||||
batchMappings
|
||||
)
|
||||
|
||||
try {
|
||||
const openai = await getOpenAI()
|
||||
const params = buildCompletionParams(model, {
|
||||
messages: [
|
||||
{ role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
jsonMode: true,
|
||||
temperature: 0.3,
|
||||
maxTokens: 4000,
|
||||
})
|
||||
|
||||
if (!openai) {
|
||||
console.log('OpenAI not configured, using fallback algorithm')
|
||||
return generateFallbackAssignments(jurors, projects, constraints)
|
||||
const response = await openai.chat.completions.create(params)
|
||||
const usage = extractTokenUsage(response)
|
||||
tokensUsed = usage.totalTokens
|
||||
|
||||
// Log batch usage
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'ASSIGNMENT',
|
||||
entityType: 'Round',
|
||||
entityId,
|
||||
model,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
totalTokens: usage.totalTokens,
|
||||
batchSize: batchProjects.length,
|
||||
itemsProcessed: batchProjects.length,
|
||||
status: 'SUCCESS',
|
||||
})
|
||||
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (!content) {
|
||||
throw new Error('No response from AI')
|
||||
}
|
||||
|
||||
const suggestions = await callAIForAssignments(
|
||||
openai,
|
||||
anonymizedData,
|
||||
constraints
|
||||
)
|
||||
const parsed = JSON.parse(content) as {
|
||||
assignments: Array<{
|
||||
juror_id: string
|
||||
project_id: string
|
||||
confidence_score: number
|
||||
expertise_match_score: number
|
||||
reasoning: string
|
||||
}>
|
||||
}
|
||||
|
||||
// De-anonymize results
|
||||
const deanonymizedSuggestions = deanonymizeResults(
|
||||
suggestions.map((s) => ({
|
||||
...s,
|
||||
jurorId: s.jurorId,
|
||||
projectId: s.projectId,
|
||||
// De-anonymize and add to suggestions
|
||||
const deanonymized = deanonymizeResults(
|
||||
(parsed.assignments || []).map((a) => ({
|
||||
jurorId: a.juror_id,
|
||||
projectId: a.project_id,
|
||||
confidenceScore: Math.min(1, Math.max(0, a.confidence_score)),
|
||||
expertiseMatchScore: Math.min(1, Math.max(0, a.expertise_match_score)),
|
||||
reasoning: a.reasoning,
|
||||
})),
|
||||
anonymizedData.jurorMappings,
|
||||
anonymizedData.projectMappings
|
||||
).map((s) => ({
|
||||
jurorId: s.realJurorId,
|
||||
projectId: s.realProjectId,
|
||||
confidenceScore: s.confidenceScore,
|
||||
reasoning: s.reasoning,
|
||||
expertiseMatchScore: s.expertiseMatchScore,
|
||||
}))
|
||||
batchMappings
|
||||
)
|
||||
|
||||
return {
|
||||
success: true,
|
||||
suggestions: deanonymizedSuggestions,
|
||||
fallbackUsed: false,
|
||||
for (const item of deanonymized) {
|
||||
suggestions.push({
|
||||
jurorId: item.realJurorId,
|
||||
projectId: item.realProjectId,
|
||||
confidenceScore: item.confidenceScore,
|
||||
reasoning: item.reasoning,
|
||||
expertiseMatchScore: item.expertiseMatchScore,
|
||||
})
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error('AI assignment failed, using fallback:', error)
|
||||
return generateFallbackAssignments(jurors, projects, constraints)
|
||||
if (error instanceof SyntaxError) {
|
||||
const parseError = createParseError(error.message)
|
||||
logAIError('Assignment', 'batch processing', parseError)
|
||||
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'ASSIGNMENT',
|
||||
entityType: 'Round',
|
||||
entityId,
|
||||
model,
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: tokensUsed,
|
||||
batchSize: batchProjects.length,
|
||||
itemsProcessed: 0,
|
||||
status: 'ERROR',
|
||||
errorMessage: parseError.message,
|
||||
})
|
||||
} else {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
return { suggestions, tokensUsed }
|
||||
}
|
||||
|
||||
/**
|
||||
* Call OpenAI API for assignment suggestions
|
||||
* Build prompt for a batch of projects
|
||||
*/
|
||||
async function callAIForAssignments(
|
||||
openai: Awaited<ReturnType<typeof getOpenAI>>,
|
||||
anonymizedData: AnonymizationResult,
|
||||
constraints: AssignmentConstraints
|
||||
): Promise<AIAssignmentSuggestion[]> {
|
||||
if (!openai) {
|
||||
throw new Error('OpenAI client not available')
|
||||
}
|
||||
|
||||
// Build the user prompt
|
||||
const userPrompt = buildAssignmentPrompt(anonymizedData, constraints)
|
||||
|
||||
const model = await getConfiguredModel()
|
||||
|
||||
const response = await openai.chat.completions.create({
|
||||
model,
|
||||
messages: [
|
||||
{ role: 'system', content: ASSIGNMENT_SYSTEM_PROMPT },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
response_format: { type: 'json_object' },
|
||||
temperature: 0.3, // Lower temperature for more consistent results
|
||||
max_tokens: 4000,
|
||||
})
|
||||
|
||||
const content = response.choices[0]?.message?.content
|
||||
|
||||
if (!content) {
|
||||
throw new Error('No response from AI')
|
||||
}
|
||||
|
||||
// Parse the response
|
||||
const parsed = JSON.parse(content) as {
|
||||
assignments: Array<{
|
||||
juror_id: string
|
||||
project_id: string
|
||||
confidence_score: number
|
||||
expertise_match_score: number
|
||||
reasoning: string
|
||||
}>
|
||||
}
|
||||
|
||||
return (parsed.assignments || []).map((a) => ({
|
||||
jurorId: a.juror_id,
|
||||
projectId: a.project_id,
|
||||
confidenceScore: Math.min(1, Math.max(0, a.confidence_score)),
|
||||
expertiseMatchScore: Math.min(1, Math.max(0, a.expertise_match_score)),
|
||||
reasoning: a.reasoning,
|
||||
}))
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the prompt for AI assignment
|
||||
*/
|
||||
function buildAssignmentPrompt(
|
||||
data: AnonymizationResult,
|
||||
constraints: AssignmentConstraints
|
||||
function buildBatchPrompt(
|
||||
jurors: AnonymizationResult['jurors'],
|
||||
projects: AnonymizationResult['projects'],
|
||||
constraints: AssignmentConstraints,
|
||||
jurorMappings: AnonymizationResult['jurorMappings'],
|
||||
projectMappings: AnonymizationResult['projectMappings']
|
||||
): string {
|
||||
const { jurors, projects } = data
|
||||
|
||||
// Map existing assignments to anonymous IDs
|
||||
const jurorIdMap = new Map(
|
||||
data.jurorMappings.map((m) => [m.realId, m.anonymousId])
|
||||
)
|
||||
const projectIdMap = new Map(
|
||||
data.projectMappings.map((m) => [m.realId, m.anonymousId])
|
||||
)
|
||||
const jurorIdMap = new Map(jurorMappings.map((m) => [m.realId, m.anonymousId]))
|
||||
const projectIdMap = new Map(projectMappings.map((m) => [m.realId, m.anonymousId]))
|
||||
|
||||
const anonymousExisting = constraints.existingAssignments
|
||||
.map((a) => ({
|
||||
@@ -216,29 +231,110 @@ function buildAssignmentPrompt(
|
||||
}))
|
||||
.filter((a) => a.jurorId && a.projectId)
|
||||
|
||||
return `## Jurors Available
|
||||
${JSON.stringify(jurors, null, 2)}
|
||||
|
||||
## Projects to Assign
|
||||
${JSON.stringify(projects, null, 2)}
|
||||
|
||||
## Constraints
|
||||
- Each project needs ${constraints.requiredReviewsPerProject} reviews
|
||||
- Maximum assignments per juror: ${constraints.maxAssignmentsPerJuror || 'No limit'}
|
||||
- Existing assignments to avoid duplicating:
|
||||
${JSON.stringify(anonymousExisting, null, 2)}
|
||||
|
||||
## Instructions
|
||||
Generate optimal juror-project assignments. Return a JSON object with an "assignments" array where each assignment has:
|
||||
- juror_id: The anonymous juror ID
|
||||
- project_id: The anonymous project ID
|
||||
- confidence_score: 0-1 confidence in this match
|
||||
- expertise_match_score: 0-1 expertise alignment score
|
||||
- reasoning: Brief explanation (1-2 sentences)
|
||||
|
||||
Focus on matching expertise tags with project tags and descriptions. Distribute assignments fairly.`
|
||||
return `JURORS: ${JSON.stringify(jurors)}
|
||||
PROJECTS: ${JSON.stringify(projects)}
|
||||
CONSTRAINTS: ${constraints.requiredReviewsPerProject} reviews/project, max ${constraints.maxAssignmentsPerJuror || 'unlimited'}/juror
|
||||
EXISTING: ${JSON.stringify(anonymousExisting)}
|
||||
Return JSON: {"assignments": [...]}`
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate AI-powered assignment suggestions with batching
|
||||
*/
|
||||
export async function generateAIAssignments(
|
||||
jurors: JurorForAssignment[],
|
||||
projects: ProjectForAssignment[],
|
||||
constraints: AssignmentConstraints,
|
||||
userId?: string,
|
||||
entityId?: string
|
||||
): Promise<AIAssignmentResult> {
|
||||
// Truncate descriptions before anonymization
|
||||
const truncatedProjects = projects.map((p) => ({
|
||||
...p,
|
||||
description: truncateAndSanitize(p.description, DESCRIPTION_LIMITS.ASSIGNMENT),
|
||||
}))
|
||||
|
||||
// Anonymize data before sending to AI
|
||||
const anonymizedData = anonymizeForAI(jurors, truncatedProjects)
|
||||
|
||||
// Validate anonymization
|
||||
if (!validateAnonymization(anonymizedData)) {
|
||||
console.error('[AI Assignment] Anonymization validation failed, falling back to algorithm')
|
||||
return generateFallbackAssignments(jurors, projects, constraints)
|
||||
}
|
||||
|
||||
try {
|
||||
const openai = await getOpenAI()
|
||||
|
||||
if (!openai) {
|
||||
console.log('[AI Assignment] OpenAI not configured, using fallback algorithm')
|
||||
return generateFallbackAssignments(jurors, projects, constraints)
|
||||
}
|
||||
|
||||
const model = await getConfiguredModel()
|
||||
console.log(`[AI Assignment] Using model: ${model} for ${projects.length} projects in batches of ${ASSIGNMENT_BATCH_SIZE}`)
|
||||
|
||||
const allSuggestions: AIAssignmentSuggestion[] = []
|
||||
let totalTokens = 0
|
||||
|
||||
// Process projects in batches
|
||||
for (let i = 0; i < anonymizedData.projects.length; i += ASSIGNMENT_BATCH_SIZE) {
|
||||
const batchProjects = anonymizedData.projects.slice(i, i + ASSIGNMENT_BATCH_SIZE)
|
||||
const batchMappings = anonymizedData.projectMappings.slice(i, i + ASSIGNMENT_BATCH_SIZE)
|
||||
|
||||
console.log(`[AI Assignment] Processing batch ${Math.floor(i / ASSIGNMENT_BATCH_SIZE) + 1}/${Math.ceil(anonymizedData.projects.length / ASSIGNMENT_BATCH_SIZE)}`)
|
||||
|
||||
const { suggestions, tokensUsed } = await processAssignmentBatch(
|
||||
openai,
|
||||
model,
|
||||
anonymizedData,
|
||||
batchProjects,
|
||||
batchMappings,
|
||||
constraints,
|
||||
userId,
|
||||
entityId
|
||||
)
|
||||
|
||||
allSuggestions.push(...suggestions)
|
||||
totalTokens += tokensUsed
|
||||
}
|
||||
|
||||
console.log(`[AI Assignment] Completed. Total suggestions: ${allSuggestions.length}, Total tokens: ${totalTokens}`)
|
||||
|
||||
return {
|
||||
success: true,
|
||||
suggestions: allSuggestions,
|
||||
tokensUsed: totalTokens,
|
||||
fallbackUsed: false,
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
const classified = classifyAIError(error)
|
||||
logAIError('Assignment', 'generateAIAssignments', classified)
|
||||
|
||||
// Log failed attempt
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'ASSIGNMENT',
|
||||
entityType: 'Round',
|
||||
entityId,
|
||||
model: 'unknown',
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: 0,
|
||||
batchSize: projects.length,
|
||||
itemsProcessed: 0,
|
||||
status: 'ERROR',
|
||||
errorMessage: classified.message,
|
||||
})
|
||||
|
||||
console.error('[AI Assignment] AI assignment failed, using fallback:', classified.message)
|
||||
return generateFallbackAssignments(jurors, projects, constraints)
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Fallback Algorithm ──────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Fallback algorithm-based assignment when AI is unavailable
|
||||
*/
|
||||
|
||||
@@ -4,9 +4,33 @@
|
||||
* Determines project eligibility for special awards using:
|
||||
* - Deterministic field matching (tags, country, category)
|
||||
* - AI interpretation of plain-language criteria
|
||||
*
|
||||
* GDPR Compliance:
|
||||
* - All project data is anonymized before AI processing
|
||||
* - IDs replaced with sequential identifiers
|
||||
* - No personal information sent to OpenAI
|
||||
*/
|
||||
|
||||
import { getOpenAI, getConfiguredModel } from '@/lib/openai'
|
||||
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
|
||||
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
|
||||
import { classifyAIError, createParseError, logAIError } from './ai-errors'
|
||||
import {
|
||||
anonymizeProjectsForAI,
|
||||
validateAnonymizedProjects,
|
||||
type ProjectWithRelations,
|
||||
type AnonymizedProjectForAI,
|
||||
type ProjectAIMapping,
|
||||
} from './anonymization'
|
||||
import type { SubmissionSource } from '@prisma/client'
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
const BATCH_SIZE = 20
|
||||
|
||||
// Optimized system prompt
|
||||
const AI_ELIGIBILITY_SYSTEM_PROMPT = `Award eligibility evaluator. Evaluate projects against criteria, return JSON.
|
||||
Format: {"evaluations": [{project_id, eligible: bool, confidence: 0-1, reasoning: str}]}
|
||||
Be objective. Base evaluation only on provided data. No personal identifiers in reasoning.`
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -33,6 +57,16 @@ interface ProjectForEligibility {
|
||||
geographicZone?: string | null
|
||||
tags: string[]
|
||||
oceanIssue?: string | null
|
||||
institution?: string | null
|
||||
foundedAt?: Date | null
|
||||
wantsMentorship?: boolean
|
||||
submissionSource?: SubmissionSource
|
||||
submittedAt?: Date | null
|
||||
_count?: {
|
||||
teamMembers?: number
|
||||
files?: number
|
||||
}
|
||||
files?: Array<{ fileType: string | null }>
|
||||
}
|
||||
|
||||
// ─── Auto Tag Rules ─────────────────────────────────────────────────────────
|
||||
@@ -97,32 +131,162 @@ function getFieldValue(
|
||||
|
||||
// ─── AI Criteria Interpretation ─────────────────────────────────────────────
|
||||
|
||||
const AI_ELIGIBILITY_SYSTEM_PROMPT = `You are a special award eligibility evaluator. Given a list of projects and award criteria, determine which projects are eligible.
|
||||
|
||||
Return a JSON object with this structure:
|
||||
{
|
||||
"evaluations": [
|
||||
{
|
||||
"project_id": "string",
|
||||
"eligible": boolean,
|
||||
"confidence": number (0-1),
|
||||
"reasoning": "string"
|
||||
}
|
||||
]
|
||||
/**
|
||||
* Convert project to enhanced format for anonymization
|
||||
*/
|
||||
function toProjectWithRelations(project: ProjectForEligibility): ProjectWithRelations {
|
||||
return {
|
||||
id: project.id,
|
||||
title: project.title,
|
||||
description: project.description,
|
||||
competitionCategory: project.competitionCategory as any,
|
||||
oceanIssue: project.oceanIssue as any,
|
||||
country: project.country,
|
||||
geographicZone: project.geographicZone,
|
||||
institution: project.institution,
|
||||
tags: project.tags,
|
||||
foundedAt: project.foundedAt,
|
||||
wantsMentorship: project.wantsMentorship ?? false,
|
||||
submissionSource: project.submissionSource ?? 'MANUAL',
|
||||
submittedAt: project.submittedAt,
|
||||
_count: {
|
||||
teamMembers: project._count?.teamMembers ?? 0,
|
||||
files: project._count?.files ?? 0,
|
||||
},
|
||||
files: project.files?.map(f => ({ fileType: f.fileType as any })) ?? [],
|
||||
}
|
||||
}
|
||||
|
||||
Be fair, objective, and base your evaluation only on the provided information. Do not include personal identifiers in reasoning.`
|
||||
/**
|
||||
* Process a batch for AI eligibility evaluation
|
||||
*/
|
||||
async function processEligibilityBatch(
|
||||
openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
|
||||
model: string,
|
||||
criteriaText: string,
|
||||
anonymized: AnonymizedProjectForAI[],
|
||||
mappings: ProjectAIMapping[],
|
||||
userId?: string,
|
||||
entityId?: string
|
||||
): Promise<{
|
||||
results: EligibilityResult[]
|
||||
tokensUsed: number
|
||||
}> {
|
||||
const results: EligibilityResult[] = []
|
||||
let tokensUsed = 0
|
||||
|
||||
const userPrompt = `CRITERIA: ${criteriaText}
|
||||
PROJECTS: ${JSON.stringify(anonymized)}
|
||||
Evaluate eligibility for each project.`
|
||||
|
||||
try {
|
||||
const params = buildCompletionParams(model, {
|
||||
messages: [
|
||||
{ role: 'system', content: AI_ELIGIBILITY_SYSTEM_PROMPT },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
jsonMode: true,
|
||||
temperature: 0.3,
|
||||
maxTokens: 4000,
|
||||
})
|
||||
|
||||
const response = await openai.chat.completions.create(params)
|
||||
const usage = extractTokenUsage(response)
|
||||
tokensUsed = usage.totalTokens
|
||||
|
||||
// Log usage
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'AWARD_ELIGIBILITY',
|
||||
entityType: 'Award',
|
||||
entityId,
|
||||
model,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
totalTokens: usage.totalTokens,
|
||||
batchSize: anonymized.length,
|
||||
itemsProcessed: anonymized.length,
|
||||
status: 'SUCCESS',
|
||||
})
|
||||
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (!content) {
|
||||
throw new Error('Empty response from AI')
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(content) as {
|
||||
evaluations: Array<{
|
||||
project_id: string
|
||||
eligible: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
}>
|
||||
}
|
||||
|
||||
// Map results back to real IDs
|
||||
for (const eval_ of parsed.evaluations || []) {
|
||||
const mapping = mappings.find((m) => m.anonymousId === eval_.project_id)
|
||||
if (mapping) {
|
||||
results.push({
|
||||
projectId: mapping.realId,
|
||||
eligible: eval_.eligible,
|
||||
confidence: eval_.confidence,
|
||||
reasoning: eval_.reasoning,
|
||||
method: 'AI',
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
if (error instanceof SyntaxError) {
|
||||
const parseError = createParseError(error.message)
|
||||
logAIError('AwardEligibility', 'batch processing', parseError)
|
||||
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'AWARD_ELIGIBILITY',
|
||||
entityType: 'Award',
|
||||
entityId,
|
||||
model,
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: tokensUsed,
|
||||
batchSize: anonymized.length,
|
||||
itemsProcessed: 0,
|
||||
status: 'ERROR',
|
||||
errorMessage: parseError.message,
|
||||
})
|
||||
|
||||
// Flag all for manual review
|
||||
for (const mapping of mappings) {
|
||||
results.push({
|
||||
projectId: mapping.realId,
|
||||
eligible: false,
|
||||
confidence: 0,
|
||||
reasoning: 'AI response parse error — requires manual review',
|
||||
method: 'AI',
|
||||
})
|
||||
}
|
||||
} else {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
return { results, tokensUsed }
|
||||
}
|
||||
|
||||
export async function aiInterpretCriteria(
|
||||
criteriaText: string,
|
||||
projects: ProjectForEligibility[]
|
||||
projects: ProjectForEligibility[],
|
||||
userId?: string,
|
||||
awardId?: string
|
||||
): Promise<EligibilityResult[]> {
|
||||
const results: EligibilityResult[] = []
|
||||
|
||||
try {
|
||||
const openai = await getOpenAI()
|
||||
if (!openai) {
|
||||
// No OpenAI — mark all as needing manual review
|
||||
console.warn('[AI Eligibility] OpenAI not configured')
|
||||
return projects.map((p) => ({
|
||||
projectId: p.id,
|
||||
eligible: false,
|
||||
@@ -133,91 +297,69 @@ export async function aiInterpretCriteria(
|
||||
}
|
||||
|
||||
const model = await getConfiguredModel()
|
||||
console.log(`[AI Eligibility] Using model: ${model} for ${projects.length} projects`)
|
||||
|
||||
// Anonymize and batch
|
||||
const anonymized = projects.map((p, i) => ({
|
||||
project_id: `P${i + 1}`,
|
||||
real_id: p.id,
|
||||
title: p.title,
|
||||
description: p.description?.slice(0, 500) || '',
|
||||
category: p.competitionCategory || 'Unknown',
|
||||
ocean_issue: p.oceanIssue || 'Unknown',
|
||||
country: p.country || 'Unknown',
|
||||
region: p.geographicZone || 'Unknown',
|
||||
tags: p.tags.join(', '),
|
||||
}))
|
||||
// Convert and anonymize projects
|
||||
const projectsWithRelations = projects.map(toProjectWithRelations)
|
||||
const { anonymized, mappings } = anonymizeProjectsForAI(projectsWithRelations, 'ELIGIBILITY')
|
||||
|
||||
const batchSize = 20
|
||||
for (let i = 0; i < anonymized.length; i += batchSize) {
|
||||
const batch = anonymized.slice(i, i + batchSize)
|
||||
|
||||
const userPrompt = `Award criteria: ${criteriaText}
|
||||
|
||||
Projects to evaluate:
|
||||
${JSON.stringify(
|
||||
batch.map(({ real_id, ...rest }) => rest),
|
||||
null,
|
||||
2
|
||||
)}
|
||||
|
||||
Evaluate each project against the award criteria.`
|
||||
|
||||
const response = await openai.chat.completions.create({
|
||||
model,
|
||||
messages: [
|
||||
{ role: 'system', content: AI_ELIGIBILITY_SYSTEM_PROMPT },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
response_format: { type: 'json_object' },
|
||||
temperature: 0.3,
|
||||
max_tokens: 4000,
|
||||
})
|
||||
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (content) {
|
||||
try {
|
||||
const parsed = JSON.parse(content) as {
|
||||
evaluations: Array<{
|
||||
project_id: string
|
||||
eligible: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
}>
|
||||
}
|
||||
|
||||
for (const eval_ of parsed.evaluations) {
|
||||
const anon = batch.find((b) => b.project_id === eval_.project_id)
|
||||
if (anon) {
|
||||
results.push({
|
||||
projectId: anon.real_id,
|
||||
eligible: eval_.eligible,
|
||||
confidence: eval_.confidence,
|
||||
reasoning: eval_.reasoning,
|
||||
method: 'AI',
|
||||
})
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Parse error — mark batch for manual review
|
||||
for (const item of batch) {
|
||||
results.push({
|
||||
projectId: item.real_id,
|
||||
eligible: false,
|
||||
confidence: 0,
|
||||
reasoning: 'AI response parse error — requires manual review',
|
||||
method: 'AI',
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
// Validate anonymization
|
||||
if (!validateAnonymizedProjects(anonymized)) {
|
||||
console.error('[AI Eligibility] Anonymization validation failed')
|
||||
throw new Error('GDPR compliance check failed: PII detected in anonymized data')
|
||||
}
|
||||
} catch {
|
||||
// OpenAI error — mark all for manual review
|
||||
|
||||
let totalTokens = 0
|
||||
|
||||
// Process in batches
|
||||
for (let i = 0; i < anonymized.length; i += BATCH_SIZE) {
|
||||
const batchAnon = anonymized.slice(i, i + BATCH_SIZE)
|
||||
const batchMappings = mappings.slice(i, i + BATCH_SIZE)
|
||||
|
||||
console.log(`[AI Eligibility] Processing batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(anonymized.length / BATCH_SIZE)}`)
|
||||
|
||||
const { results: batchResults, tokensUsed } = await processEligibilityBatch(
|
||||
openai,
|
||||
model,
|
||||
criteriaText,
|
||||
batchAnon,
|
||||
batchMappings,
|
||||
userId,
|
||||
awardId
|
||||
)
|
||||
|
||||
results.push(...batchResults)
|
||||
totalTokens += tokensUsed
|
||||
}
|
||||
|
||||
console.log(`[AI Eligibility] Completed. Total tokens: ${totalTokens}`)
|
||||
|
||||
} catch (error) {
|
||||
const classified = classifyAIError(error)
|
||||
logAIError('AwardEligibility', 'aiInterpretCriteria', classified)
|
||||
|
||||
// Log failed attempt
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'AWARD_ELIGIBILITY',
|
||||
entityType: 'Award',
|
||||
entityId: awardId,
|
||||
model: 'unknown',
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: 0,
|
||||
batchSize: projects.length,
|
||||
itemsProcessed: 0,
|
||||
status: 'ERROR',
|
||||
errorMessage: classified.message,
|
||||
})
|
||||
|
||||
// Return all as needing manual review
|
||||
return projects.map((p) => ({
|
||||
projectId: p.id,
|
||||
eligible: false,
|
||||
confidence: 0,
|
||||
reasoning: 'AI error — requires manual eligibility review',
|
||||
reasoning: `AI error: ${classified.message}`,
|
||||
method: 'AI' as const,
|
||||
}))
|
||||
}
|
||||
|
||||
318
src/server/services/ai-errors.ts
Normal file
318
src/server/services/ai-errors.ts
Normal file
@@ -0,0 +1,318 @@
|
||||
/**
|
||||
* AI Error Classification Service
|
||||
*
|
||||
* Provides unified error handling and classification for all AI services.
|
||||
* Converts technical API errors into user-friendly messages.
|
||||
*/
|
||||
|
||||
// ─── Error Types ─────────────────────────────────────────────────────────────
|
||||
|
||||
export type AIErrorType =
|
||||
| 'rate_limit'
|
||||
| 'quota_exceeded'
|
||||
| 'model_not_found'
|
||||
| 'invalid_api_key'
|
||||
| 'context_length'
|
||||
| 'parse_error'
|
||||
| 'timeout'
|
||||
| 'network_error'
|
||||
| 'content_filter'
|
||||
| 'server_error'
|
||||
| 'unknown'
|
||||
|
||||
export interface ClassifiedError {
|
||||
type: AIErrorType
|
||||
message: string
|
||||
originalMessage: string
|
||||
retryable: boolean
|
||||
suggestedAction?: string
|
||||
}
|
||||
|
||||
// ─── Error Patterns ──────────────────────────────────────────────────────────
|
||||
|
||||
interface ErrorPattern {
|
||||
type: AIErrorType
|
||||
patterns: Array<string | RegExp>
|
||||
retryable: boolean
|
||||
userMessage: string
|
||||
suggestedAction?: string
|
||||
}
|
||||
|
||||
const ERROR_PATTERNS: ErrorPattern[] = [
|
||||
{
|
||||
type: 'rate_limit',
|
||||
patterns: [
|
||||
'rate_limit',
|
||||
'rate limit',
|
||||
'too many requests',
|
||||
'429',
|
||||
'quota exceeded',
|
||||
'Rate limit reached',
|
||||
],
|
||||
retryable: true,
|
||||
userMessage: 'Rate limit exceeded. Please wait a few minutes and try again.',
|
||||
suggestedAction: 'Wait 1-2 minutes before retrying, or reduce batch size.',
|
||||
},
|
||||
{
|
||||
type: 'quota_exceeded',
|
||||
patterns: [
|
||||
'insufficient_quota',
|
||||
'billing',
|
||||
'exceeded your current quota',
|
||||
'payment required',
|
||||
'account deactivated',
|
||||
],
|
||||
retryable: false,
|
||||
userMessage: 'API quota exceeded. Please check your OpenAI billing settings.',
|
||||
suggestedAction: 'Add payment method or increase spending limit in OpenAI dashboard.',
|
||||
},
|
||||
{
|
||||
type: 'model_not_found',
|
||||
patterns: [
|
||||
'model_not_found',
|
||||
'does not exist',
|
||||
'The model',
|
||||
'invalid model',
|
||||
'model not available',
|
||||
],
|
||||
retryable: false,
|
||||
userMessage: 'The selected AI model is not available. Please check your settings.',
|
||||
suggestedAction: 'Go to Settings → AI and select a different model.',
|
||||
},
|
||||
{
|
||||
type: 'invalid_api_key',
|
||||
patterns: [
|
||||
'invalid_api_key',
|
||||
'Incorrect API key',
|
||||
'authentication',
|
||||
'unauthorized',
|
||||
'401',
|
||||
'invalid api key',
|
||||
],
|
||||
retryable: false,
|
||||
userMessage: 'Invalid API key. Please check your OpenAI API key in settings.',
|
||||
suggestedAction: 'Go to Settings → AI and enter a valid API key.',
|
||||
},
|
||||
{
|
||||
type: 'context_length',
|
||||
patterns: [
|
||||
'context_length',
|
||||
'maximum context length',
|
||||
'tokens',
|
||||
'too long',
|
||||
'reduce the length',
|
||||
'max_tokens',
|
||||
],
|
||||
retryable: true,
|
||||
userMessage: 'Request too large. Try processing fewer items at once.',
|
||||
suggestedAction: 'Process items in smaller batches.',
|
||||
},
|
||||
{
|
||||
type: 'content_filter',
|
||||
patterns: [
|
||||
'content_filter',
|
||||
'content policy',
|
||||
'flagged',
|
||||
'inappropriate',
|
||||
'safety system',
|
||||
],
|
||||
retryable: false,
|
||||
userMessage: 'Content was flagged by the AI safety system. Please review the input data.',
|
||||
suggestedAction: 'Check project descriptions for potentially sensitive content.',
|
||||
},
|
||||
{
|
||||
type: 'timeout',
|
||||
patterns: [
|
||||
'timeout',
|
||||
'timed out',
|
||||
'ETIMEDOUT',
|
||||
'ECONNABORTED',
|
||||
'deadline exceeded',
|
||||
],
|
||||
retryable: true,
|
||||
userMessage: 'Request timed out. Please try again.',
|
||||
suggestedAction: 'Try again or process fewer items at once.',
|
||||
},
|
||||
{
|
||||
type: 'network_error',
|
||||
patterns: [
|
||||
'ENOTFOUND',
|
||||
'ECONNREFUSED',
|
||||
'network',
|
||||
'connection',
|
||||
'DNS',
|
||||
'getaddrinfo',
|
||||
],
|
||||
retryable: true,
|
||||
userMessage: 'Network error. Please check your connection and try again.',
|
||||
suggestedAction: 'Check network connectivity and firewall settings.',
|
||||
},
|
||||
{
|
||||
type: 'server_error',
|
||||
patterns: [
|
||||
'500',
|
||||
'502',
|
||||
'503',
|
||||
'504',
|
||||
'internal error',
|
||||
'server error',
|
||||
'service unavailable',
|
||||
],
|
||||
retryable: true,
|
||||
userMessage: 'OpenAI service temporarily unavailable. Please try again later.',
|
||||
suggestedAction: 'Wait a few minutes and retry. Check status.openai.com for outages.',
|
||||
},
|
||||
]
|
||||
|
||||
// ─── Error Classification ────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Classify an error from the OpenAI API
|
||||
*/
|
||||
export function classifyAIError(error: Error | unknown): ClassifiedError {
|
||||
const errorMessage = error instanceof Error ? error.message : String(error)
|
||||
const errorString = errorMessage.toLowerCase()
|
||||
|
||||
// Check against known patterns
|
||||
for (const pattern of ERROR_PATTERNS) {
|
||||
for (const matcher of pattern.patterns) {
|
||||
const matches =
|
||||
typeof matcher === 'string'
|
||||
? errorString.includes(matcher.toLowerCase())
|
||||
: matcher.test(errorString)
|
||||
|
||||
if (matches) {
|
||||
return {
|
||||
type: pattern.type,
|
||||
message: pattern.userMessage,
|
||||
originalMessage: errorMessage,
|
||||
retryable: pattern.retryable,
|
||||
suggestedAction: pattern.suggestedAction,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown error
|
||||
return {
|
||||
type: 'unknown',
|
||||
message: 'An unexpected error occurred. Please try again.',
|
||||
originalMessage: errorMessage,
|
||||
retryable: true,
|
||||
suggestedAction: 'If the problem persists, check the AI settings or contact support.',
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an error is a JSON parse error
|
||||
*/
|
||||
export function isParseError(error: Error | unknown): boolean {
|
||||
const message = error instanceof Error ? error.message : String(error)
|
||||
return (
|
||||
message.includes('JSON') ||
|
||||
message.includes('parse') ||
|
||||
message.includes('Unexpected token') ||
|
||||
message.includes('SyntaxError')
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a classified parse error
|
||||
*/
|
||||
export function createParseError(originalMessage: string): ClassifiedError {
|
||||
return {
|
||||
type: 'parse_error',
|
||||
message: 'AI returned an invalid response. Items flagged for manual review.',
|
||||
originalMessage,
|
||||
retryable: true,
|
||||
suggestedAction: 'Review flagged items manually. Consider using a different model.',
|
||||
}
|
||||
}
|
||||
|
||||
// ─── User-Friendly Messages ──────────────────────────────────────────────────
|
||||
|
||||
const USER_FRIENDLY_MESSAGES: Record<AIErrorType, string> = {
|
||||
rate_limit: 'Rate limit exceeded. Please wait a few minutes and try again.',
|
||||
quota_exceeded: 'API quota exceeded. Please check your OpenAI billing settings.',
|
||||
model_not_found: 'Selected AI model is not available. Please check your settings.',
|
||||
invalid_api_key: 'Invalid API key. Please verify your OpenAI API key.',
|
||||
context_length: 'Request too large. Please try with fewer items.',
|
||||
parse_error: 'AI response could not be processed. Items flagged for review.',
|
||||
timeout: 'Request timed out. Please try again.',
|
||||
network_error: 'Network connection error. Please check your connection.',
|
||||
content_filter: 'Content flagged by AI safety system. Please review input data.',
|
||||
server_error: 'AI service temporarily unavailable. Please try again later.',
|
||||
unknown: 'An unexpected error occurred. Please try again.',
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a user-friendly message for an error type
|
||||
*/
|
||||
export function getUserFriendlyMessage(errorType: AIErrorType): string {
|
||||
return USER_FRIENDLY_MESSAGES[errorType]
|
||||
}
|
||||
|
||||
// ─── Error Handling Helpers ──────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Wrap an async function with standardized AI error handling
|
||||
*/
|
||||
export async function withAIErrorHandling<T>(
|
||||
fn: () => Promise<T>,
|
||||
fallback: T
|
||||
): Promise<{ result: T; error?: ClassifiedError }> {
|
||||
try {
|
||||
const result = await fn()
|
||||
return { result }
|
||||
} catch (error) {
|
||||
const classified = classifyAIError(error)
|
||||
console.error(`[AI Error] ${classified.type}:`, classified.originalMessage)
|
||||
return { result: fallback, error: classified }
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Log an AI error with context
|
||||
*/
|
||||
export function logAIError(
|
||||
service: string,
|
||||
operation: string,
|
||||
error: ClassifiedError,
|
||||
context?: Record<string, unknown>
|
||||
): void {
|
||||
console.error(
|
||||
`[AI ${service}] ${operation} failed:`,
|
||||
JSON.stringify({
|
||||
type: error.type,
|
||||
message: error.message,
|
||||
originalMessage: error.originalMessage,
|
||||
retryable: error.retryable,
|
||||
...context,
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
// ─── Retry Logic ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Determine if an operation should be retried based on error type
|
||||
*/
|
||||
export function shouldRetry(error: ClassifiedError, attempt: number, maxAttempts: number = 3): boolean {
|
||||
if (!error.retryable) return false
|
||||
if (attempt >= maxAttempts) return false
|
||||
|
||||
// Rate limits need longer delays
|
||||
if (error.type === 'rate_limit') {
|
||||
return attempt < 2 // Only retry once for rate limits
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate delay before retry (exponential backoff)
|
||||
*/
|
||||
export function getRetryDelay(error: ClassifiedError, attempt: number): number {
|
||||
const baseDelay = error.type === 'rate_limit' ? 30000 : 1000 // 30s for rate limit, 1s otherwise
|
||||
return baseDelay * Math.pow(2, attempt)
|
||||
}
|
||||
@@ -5,10 +5,24 @@
|
||||
* - Field-based rules (age checks, category, country, etc.)
|
||||
* - Document checks (file existence/types)
|
||||
* - AI screening (GPT interprets criteria text, flags spam)
|
||||
*
|
||||
* GDPR Compliance:
|
||||
* - All project data is anonymized before AI processing
|
||||
* - Only necessary fields sent to OpenAI
|
||||
* - No personal identifiers in prompts or responses
|
||||
*/
|
||||
|
||||
import { getOpenAI, getConfiguredModel } from '@/lib/openai'
|
||||
import type { Prisma } from '@prisma/client'
|
||||
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
|
||||
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
|
||||
import { classifyAIError, createParseError, logAIError } from './ai-errors'
|
||||
import {
|
||||
anonymizeProjectsForAI,
|
||||
validateAnonymizedProjects,
|
||||
type ProjectWithRelations,
|
||||
type AnonymizedProjectForAI,
|
||||
type ProjectAIMapping,
|
||||
} from './anonymization'
|
||||
import type { Prisma, FileType, SubmissionSource } from '@prisma/client'
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -80,7 +94,14 @@ interface ProjectForFiltering {
|
||||
tags: string[]
|
||||
oceanIssue?: string | null
|
||||
wantsMentorship?: boolean | null
|
||||
files: Array<{ id: string; fileName: string; fileType?: string | null }>
|
||||
institution?: string | null
|
||||
submissionSource?: SubmissionSource
|
||||
submittedAt?: Date | null
|
||||
files: Array<{ id: string; fileName: string; fileType?: FileType | null }>
|
||||
_count?: {
|
||||
teamMembers?: number
|
||||
files?: number
|
||||
}
|
||||
}
|
||||
|
||||
interface FilteringRuleInput {
|
||||
@@ -92,6 +113,15 @@ interface FilteringRuleInput {
|
||||
isActive: boolean
|
||||
}
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
const BATCH_SIZE = 20
|
||||
|
||||
// Optimized system prompt (compressed for token efficiency)
|
||||
const AI_SCREENING_SYSTEM_PROMPT = `Project screening assistant. Evaluate against criteria, return JSON.
|
||||
Format: {"projects": [{project_id, meets_criteria: bool, confidence: 0-1, reasoning: str, quality_score: 1-10, spam_risk: bool}]}
|
||||
Be objective. Base evaluation only on provided data. No personal identifiers in reasoning.`
|
||||
|
||||
// ─── Field-Based Rule Evaluation ────────────────────────────────────────────
|
||||
|
||||
function evaluateCondition(
|
||||
@@ -185,14 +215,9 @@ export function evaluateFieldRule(
|
||||
? results.every(Boolean)
|
||||
: results.some(Boolean)
|
||||
|
||||
// If conditions met, the rule's action applies
|
||||
// For PASS action: conditions met = passed, not met = not passed
|
||||
// For REJECT action: conditions met = rejected (not passed)
|
||||
// For FLAG action: conditions met = flagged
|
||||
if (config.action === 'PASS') {
|
||||
return { passed: allConditionsMet, action: config.action }
|
||||
}
|
||||
// For REJECT/FLAG: conditions matching means the project should be rejected/flagged
|
||||
return { passed: !allConditionsMet, action: config.action }
|
||||
}
|
||||
|
||||
@@ -226,55 +251,173 @@ export function evaluateDocumentRule(
|
||||
|
||||
// ─── AI Screening ───────────────────────────────────────────────────────────
|
||||
|
||||
const AI_SCREENING_SYSTEM_PROMPT = `You are a project screening assistant. You evaluate projects against specific criteria.
|
||||
You must return a JSON object with this structure:
|
||||
{
|
||||
"projects": [
|
||||
{
|
||||
"project_id": "string",
|
||||
"meets_criteria": boolean,
|
||||
"confidence": number (0-1),
|
||||
"reasoning": "string",
|
||||
"quality_score": number (1-10),
|
||||
"spam_risk": boolean
|
||||
}
|
||||
]
|
||||
interface AIScreeningResult {
|
||||
meetsCriteria: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
qualityScore: number
|
||||
spamRisk: boolean
|
||||
}
|
||||
|
||||
Be fair and objective. Base your evaluation only on the information provided.
|
||||
Never include personal identifiers in your reasoning.`
|
||||
/**
|
||||
* Convert project to enhanced format for anonymization
|
||||
*/
|
||||
function toProjectWithRelations(project: ProjectForFiltering): ProjectWithRelations {
|
||||
return {
|
||||
id: project.id,
|
||||
title: project.title,
|
||||
description: project.description,
|
||||
competitionCategory: project.competitionCategory as any,
|
||||
oceanIssue: project.oceanIssue as any,
|
||||
country: project.country,
|
||||
geographicZone: project.geographicZone,
|
||||
institution: project.institution,
|
||||
tags: project.tags,
|
||||
foundedAt: project.foundedAt,
|
||||
wantsMentorship: project.wantsMentorship ?? false,
|
||||
submissionSource: project.submissionSource ?? 'MANUAL',
|
||||
submittedAt: project.submittedAt,
|
||||
_count: {
|
||||
teamMembers: project._count?.teamMembers ?? 0,
|
||||
files: project.files?.length ?? 0,
|
||||
},
|
||||
files: project.files?.map(f => ({ fileType: f.fileType ?? null })) ?? [],
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute AI screening on a batch of projects
|
||||
*/
|
||||
async function processAIBatch(
|
||||
openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
|
||||
model: string,
|
||||
criteriaText: string,
|
||||
anonymized: AnonymizedProjectForAI[],
|
||||
mappings: ProjectAIMapping[],
|
||||
userId?: string,
|
||||
entityId?: string
|
||||
): Promise<{
|
||||
results: Map<string, AIScreeningResult>
|
||||
tokensUsed: number
|
||||
}> {
|
||||
const results = new Map<string, AIScreeningResult>()
|
||||
let tokensUsed = 0
|
||||
|
||||
// Build optimized prompt
|
||||
const userPrompt = `CRITERIA: ${criteriaText}
|
||||
PROJECTS: ${JSON.stringify(anonymized)}
|
||||
Evaluate and return JSON.`
|
||||
|
||||
try {
|
||||
const params = buildCompletionParams(model, {
|
||||
messages: [
|
||||
{ role: 'system', content: AI_SCREENING_SYSTEM_PROMPT },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
jsonMode: true,
|
||||
temperature: 0.3,
|
||||
maxTokens: 4000,
|
||||
})
|
||||
|
||||
const response = await openai.chat.completions.create(params)
|
||||
const usage = extractTokenUsage(response)
|
||||
tokensUsed = usage.totalTokens
|
||||
|
||||
// Log usage
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'FILTERING',
|
||||
entityType: 'Round',
|
||||
entityId,
|
||||
model,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
totalTokens: usage.totalTokens,
|
||||
batchSize: anonymized.length,
|
||||
itemsProcessed: anonymized.length,
|
||||
status: 'SUCCESS',
|
||||
})
|
||||
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (!content) {
|
||||
throw new Error('Empty response from AI')
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(content) as {
|
||||
projects: Array<{
|
||||
project_id: string
|
||||
meets_criteria: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
quality_score: number
|
||||
spam_risk: boolean
|
||||
}>
|
||||
}
|
||||
|
||||
// Map results back to real IDs
|
||||
for (const result of parsed.projects || []) {
|
||||
const mapping = mappings.find((m) => m.anonymousId === result.project_id)
|
||||
if (mapping) {
|
||||
results.set(mapping.realId, {
|
||||
meetsCriteria: result.meets_criteria,
|
||||
confidence: result.confidence,
|
||||
reasoning: result.reasoning,
|
||||
qualityScore: result.quality_score,
|
||||
spamRisk: result.spam_risk,
|
||||
})
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
// Check if parse error
|
||||
if (error instanceof SyntaxError) {
|
||||
const parseError = createParseError(error.message)
|
||||
logAIError('Filtering', 'batch processing', parseError)
|
||||
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'FILTERING',
|
||||
entityType: 'Round',
|
||||
entityId,
|
||||
model,
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: tokensUsed,
|
||||
batchSize: anonymized.length,
|
||||
itemsProcessed: 0,
|
||||
status: 'ERROR',
|
||||
errorMessage: parseError.message,
|
||||
})
|
||||
|
||||
// Flag all for manual review
|
||||
for (const mapping of mappings) {
|
||||
results.set(mapping.realId, {
|
||||
meetsCriteria: false,
|
||||
confidence: 0,
|
||||
reasoning: 'AI response parse error — flagged for manual review',
|
||||
qualityScore: 5,
|
||||
spamRisk: false,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
throw error // Re-throw for outer catch
|
||||
}
|
||||
}
|
||||
|
||||
return { results, tokensUsed }
|
||||
}
|
||||
|
||||
export async function executeAIScreening(
|
||||
config: AIScreeningConfig,
|
||||
projects: ProjectForFiltering[]
|
||||
): Promise<
|
||||
Map<
|
||||
string,
|
||||
{
|
||||
meetsCriteria: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
qualityScore: number
|
||||
spamRisk: boolean
|
||||
}
|
||||
>
|
||||
> {
|
||||
const results = new Map<
|
||||
string,
|
||||
{
|
||||
meetsCriteria: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
qualityScore: number
|
||||
spamRisk: boolean
|
||||
}
|
||||
>()
|
||||
projects: ProjectForFiltering[],
|
||||
userId?: string,
|
||||
entityId?: string
|
||||
): Promise<Map<string, AIScreeningResult>> {
|
||||
const results = new Map<string, AIScreeningResult>()
|
||||
|
||||
try {
|
||||
const openai = await getOpenAI()
|
||||
if (!openai) {
|
||||
// No OpenAI configured — flag all for manual review
|
||||
console.warn('[AI Filtering] OpenAI client not available - API key may not be configured')
|
||||
console.warn('[AI Filtering] OpenAI not configured')
|
||||
for (const p of projects) {
|
||||
results.set(p.id, {
|
||||
meetsCriteria: false,
|
||||
@@ -290,133 +433,71 @@ export async function executeAIScreening(
|
||||
const model = await getConfiguredModel()
|
||||
console.log(`[AI Filtering] Using model: ${model} for ${projects.length} projects`)
|
||||
|
||||
// Anonymize project data — use numeric IDs
|
||||
const anonymizedProjects = projects.map((p, i) => ({
|
||||
project_id: `P${i + 1}`,
|
||||
real_id: p.id,
|
||||
title: p.title,
|
||||
description: p.description?.slice(0, 500) || '',
|
||||
category: p.competitionCategory || 'Unknown',
|
||||
ocean_issue: p.oceanIssue || 'Unknown',
|
||||
country: p.country || 'Unknown',
|
||||
tags: p.tags.join(', '),
|
||||
has_files: (p.files?.length || 0) > 0,
|
||||
}))
|
||||
// Convert and anonymize projects
|
||||
const projectsWithRelations = projects.map(toProjectWithRelations)
|
||||
const { anonymized, mappings } = anonymizeProjectsForAI(projectsWithRelations, 'FILTERING')
|
||||
|
||||
// Process in batches of 20
|
||||
const batchSize = 20
|
||||
for (let i = 0; i < anonymizedProjects.length; i += batchSize) {
|
||||
const batch = anonymizedProjects.slice(i, i + batchSize)
|
||||
// Validate anonymization
|
||||
if (!validateAnonymizedProjects(anonymized)) {
|
||||
console.error('[AI Filtering] Anonymization validation failed')
|
||||
throw new Error('GDPR compliance check failed: PII detected in anonymized data')
|
||||
}
|
||||
|
||||
const userPrompt = `Evaluate these projects against the following criteria:
|
||||
let totalTokens = 0
|
||||
|
||||
CRITERIA: ${config.criteriaText}
|
||||
// Process in batches
|
||||
for (let i = 0; i < anonymized.length; i += BATCH_SIZE) {
|
||||
const batchAnon = anonymized.slice(i, i + BATCH_SIZE)
|
||||
const batchMappings = mappings.slice(i, i + BATCH_SIZE)
|
||||
|
||||
PROJECTS:
|
||||
${JSON.stringify(
|
||||
batch.map(({ real_id, ...rest }) => rest),
|
||||
null,
|
||||
2
|
||||
)}
|
||||
console.log(`[AI Filtering] Processing batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(anonymized.length / BATCH_SIZE)}`)
|
||||
|
||||
Return your evaluation as JSON.`
|
||||
|
||||
console.log(`[AI Filtering] Processing batch ${Math.floor(i / batchSize) + 1}, ${batch.length} projects`)
|
||||
|
||||
const response = await openai.chat.completions.create({
|
||||
const { results: batchResults, tokensUsed } = await processAIBatch(
|
||||
openai,
|
||||
model,
|
||||
messages: [
|
||||
{ role: 'system', content: AI_SCREENING_SYSTEM_PROMPT },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
response_format: { type: 'json_object' },
|
||||
temperature: 0.3,
|
||||
max_tokens: 4000,
|
||||
})
|
||||
config.criteriaText,
|
||||
batchAnon,
|
||||
batchMappings,
|
||||
userId,
|
||||
entityId
|
||||
)
|
||||
|
||||
console.log(`[AI Filtering] Batch completed, usage: ${response.usage?.total_tokens} tokens`)
|
||||
totalTokens += tokensUsed
|
||||
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (content) {
|
||||
try {
|
||||
const parsed = JSON.parse(content) as {
|
||||
projects: Array<{
|
||||
project_id: string
|
||||
meets_criteria: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
quality_score: number
|
||||
spam_risk: boolean
|
||||
}>
|
||||
}
|
||||
|
||||
console.log(`[AI Filtering] Parsed ${parsed.projects?.length || 0} results from response`)
|
||||
|
||||
for (const result of parsed.projects) {
|
||||
const anon = batch.find((b) => b.project_id === result.project_id)
|
||||
if (anon) {
|
||||
results.set(anon.real_id, {
|
||||
meetsCriteria: result.meets_criteria,
|
||||
confidence: result.confidence,
|
||||
reasoning: result.reasoning,
|
||||
qualityScore: result.quality_score,
|
||||
spamRisk: result.spam_risk,
|
||||
})
|
||||
}
|
||||
}
|
||||
} catch (parseError) {
|
||||
// Parse error — flag batch for manual review
|
||||
console.error('[AI Filtering] JSON parse error:', parseError)
|
||||
console.error('[AI Filtering] Raw response content:', content.slice(0, 500))
|
||||
for (const item of batch) {
|
||||
results.set(item.real_id, {
|
||||
meetsCriteria: false,
|
||||
confidence: 0,
|
||||
reasoning: 'AI response parse error — flagged for manual review',
|
||||
qualityScore: 5,
|
||||
spamRisk: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
} else {
|
||||
console.error('[AI Filtering] Empty response content from API')
|
||||
// Merge batch results
|
||||
for (const [id, result] of batchResults) {
|
||||
results.set(id, result)
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`[AI Filtering] Completed. Total tokens: ${totalTokens}`)
|
||||
|
||||
} catch (error) {
|
||||
// OpenAI error — flag all for manual review with specific error info
|
||||
console.error('[AI Filtering] OpenAI API error:', error)
|
||||
const classified = classifyAIError(error)
|
||||
logAIError('Filtering', 'executeAIScreening', classified)
|
||||
|
||||
// Extract meaningful error message
|
||||
let errorType = 'unknown_error'
|
||||
let errorDetail = 'Unknown error occurred'
|
||||
|
||||
if (error instanceof Error) {
|
||||
const message = error.message.toLowerCase()
|
||||
if (message.includes('rate_limit') || message.includes('rate limit')) {
|
||||
errorType = 'rate_limit'
|
||||
errorDetail = 'OpenAI rate limit exceeded. Try again in a few minutes.'
|
||||
} else if (message.includes('model') && (message.includes('not found') || message.includes('does not exist'))) {
|
||||
errorType = 'model_not_found'
|
||||
errorDetail = 'The configured AI model is not available. Check Settings → AI.'
|
||||
} else if (message.includes('insufficient_quota') || message.includes('quota')) {
|
||||
errorType = 'quota_exceeded'
|
||||
errorDetail = 'OpenAI API quota exceeded. Check your billing settings.'
|
||||
} else if (message.includes('invalid_api_key') || message.includes('unauthorized')) {
|
||||
errorType = 'invalid_api_key'
|
||||
errorDetail = 'Invalid OpenAI API key. Check Settings → AI.'
|
||||
} else if (message.includes('context_length') || message.includes('token')) {
|
||||
errorType = 'context_length'
|
||||
errorDetail = 'Request too large. Try with fewer projects or shorter descriptions.'
|
||||
} else {
|
||||
errorDetail = error.message
|
||||
}
|
||||
}
|
||||
// Log failed attempt
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'FILTERING',
|
||||
entityType: 'Round',
|
||||
entityId,
|
||||
model: 'unknown',
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: 0,
|
||||
batchSize: projects.length,
|
||||
itemsProcessed: 0,
|
||||
status: 'ERROR',
|
||||
errorMessage: classified.message,
|
||||
})
|
||||
|
||||
// Flag all for manual review with error info
|
||||
for (const p of projects) {
|
||||
results.set(p.id, {
|
||||
meetsCriteria: false,
|
||||
confidence: 0,
|
||||
reasoning: `AI screening error (${errorType}): ${errorDetail}`,
|
||||
reasoning: `AI screening error: ${classified.message}`,
|
||||
qualityScore: 5,
|
||||
spamRisk: false,
|
||||
})
|
||||
@@ -430,7 +511,9 @@ Return your evaluation as JSON.`
|
||||
|
||||
export async function executeFilteringRules(
|
||||
rules: FilteringRuleInput[],
|
||||
projects: ProjectForFiltering[]
|
||||
projects: ProjectForFiltering[],
|
||||
userId?: string,
|
||||
roundId?: string
|
||||
): Promise<ProjectFilteringResult[]> {
|
||||
const activeRules = rules
|
||||
.filter((r) => r.isActive)
|
||||
@@ -441,23 +524,11 @@ export async function executeFilteringRules(
|
||||
const nonAiRules = activeRules.filter((r) => r.ruleType !== 'AI_SCREENING')
|
||||
|
||||
// Pre-compute AI screening results if needed
|
||||
const aiResults = new Map<
|
||||
string,
|
||||
Map<
|
||||
string,
|
||||
{
|
||||
meetsCriteria: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
qualityScore: number
|
||||
spamRisk: boolean
|
||||
}
|
||||
>
|
||||
>()
|
||||
const aiResults = new Map<string, Map<string, AIScreeningResult>>()
|
||||
|
||||
for (const aiRule of aiRules) {
|
||||
const config = aiRule.configJson as unknown as AIScreeningConfig
|
||||
const screeningResults = await executeAIScreening(config, projects)
|
||||
const screeningResults = await executeAIScreening(config, projects, userId, roundId)
|
||||
aiResults.set(aiRule.id, screeningResults)
|
||||
}
|
||||
|
||||
|
||||
@@ -3,8 +3,44 @@
|
||||
*
|
||||
* Strips PII (names, emails, etc.) from data before sending to AI services.
|
||||
* Returns ID mappings for de-anonymization of results.
|
||||
*
|
||||
* GDPR Compliance:
|
||||
* - All personal identifiers are stripped before AI processing
|
||||
* - Project/user IDs are replaced with sequential anonymous IDs
|
||||
* - Text content is sanitized to remove emails, phones, URLs
|
||||
* - Validation ensures no PII leakage before each AI call
|
||||
*/
|
||||
|
||||
import type {
|
||||
CompetitionCategory,
|
||||
OceanIssue,
|
||||
FileType,
|
||||
SubmissionSource,
|
||||
} from '@prisma/client'
|
||||
|
||||
// ─── Description Limits ──────────────────────────────────────────────────────
|
||||
|
||||
export const DESCRIPTION_LIMITS = {
|
||||
ASSIGNMENT: 300,
|
||||
FILTERING: 500,
|
||||
ELIGIBILITY: 400,
|
||||
MENTOR: 350,
|
||||
} as const
|
||||
|
||||
export type DescriptionContext = keyof typeof DESCRIPTION_LIMITS
|
||||
|
||||
// ─── PII Patterns ────────────────────────────────────────────────────────────
|
||||
|
||||
const PII_PATTERNS = {
|
||||
email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
|
||||
phone: /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,
|
||||
url: /https?:\/\/[^\s]+/g,
|
||||
ssn: /\d{3}-\d{2}-\d{4}/g,
|
||||
ipv4: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
|
||||
} as const
|
||||
|
||||
// ─── Basic Anonymization Types (Assignment Service) ──────────────────────────
|
||||
|
||||
export interface AnonymizedJuror {
|
||||
anonymousId: string
|
||||
expertiseTags: string[]
|
||||
@@ -37,9 +73,67 @@ export interface AnonymizationResult {
|
||||
projectMappings: ProjectMapping[]
|
||||
}
|
||||
|
||||
// ─── Enhanced Project Types (Filtering/Awards) ───────────────────────────────
|
||||
|
||||
/**
|
||||
* Juror data from database
|
||||
* Comprehensive anonymized project data for AI filtering
|
||||
* Includes all fields needed for flexible filtering criteria
|
||||
*/
|
||||
export interface AnonymizedProjectForAI {
|
||||
project_id: string // P1, P2, etc.
|
||||
title: string // Sanitized
|
||||
description: string // Truncated + PII stripped
|
||||
category: CompetitionCategory | null // STARTUP | BUSINESS_CONCEPT
|
||||
ocean_issue: OceanIssue | null // Enum value
|
||||
country: string | null
|
||||
region: string | null // geographicZone
|
||||
institution: string | null
|
||||
tags: string[]
|
||||
founded_year: number | null // Just the year
|
||||
team_size: number
|
||||
has_description: boolean
|
||||
file_count: number
|
||||
file_types: string[] // FileType values
|
||||
wants_mentorship: boolean
|
||||
submission_source: SubmissionSource
|
||||
submitted_date: string | null // YYYY-MM-DD only
|
||||
}
|
||||
|
||||
/**
|
||||
* Project input with all relations needed for comprehensive anonymization
|
||||
*/
|
||||
export interface ProjectWithRelations {
|
||||
id: string
|
||||
title: string
|
||||
description?: string | null
|
||||
teamName?: string | null
|
||||
competitionCategory?: CompetitionCategory | null
|
||||
oceanIssue?: OceanIssue | null
|
||||
country?: string | null
|
||||
geographicZone?: string | null
|
||||
institution?: string | null
|
||||
tags: string[]
|
||||
foundedAt?: Date | null
|
||||
wantsMentorship?: boolean
|
||||
submissionSource: SubmissionSource
|
||||
submittedAt?: Date | null
|
||||
_count?: {
|
||||
teamMembers?: number
|
||||
files?: number
|
||||
}
|
||||
files?: Array<{ fileType: FileType | null }>
|
||||
}
|
||||
|
||||
/**
|
||||
* Mapping for de-anonymization
|
||||
*/
|
||||
export interface ProjectAIMapping {
|
||||
anonymousId: string
|
||||
realId: string
|
||||
}
|
||||
|
||||
// ─── Basic Anonymization (Assignment Service) ────────────────────────────────
|
||||
|
||||
interface JurorInput {
|
||||
id: string
|
||||
name?: string | null
|
||||
@@ -51,9 +145,6 @@ interface JurorInput {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Project data from database
|
||||
*/
|
||||
interface ProjectInput {
|
||||
id: string
|
||||
title: string
|
||||
@@ -63,13 +154,7 @@ interface ProjectInput {
|
||||
}
|
||||
|
||||
/**
|
||||
* Anonymize juror and project data for AI processing
|
||||
*
|
||||
* This function:
|
||||
* 1. Strips all PII (names, emails) from juror data
|
||||
* 2. Replaces real IDs with sequential anonymous IDs
|
||||
* 3. Keeps only expertise tags and assignment counts
|
||||
* 4. Returns mappings for de-anonymization
|
||||
* Anonymize juror and project data for AI processing (Assignment service)
|
||||
*/
|
||||
export function anonymizeForAI(
|
||||
jurors: JurorInput[],
|
||||
@@ -78,7 +163,6 @@ export function anonymizeForAI(
|
||||
const jurorMappings: JurorMapping[] = []
|
||||
const projectMappings: ProjectMapping[] = []
|
||||
|
||||
// Anonymize jurors
|
||||
const anonymizedJurors: AnonymizedJuror[] = jurors.map((juror, index) => {
|
||||
const anonymousId = `juror_${(index + 1).toString().padStart(3, '0')}`
|
||||
|
||||
@@ -95,7 +179,6 @@ export function anonymizeForAI(
|
||||
}
|
||||
})
|
||||
|
||||
// Anonymize projects (keep content but replace IDs)
|
||||
const anonymizedProjects: AnonymizedProject[] = projects.map(
|
||||
(project, index) => {
|
||||
const anonymousId = `project_${(index + 1).toString().padStart(3, '0')}`
|
||||
@@ -109,10 +192,9 @@ export function anonymizeForAI(
|
||||
anonymousId,
|
||||
title: sanitizeText(project.title),
|
||||
description: project.description
|
||||
? sanitizeText(project.description)
|
||||
? truncateAndSanitize(project.description, DESCRIPTION_LIMITS.ASSIGNMENT)
|
||||
: null,
|
||||
tags: project.tags,
|
||||
// Replace specific team names with generic identifier
|
||||
teamName: project.teamName ? `Team ${index + 1}` : null,
|
||||
}
|
||||
}
|
||||
@@ -126,10 +208,77 @@ export function anonymizeForAI(
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Enhanced Anonymization (Filtering/Awards) ───────────────────────────────
|
||||
|
||||
/**
|
||||
* Anonymize a single project with comprehensive data for AI filtering
|
||||
*
|
||||
* GDPR Compliance:
|
||||
* - Strips team names, email references, phone numbers, URLs
|
||||
* - Replaces IDs with sequential anonymous IDs
|
||||
* - Truncates descriptions to limit data exposure
|
||||
* - Keeps only necessary fields for filtering criteria
|
||||
*/
|
||||
export function anonymizeProjectForAI(
|
||||
project: ProjectWithRelations,
|
||||
index: number,
|
||||
context: DescriptionContext = 'FILTERING'
|
||||
): AnonymizedProjectForAI {
|
||||
const descriptionLimit = DESCRIPTION_LIMITS[context]
|
||||
|
||||
return {
|
||||
project_id: `P${index + 1}`,
|
||||
title: sanitizeText(project.title),
|
||||
description: truncateAndSanitize(project.description, descriptionLimit),
|
||||
category: project.competitionCategory ?? null,
|
||||
ocean_issue: project.oceanIssue ?? null,
|
||||
country: project.country ?? null,
|
||||
region: project.geographicZone ?? null,
|
||||
institution: project.institution ?? null,
|
||||
tags: project.tags,
|
||||
founded_year: project.foundedAt?.getFullYear() ?? null,
|
||||
team_size: project._count?.teamMembers ?? 0,
|
||||
has_description: !!project.description?.trim(),
|
||||
file_count: project._count?.files ?? 0,
|
||||
file_types: project.files
|
||||
?.map((f) => f.fileType)
|
||||
.filter((ft): ft is FileType => ft !== null) ?? [],
|
||||
wants_mentorship: project.wantsMentorship ?? false,
|
||||
submission_source: project.submissionSource,
|
||||
submitted_date: project.submittedAt?.toISOString().split('T')[0] ?? null,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Anonymize multiple projects and return mappings
|
||||
*/
|
||||
export function anonymizeProjectsForAI(
|
||||
projects: ProjectWithRelations[],
|
||||
context: DescriptionContext = 'FILTERING'
|
||||
): {
|
||||
anonymized: AnonymizedProjectForAI[]
|
||||
mappings: ProjectAIMapping[]
|
||||
} {
|
||||
const mappings: ProjectAIMapping[] = []
|
||||
const anonymized = projects.map((project, index) => {
|
||||
mappings.push({
|
||||
anonymousId: `P${index + 1}`,
|
||||
realId: project.id,
|
||||
})
|
||||
return anonymizeProjectForAI(project, index, context)
|
||||
})
|
||||
|
||||
return { anonymized, mappings }
|
||||
}
|
||||
|
||||
// ─── De-anonymization ────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* De-anonymize AI results back to real IDs
|
||||
*/
|
||||
export function deanonymizeResults<T extends { jurorId: string; projectId: string }>(
|
||||
export function deanonymizeResults<
|
||||
T extends { jurorId: string; projectId: string }
|
||||
>(
|
||||
results: T[],
|
||||
jurorMappings: JurorMapping[],
|
||||
projectMappings: ProjectMapping[]
|
||||
@@ -149,50 +298,155 @@ export function deanonymizeResults<T extends { jurorId: string; projectId: strin
|
||||
}
|
||||
|
||||
/**
|
||||
* Sanitize text to remove potential PII patterns
|
||||
* Removes emails, phone numbers, and URLs from text
|
||||
* De-anonymize project-only results (for filtering/awards)
|
||||
*/
|
||||
function sanitizeText(text: string): string {
|
||||
export function deanonymizeProjectResults<T extends { project_id: string }>(
|
||||
results: T[],
|
||||
mappings: ProjectAIMapping[]
|
||||
): (T & { realProjectId: string })[] {
|
||||
const projectMap = new Map(mappings.map((m) => [m.anonymousId, m.realId]))
|
||||
|
||||
return results.map((result) => ({
|
||||
...result,
|
||||
realProjectId: projectMap.get(result.project_id) || result.project_id,
|
||||
}))
|
||||
}
|
||||
|
||||
// ─── Text Sanitization ───────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Sanitize text to remove potential PII patterns
|
||||
* Removes emails, phone numbers, URLs, and other identifying information
|
||||
*/
|
||||
export function sanitizeText(text: string): string {
|
||||
let sanitized = text
|
||||
|
||||
// Remove email addresses
|
||||
let sanitized = text.replace(
|
||||
/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
|
||||
'[email removed]'
|
||||
)
|
||||
sanitized = sanitized.replace(PII_PATTERNS.email, '[email removed]')
|
||||
|
||||
// Remove phone numbers (various formats)
|
||||
sanitized = sanitized.replace(
|
||||
/(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,
|
||||
'[phone removed]'
|
||||
)
|
||||
sanitized = sanitized.replace(PII_PATTERNS.phone, '[phone removed]')
|
||||
|
||||
// Remove URLs
|
||||
sanitized = sanitized.replace(
|
||||
/https?:\/\/[^\s]+/g,
|
||||
'[url removed]'
|
||||
)
|
||||
sanitized = sanitized.replace(PII_PATTERNS.url, '[url removed]')
|
||||
|
||||
// Remove SSN-like patterns
|
||||
sanitized = sanitized.replace(PII_PATTERNS.ssn, '[id removed]')
|
||||
|
||||
return sanitized
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate text to a maximum length and sanitize
|
||||
*/
|
||||
export function truncateAndSanitize(
|
||||
text: string | null | undefined,
|
||||
maxLength: number
|
||||
): string {
|
||||
if (!text) return ''
|
||||
|
||||
const sanitized = sanitizeText(text)
|
||||
|
||||
if (sanitized.length <= maxLength) {
|
||||
return sanitized
|
||||
}
|
||||
|
||||
return sanitized.slice(0, maxLength - 3) + '...'
|
||||
}
|
||||
|
||||
// ─── GDPR Compliance Validation ──────────────────────────────────────────────
|
||||
|
||||
/**
 * Result of a PII validation pass.
 * `valid` is true when no violations were found; `violations` carries a
 * human-readable message per detected issue.
 */
export interface PIIValidationResult {
  valid: boolean
  violations: string[]
}
|
||||
|
||||
/**
|
||||
* Validate that data contains no personal information
|
||||
* Used for GDPR compliance before sending data to AI
|
||||
*/
|
||||
export function validateNoPersonalData(
|
||||
data: Record<string, unknown>
|
||||
): PIIValidationResult {
|
||||
const violations: string[] = []
|
||||
const textContent = JSON.stringify(data)
|
||||
|
||||
// Check each PII pattern
|
||||
for (const [type, pattern] of Object.entries(PII_PATTERNS)) {
|
||||
// Reset regex state (global flag)
|
||||
pattern.lastIndex = 0
|
||||
|
||||
if (pattern.test(textContent)) {
|
||||
violations.push(`Potential ${type} detected in data`)
|
||||
}
|
||||
}
|
||||
|
||||
// Additional checks for common PII fields
|
||||
const sensitiveFields = [
|
||||
'email',
|
||||
'phone',
|
||||
'password',
|
||||
'ssn',
|
||||
'socialSecurity',
|
||||
'creditCard',
|
||||
'bankAccount',
|
||||
'drivingLicense',
|
||||
]
|
||||
|
||||
const keys = Object.keys(data).map((k) => k.toLowerCase())
|
||||
for (const field of sensitiveFields) {
|
||||
if (keys.includes(field)) {
|
||||
violations.push(`Sensitive field "${field}" present in data`)
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
valid: violations.length === 0,
|
||||
violations,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Enforce GDPR compliance before EVERY AI call
|
||||
* Throws an error if PII is detected
|
||||
*/
|
||||
export function enforceGDPRCompliance(data: unknown[]): void {
|
||||
for (let i = 0; i < data.length; i++) {
|
||||
const item = data[i]
|
||||
if (typeof item === 'object' && item !== null) {
|
||||
const { valid, violations } = validateNoPersonalData(
|
||||
item as Record<string, unknown>
|
||||
)
|
||||
if (!valid) {
|
||||
console.error(
|
||||
`[GDPR] PII validation failed for item ${i}:`,
|
||||
violations
|
||||
)
|
||||
throw new Error(
|
||||
`GDPR compliance check failed: ${violations.join(', ')}`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate that data has been properly anonymized
|
||||
* Returns true if no PII patterns are detected
|
||||
*/
|
||||
export function validateAnonymization(data: AnonymizationResult): boolean {
|
||||
const piiPatterns = [
|
||||
/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/, // Email
|
||||
/(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/, // Phone
|
||||
]
|
||||
|
||||
const checkText = (text: string | null | undefined): boolean => {
|
||||
if (!text) return true
|
||||
return !piiPatterns.some((pattern) => pattern.test(text))
|
||||
// Reset regex state for each check
|
||||
for (const pattern of Object.values(PII_PATTERNS)) {
|
||||
pattern.lastIndex = 0
|
||||
if (pattern.test(text)) return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Check jurors (they should only have expertise tags)
|
||||
// Check jurors
|
||||
for (const juror of data.jurors) {
|
||||
// Jurors should not have any text fields that could contain PII
|
||||
// Only check expertiseTags
|
||||
for (const tag of juror.expertiseTags) {
|
||||
if (!checkText(tag)) return false
|
||||
}
|
||||
@@ -209,3 +463,30 @@ export function validateAnonymization(data: AnonymizationResult): boolean {
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate anonymized projects for AI (enhanced version)
|
||||
*/
|
||||
export function validateAnonymizedProjects(
|
||||
projects: AnonymizedProjectForAI[]
|
||||
): boolean {
|
||||
const checkText = (text: string | null | undefined): boolean => {
|
||||
if (!text) return true
|
||||
for (const pattern of Object.values(PII_PATTERNS)) {
|
||||
pattern.lastIndex = 0
|
||||
if (pattern.test(text)) return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
for (const project of projects) {
|
||||
if (!checkText(project.title)) return false
|
||||
if (!checkText(project.description)) return false
|
||||
if (!checkText(project.institution)) return false
|
||||
for (const tag of project.tags) {
|
||||
if (!checkText(tag)) return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -1,5 +1,33 @@
|
||||
/**
|
||||
* AI-Powered Mentor Matching Service
|
||||
*
|
||||
* Matches mentors to projects based on expertise alignment.
|
||||
*
|
||||
* Optimization:
|
||||
* - Batched processing (15 projects per batch)
|
||||
* - Token tracking and cost logging
|
||||
* - Fallback to algorithmic matching
|
||||
*
|
||||
* GDPR Compliance:
|
||||
* - All data anonymized before AI processing
|
||||
* - No personal information sent to OpenAI
|
||||
*/
|
||||
|
||||
import { PrismaClient, OceanIssue, CompetitionCategory } from '@prisma/client'
|
||||
import { getOpenAI, getConfiguredModel } from '@/lib/openai'
|
||||
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
|
||||
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
|
||||
import { classifyAIError, createParseError, logAIError } from './ai-errors'
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
const MENTOR_BATCH_SIZE = 15
|
||||
|
||||
// Optimized system prompt
|
||||
const MENTOR_MATCHING_SYSTEM_PROMPT = `Match mentors to projects by expertise. Return JSON.
|
||||
Format for each project: {"matches": [{project_id, mentor_matches: [{mentor_index, confidence_score: 0-1, expertise_match_score: 0-1, reasoning: str}]}]}
|
||||
Rank by suitability. Consider expertise alignment and availability.`
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||
|
||||
interface ProjectInfo {
|
||||
id: string
|
||||
@@ -26,17 +54,162 @@ interface MentorMatch {
|
||||
reasoning: string
|
||||
}
|
||||
|
||||
// ─── Batched AI Matching ─────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Get AI-suggested mentor matches for a project
|
||||
* Process a batch of projects for mentor matching
|
||||
*/
|
||||
export async function getAIMentorSuggestions(
|
||||
/**
 * Process one batch of projects for mentor matching via OpenAI.
 *
 * Anonymizes projects and mentors, sends a single JSON-mode completion
 * request, logs token usage, and maps the AI's anonymous IDs back to
 * real project/mentor IDs.
 *
 * @param openai - Resolved OpenAI client (non-null).
 * @param model - Model name to use for the completion.
 * @param projects - Batch of projects (caller slices to MENTOR_BATCH_SIZE).
 * @param mentors - Full mentor pool; referenced by index in the prompt.
 * @param limit - Max mentors the AI is asked to rank per project.
 * @param userId - Optional user attributed in the usage log.
 * @returns Per-project matches plus total tokens consumed by the call.
 * @throws Re-throws any non-SyntaxError failure (API errors etc.) to the
 *   caller; only JSON parse failures are absorbed into empty results.
 */
async function processMatchingBatch(
  openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
  model: string,
  projects: ProjectInfo[],
  mentors: MentorInfo[],
  limit: number,
  userId?: string
): Promise<{
  results: Map<string, MentorMatch[]>
  tokensUsed: number
}> {
  const results = new Map<string, MentorMatch[]>()
  let tokensUsed = 0

  // Anonymize project data: sequential IDs, description capped at 350 chars
  const anonymizedProjects = projects.map((p, index) => ({
    project_id: `P${index + 1}`,
    real_id: p.id,
    description: p.description?.slice(0, 350) || 'No description',
    category: p.competitionCategory,
    oceanIssue: p.oceanIssue,
    tags: p.tags,
  }))

  // Anonymize mentor data: referenced only by array index + expertise tags
  const anonymizedMentors = mentors.map((m, index) => ({
    index,
    expertise: m.expertiseTags,
    availability: m.maxAssignments
      ? `${m.currentAssignments}/${m.maxAssignments}`
      : 'unlimited',
  }))

  const userPrompt = `PROJECTS:
${anonymizedProjects.map(p => `${p.project_id}: Category=${p.category || 'N/A'}, Issue=${p.oceanIssue || 'N/A'}, Tags=[${p.tags.join(', ')}], Desc=${p.description.slice(0, 200)}`).join('\n')}

MENTORS:
${anonymizedMentors.map(m => `${m.index}: Expertise=[${m.expertise.join(', ')}], Availability=${m.availability}`).join('\n')}

For each project, rank top ${limit} mentors.`

  try {
    const params = buildCompletionParams(model, {
      messages: [
        { role: 'system', content: MENTOR_MATCHING_SYSTEM_PROMPT },
        { role: 'user', content: userPrompt },
      ],
      jsonMode: true,
      temperature: 0.3,
      maxTokens: 4000,
    })

    const response = await openai.chat.completions.create(params)
    const usage = extractTokenUsage(response)
    tokensUsed = usage.totalTokens

    // Log usage (success path) before parsing, so tokens are recorded
    // even if the response body turns out to be malformed JSON
    await logAIUsage({
      userId,
      action: 'MENTOR_MATCHING',
      entityType: 'Project',
      model,
      promptTokens: usage.promptTokens,
      completionTokens: usage.completionTokens,
      totalTokens: usage.totalTokens,
      batchSize: projects.length,
      itemsProcessed: projects.length,
      status: 'SUCCESS',
    })

    const content = response.choices[0]?.message?.content
    if (!content) {
      throw new Error('No response from AI')
    }

    const parsed = JSON.parse(content) as {
      matches: Array<{
        project_id: string
        mentor_matches: Array<{
          mentor_index: number
          confidence_score: number
          expertise_match_score: number
          reasoning: string
        }>
      }>
    }

    // Map results back to real IDs
    for (const projectMatch of parsed.matches || []) {
      const project = anonymizedProjects.find(p => p.project_id === projectMatch.project_id)
      if (!project) continue

      const mentorMatches: MentorMatch[] = []
      for (const match of projectMatch.mentor_matches || []) {
        // Drop out-of-range mentor indices the model may hallucinate
        if (match.mentor_index >= 0 && match.mentor_index < mentors.length) {
          mentorMatches.push({
            mentorId: mentors[match.mentor_index].id,
            // Clamp model-provided scores into [0, 1]
            confidenceScore: Math.min(1, Math.max(0, match.confidence_score)),
            expertiseMatchScore: Math.min(1, Math.max(0, match.expertise_match_score)),
            reasoning: match.reasoning,
          })
        }
      }
      results.set(project.real_id, mentorMatches)
    }

  } catch (error) {
    if (error instanceof SyntaxError) {
      // Malformed JSON from the model: log the failure with the tokens
      // already consumed, then return empty results so the caller's
      // algorithmic fallback kicks in for this batch
      const parseError = createParseError(error.message)
      logAIError('MentorMatching', 'batch processing', parseError)

      await logAIUsage({
        userId,
        action: 'MENTOR_MATCHING',
        entityType: 'Project',
        model,
        promptTokens: 0,
        completionTokens: 0,
        totalTokens: tokensUsed,
        batchSize: projects.length,
        itemsProcessed: 0,
        status: 'ERROR',
        errorMessage: parseError.message,
      })

      // Return empty results for batch (will fall back to algorithm)
      for (const project of projects) {
        results.set(project.id, [])
      }
    } else {
      throw error
    }
  }

  return { results, tokensUsed }
}
|
||||
|
||||
/**
|
||||
* Get AI-suggested mentor matches for multiple projects (batched)
|
||||
*/
|
||||
export async function getAIMentorSuggestionsBatch(
|
||||
prisma: PrismaClient,
|
||||
projectId: string,
|
||||
limit: number = 5
|
||||
): Promise<MentorMatch[]> {
|
||||
// Get project details
|
||||
const project = await prisma.project.findUniqueOrThrow({
|
||||
where: { id: projectId },
|
||||
projectIds: string[],
|
||||
limit: number = 5,
|
||||
userId?: string
|
||||
): Promise<Map<string, MentorMatch[]>> {
|
||||
const allResults = new Map<string, MentorMatch[]>()
|
||||
|
||||
// Get projects
|
||||
const projects = await prisma.project.findMany({
|
||||
where: { id: { in: projectIds } },
|
||||
select: {
|
||||
id: true,
|
||||
title: true,
|
||||
@@ -47,14 +220,16 @@ export async function getAIMentorSuggestions(
|
||||
},
|
||||
})
|
||||
|
||||
// Get available mentors (users with expertise tags)
|
||||
// In a full implementation, you'd have a MENTOR role
|
||||
// For now, we use users with expertiseTags and consider them potential mentors
|
||||
if (projects.length === 0) {
|
||||
return allResults
|
||||
}
|
||||
|
||||
// Get available mentors
|
||||
const mentors = await prisma.user.findMany({
|
||||
where: {
|
||||
OR: [
|
||||
{ expertiseTags: { isEmpty: false } },
|
||||
{ role: 'JURY_MEMBER' }, // Jury members can also be mentors
|
||||
{ role: 'JURY_MEMBER' },
|
||||
],
|
||||
status: 'ACTIVE',
|
||||
},
|
||||
@@ -86,118 +261,111 @@ export async function getAIMentorSuggestions(
|
||||
}))
|
||||
|
||||
if (availableMentors.length === 0) {
|
||||
return []
|
||||
return allResults
|
||||
}
|
||||
|
||||
// Try AI matching if API key is configured
|
||||
if (process.env.OPENAI_API_KEY) {
|
||||
try {
|
||||
return await getAIMatches(project, availableMentors, limit)
|
||||
} catch (error) {
|
||||
console.error('AI mentor matching failed, falling back to algorithm:', error)
|
||||
// Try AI matching
|
||||
try {
|
||||
const openai = await getOpenAI()
|
||||
if (!openai) {
|
||||
console.log('[Mentor Matching] OpenAI not configured, using algorithm')
|
||||
return getAlgorithmicMatchesBatch(projects, availableMentors, limit)
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to algorithmic matching
|
||||
return getAlgorithmicMatches(project, availableMentors, limit)
|
||||
const model = await getConfiguredModel()
|
||||
console.log(`[Mentor Matching] Using model: ${model} for ${projects.length} projects in batches of ${MENTOR_BATCH_SIZE}`)
|
||||
|
||||
let totalTokens = 0
|
||||
|
||||
// Process in batches
|
||||
for (let i = 0; i < projects.length; i += MENTOR_BATCH_SIZE) {
|
||||
const batchProjects = projects.slice(i, i + MENTOR_BATCH_SIZE)
|
||||
|
||||
console.log(`[Mentor Matching] Processing batch ${Math.floor(i / MENTOR_BATCH_SIZE) + 1}/${Math.ceil(projects.length / MENTOR_BATCH_SIZE)}`)
|
||||
|
||||
const { results, tokensUsed } = await processMatchingBatch(
|
||||
openai,
|
||||
model,
|
||||
batchProjects,
|
||||
availableMentors,
|
||||
limit,
|
||||
userId
|
||||
)
|
||||
|
||||
totalTokens += tokensUsed
|
||||
|
||||
// Merge results
|
||||
for (const [projectId, matches] of results) {
|
||||
allResults.set(projectId, matches)
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`[Mentor Matching] Completed. Total tokens: ${totalTokens}`)
|
||||
|
||||
// Fill in any missing projects with algorithmic fallback
|
||||
for (const project of projects) {
|
||||
if (!allResults.has(project.id) || allResults.get(project.id)?.length === 0) {
|
||||
const fallbackMatches = getAlgorithmicMatches(project, availableMentors, limit)
|
||||
allResults.set(project.id, fallbackMatches)
|
||||
}
|
||||
}
|
||||
|
||||
return allResults
|
||||
|
||||
} catch (error) {
|
||||
const classified = classifyAIError(error)
|
||||
logAIError('MentorMatching', 'getAIMentorSuggestionsBatch', classified)
|
||||
|
||||
// Log failed attempt
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'MENTOR_MATCHING',
|
||||
entityType: 'Project',
|
||||
model: 'unknown',
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: 0,
|
||||
batchSize: projects.length,
|
||||
itemsProcessed: 0,
|
||||
status: 'ERROR',
|
||||
errorMessage: classified.message,
|
||||
})
|
||||
|
||||
console.error('[Mentor Matching] AI failed, using algorithm:', classified.message)
|
||||
return getAlgorithmicMatchesBatch(projects, availableMentors, limit)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Use OpenAI to match mentors to projects
|
||||
* Get AI-suggested mentor matches for a single project
|
||||
*/
|
||||
async function getAIMatches(
|
||||
project: ProjectInfo,
|
||||
/**
 * Get AI-suggested mentor matches for a single project.
 *
 * Convenience wrapper around getAIMentorSuggestionsBatch with a
 * one-element batch.
 *
 * @param prisma - Database client passed through to the batch function.
 * @param projectId - Project to match mentors for.
 * @param limit - Max number of mentor suggestions (default 5).
 * @param userId - Optional user attributed in the AI usage log.
 * @returns Ranked mentor matches, or [] if none were produced.
 */
export async function getAIMentorSuggestions(
  prisma: PrismaClient,
  projectId: string,
  limit: number = 5,
  userId?: string
): Promise<MentorMatch[]> {
  const results = await getAIMentorSuggestionsBatch(prisma, [projectId], limit, userId)
  return results.get(projectId) || []
}
|
||||
|
||||
// ─── Algorithmic Fallback ────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Algorithmic fallback for multiple projects
|
||||
*/
|
||||
function getAlgorithmicMatchesBatch(
|
||||
projects: ProjectInfo[],
|
||||
mentors: MentorInfo[],
|
||||
limit: number
|
||||
): Promise<MentorMatch[]> {
|
||||
// Anonymize data before sending to AI
|
||||
const anonymizedProject = {
|
||||
description: project.description?.slice(0, 500) || 'No description',
|
||||
category: project.competitionCategory,
|
||||
oceanIssue: project.oceanIssue,
|
||||
tags: project.tags,
|
||||
): Map<string, MentorMatch[]> {
|
||||
const results = new Map<string, MentorMatch[]>()
|
||||
|
||||
for (const project of projects) {
|
||||
results.set(project.id, getAlgorithmicMatches(project, mentors, limit))
|
||||
}
|
||||
|
||||
const anonymizedMentors = mentors.map((m, index) => ({
|
||||
index,
|
||||
expertise: m.expertiseTags,
|
||||
availability: m.maxAssignments
|
||||
? `${m.currentAssignments}/${m.maxAssignments}`
|
||||
: 'unlimited',
|
||||
}))
|
||||
|
||||
const prompt = `You are matching mentors to an ocean protection project.
|
||||
|
||||
PROJECT:
|
||||
- Category: ${anonymizedProject.category || 'Not specified'}
|
||||
- Ocean Issue: ${anonymizedProject.oceanIssue || 'Not specified'}
|
||||
- Tags: ${anonymizedProject.tags.join(', ') || 'None'}
|
||||
- Description: ${anonymizedProject.description}
|
||||
|
||||
AVAILABLE MENTORS:
|
||||
${anonymizedMentors.map((m) => `${m.index}: Expertise: [${m.expertise.join(', ')}], Availability: ${m.availability}`).join('\n')}
|
||||
|
||||
Rank the top ${limit} mentors by suitability. For each, provide:
|
||||
1. Mentor index (0-based)
|
||||
2. Confidence score (0-1)
|
||||
3. Expertise match score (0-1)
|
||||
4. Brief reasoning (1-2 sentences)
|
||||
|
||||
Respond in JSON format:
|
||||
{
|
||||
"matches": [
|
||||
{
|
||||
"mentorIndex": 0,
|
||||
"confidenceScore": 0.85,
|
||||
"expertiseMatchScore": 0.9,
|
||||
"reasoning": "Strong expertise alignment..."
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
const openai = await getOpenAI()
|
||||
if (!openai) {
|
||||
throw new Error('OpenAI client not available')
|
||||
}
|
||||
|
||||
const model = await getConfiguredModel()
|
||||
|
||||
const response = await openai.chat.completions.create({
|
||||
model,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: 'You are an expert at matching mentors to projects based on expertise alignment. Always respond with valid JSON.',
|
||||
},
|
||||
{ role: 'user', content: prompt },
|
||||
],
|
||||
response_format: { type: 'json_object' },
|
||||
temperature: 0.3,
|
||||
max_tokens: 1000,
|
||||
})
|
||||
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (!content) {
|
||||
throw new Error('No response from AI')
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(content) as {
|
||||
matches: Array<{
|
||||
mentorIndex: number
|
||||
confidenceScore: number
|
||||
expertiseMatchScore: number
|
||||
reasoning: string
|
||||
}>
|
||||
}
|
||||
|
||||
return parsed.matches
|
||||
.filter((m) => m.mentorIndex >= 0 && m.mentorIndex < mentors.length)
|
||||
.map((m) => ({
|
||||
mentorId: mentors[m.mentorIndex].id,
|
||||
confidenceScore: m.confidenceScore,
|
||||
expertiseMatchScore: m.expertiseMatchScore,
|
||||
reasoning: m.reasoning,
|
||||
}))
|
||||
return results
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -226,7 +394,6 @@ function getAlgorithmicMatches(
|
||||
})
|
||||
|
||||
if (project.description) {
|
||||
// Extract key words from description
|
||||
const words = project.description.toLowerCase().split(/\s+/)
|
||||
words.forEach((word) => {
|
||||
if (word.length > 4) projectKeywords.add(word.replace(/[^a-z]/g, ''))
|
||||
@@ -267,7 +434,7 @@ function getAlgorithmicMatches(
|
||||
mentorId: mentor.id,
|
||||
confidenceScore: Math.round(confidenceScore * 100) / 100,
|
||||
expertiseMatchScore: Math.round(expertiseMatchScore * 100) / 100,
|
||||
reasoning: `Matched ${matchCount} keyword(s) with mentor expertise. Availability: ${availabilityScore > 0.5 ? 'Good' : 'Limited'}.`,
|
||||
reasoning: `Matched ${matchCount} keyword(s). Availability: ${availabilityScore > 0.5 ? 'Good' : 'Limited'}.`,
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user