- AI Tagging: batch 10 projects per API call with 3 concurrent batches (~10x faster) - New `tagProjectsBatch()` with `getAISuggestionsBatch()` for multi-project prompts - Single DB query for all projects, single anonymization pass - Compact JSON in prompts (no pretty-print) saves tokens - AI Shortlist: run STARTUP and BUSINESS_CONCEPT categories in parallel (2x faster) - AI Filtering: increase default parallel batches from 1 to 3 (3x faster) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
393 lines
13 KiB
TypeScript
393 lines
13 KiB
TypeScript
/**
 * AI Shortlist Service
 *
 * Generates ranked recommendations at the end of evaluation rounds.
 * Runs SEPARATELY for each category (STARTUP / BUSINESS_CONCEPT)
 * to produce independent rankings per the competition's advancement rules.
 *
 * GDPR Compliance:
 * - All project data is anonymized before AI processing
 * - No personal identifiers in prompts or responses
 */
|
|
|
|
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
|
|
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
|
|
import { classifyAIError, logAIError } from './ai-errors'
|
|
import { extractMultipleFileContents } from './file-content-extractor'
|
|
import type { PrismaClient } from '@prisma/client'
|
|
|
|
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Outcome of a shortlist generation run across the requested categories.
 */
export type ShortlistResult = {
  /**
   * `false` when generation failed and no recommendations are returned.
   * A `true` result may still carry entries in `errors`.
   */
  success: boolean
  /** Ranked recommendations, keyed by competition category. */
  recommendations: CategoryRecommendations
  /** Human-readable problems collected during the run; omitted when there were none. */
  errors?: string[]
  /** Tokens reported for the AI calls that were made; absent when generation failed. */
  tokensUsed?: number
}
|
|
|
|
/**
 * Shortlist recommendations grouped by competition category.
 * Each category is ranked on its own, so the two lists are independent.
 */
export type CategoryRecommendations = {
  /** Ranked recommendations for the STARTUP category. */
  STARTUP: ShortlistRecommendation[]
  /** Ranked recommendations for the BUSINESS_CONCEPT category. */
  BUSINESS_CONCEPT: ShortlistRecommendation[]
}
|
|
|
|
/**
 * One ranked entry of a category shortlist, already mapped back from the
 * anonymous id used in the AI prompt to the real project id.
 */
export type ShortlistRecommendation = {
  /** Database id of the recommended project. */
  projectId: string
  /** Rank within the category; lower is better (lists are sorted ascending by this value). */
  rank: number
  /** Score assigned by the model; the prompt asks for a value between 0 and 100. */
  score: number
  /** Competition category this ranking was produced for. */
  category: string
  /** Strengths the model identified for the project. */
  strengths: string[]
  /** Concerns the model raised about the project. */
  concerns: string[]
  /** Short free-text recommendation (the prompt asks for 1-2 sentences). */
  recommendation: string
}
|
|
|
|
// ─── Prompt Building ────────────────────────────────────────────────────────
|
|
|
|
/**
 * Assemble the system prompt that instructs the model how to rank one category.
 *
 * @param category - Competition category; `'STARTUP'` is labelled "Startup",
 *   any other value is labelled "Business Concept".
 * @param topN - Number of projects the model is asked to include.
 * @param rubric - Optional custom rubric; when non-empty it is inserted as its
 *   own section ahead of the output format.
 * @returns The prompt text (no trailing newline).
 */
function buildShortlistPrompt(category: string, topN: number, rubric?: string): string {
  const label = category === 'STARTUP' ? 'Startup' : 'Business Concept'
  // Kept as a single entry so an absent rubric still leaves an empty line,
  // exactly as an empty interpolation on its own line would.
  const rubricSection = rubric ? `## Custom Evaluation Rubric\n${rubric}\n` : ''

  const promptLines: string[] = [
    'You are a senior jury advisor for the Monaco Ocean Protection Challenge.',
    '',
    '## Your Role',
    `Analyze aggregated evaluation data to produce a ranked shortlist of the top ${topN} ${label} projects.`,
    `You are evaluating ONLY ${label} projects in this batch — rank them against each other within this category.`,
    '',
    '## Ranking Criteria (Weighted)',
    '- Evaluation Scores (40%): Average scores across all jury evaluations',
    '- Innovation & Impact (25%): Novelty of approach and potential environmental impact',
    '- Feasibility (20%): Likelihood of successful implementation',
    '- Alignment (15%): Fit with ocean protection mission and competition goals',
    '',
    '## Document Analysis',
    'If document content is provided (text_content field in files), use it for deeper qualitative analysis.',
    'Pay SPECIAL ATTENTION to files marked with is_current_round=true — these are the most recent submissions.',
    'Older documents provide context, but recent ones should carry more weight in your assessment.',
    '',
    rubricSection,
    '## Output Format',
    'Return a JSON array:',
    '[',
    '{',
    '"anonymousId": "PROJECT_001",',
    '"rank": 1,',
    '"score": 0-100,',
    '"strengths": ["strength 1", "strength 2"],',
    '"concerns": ["concern 1"],',
    '"recommendation": "1-2 sentence recommendation"',
    '}',
    ']',
    '',
    '## Guidelines',
    `- Only include the top ${topN} projects in your ranking`,
    '- Score should reflect weighted combination of all criteria',
    '- Be specific in strengths and concerns — avoid generic statements',
    '- Consider feedback themes and evaluator consensus',
    '- Higher evaluator consensus should boost confidence in ranking',
    '- Do not include any personal identifiers',
  ]

  return promptLines.join('\n')
}
|
|
|
|
// ─── Single Category Processing ─────────────────────────────────────────────
|
|
|
|
/**
 * Locate the list of ranking entries inside a parsed AI response.
 *
 * Accepts a bare array, an object wrapping the array under `rankings`,
 * `projects` or `shortlist` (checked in that order), an object wrapping it
 * under any other key, or a single ranking entry returned without an array.
 *
 * @returns The entries, or `null` when the response holds nothing list-like.
 */
function extractShortlistItems(json: unknown): unknown[] | null {
  if (Array.isArray(json)) return json
  if (json === null || typeof json !== 'object') return null

  const wrapper = json as Record<string, unknown>
  for (const key of ['rankings', 'projects', 'shortlist']) {
    const candidate = wrapper[key]
    if (Array.isArray(candidate)) return candidate
  }
  // Unknown wrapper key: take the first array-valued property.
  for (const candidate of Object.values(wrapper)) {
    if (Array.isArray(candidate)) return candidate
  }
  // A lone ranking entry with no array around it.
  return typeof wrapper.anonymousId === 'string' ? [wrapper] : null
}

/**
 * Turn raw AI ranking entries into validated, de-anonymized recommendations.
 *
 * Entries with an unknown or repeated `anonymousId` are dropped. Entries are
 * ordered by the model's rank (entries without a usable rank go last, higher
 * score first), capped at `topN`, and then re-numbered 1..n so the returned
 * ranks are always contiguous.
 */
function normalizeShortlistItems(
  items: unknown[],
  idMap: Map<string, string>,
  category: string,
  topN: number,
): ShortlistRecommendation[] {
  const toNumber = (value: unknown): number | null => {
    const n = typeof value === 'string' && value.trim() !== '' ? Number(value) : value
    return typeof n === 'number' && Number.isFinite(n) ? n : null
  }
  const toStrings = (value: unknown): string[] => {
    if (Array.isArray(value)) return value.filter((v): v is string => typeof v === 'string')
    return typeof value === 'string' && value !== '' ? [value] : []
  }

  const seen = new Set<string>()
  const candidates: Array<{ sortRank: number; rec: ShortlistRecommendation }> = []

  for (const raw of items) {
    if (raw === null || typeof raw !== 'object') continue
    const item = raw as Record<string, unknown>
    const anonymousId = item.anonymousId
    if (typeof anonymousId !== 'string' || seen.has(anonymousId)) continue
    const projectId = idMap.get(anonymousId)
    if (projectId === undefined) continue
    seen.add(anonymousId)

    const rank = toNumber(item.rank)
    candidates.push({
      sortRank: rank !== null && rank > 0 ? rank : Number.POSITIVE_INFINITY,
      rec: {
        projectId,
        rank: 0, // assigned after sorting
        score: toNumber(item.score) ?? 0,
        category,
        strengths: toStrings(item.strengths),
        concerns: toStrings(item.concerns),
        recommendation: typeof item.recommendation === 'string' ? item.recommendation : '',
      },
    })
  }

  candidates.sort((a, b) =>
    // The equality test avoids Infinity - Infinity (NaN) for two unranked entries.
    a.sortRank !== b.sortRank ? a.sortRank - b.sortRank : b.rec.score - a.rec.score,
  )

  const limit = Number.isFinite(topN) ? Math.max(0, Math.floor(topN)) : candidates.length
  return candidates.slice(0, limit).map(({ rec }, index) => ({ ...rec, rank: index + 1 }))
}

/**
 * Produce the AI-ranked shortlist for a single competition category.
 *
 * Loads the category's projects assigned in the round, summarizes their
 * submitted evaluations and files, sends an anonymized payload to the model,
 * and maps the returned ranking back to real project ids.
 *
 * @param params.roundId - Round whose assignments and evaluations are analyzed.
 * @param params.category - Competition category to rank.
 * @param params.topN - Maximum number of recommendations to return.
 * @param params.rubric - Optional custom rubric forwarded to the system prompt.
 * @param params.aiParseFiles - When true, extracted file text is included in the payload.
 * @param prisma - Prisma client (or compatible object) used for the queries.
 * @returns Recommendations sorted by rank, the total tokens reported over all
 *   AI calls made, and a list of non-fatal error messages (empty on success).
 *   Errors thrown by the database or the AI client are not caught here.
 */
async function generateCategoryShortlist(
  params: {
    roundId: string
    category: string
    topN: number
    rubric?: string
    aiParseFiles: boolean
  },
  prisma: PrismaClient | any,
): Promise<{ recommendations: ShortlistRecommendation[]; tokensUsed: number; errors: string[] }> {
  const { roundId, category, topN, rubric, aiParseFiles } = params

  // Load projects with evaluations for this category. Team member names are
  // deliberately not selected: nothing below uses them, and personal data
  // should not be loaded into an AI pipeline without need.
  const projects = await prisma.project.findMany({
    where: {
      competitionCategory: category,
      assignments: { some: { roundId } },
    },
    include: {
      assignments: {
        where: { roundId },
        include: { evaluation: true },
      },
      projectTags: { include: { tag: true } },
      files: {
        select: {
          id: true,
          fileName: true,
          fileType: true,
          mimeType: true,
          size: true,
          pageCount: true,
          objectKey: true,
          roundId: true,
          createdAt: true,
        },
        orderBy: { createdAt: 'desc' as const },
      },
    },
  })

  if (projects.length === 0) {
    return {
      recommendations: [],
      tokensUsed: 0,
      errors: [`No ${category} projects found for this round`],
    }
  }

  // Resolve the client before any file extraction so a missing configuration
  // fails fast instead of after the expensive work.
  const openai = await getOpenAI()
  if (!openai) {
    return { recommendations: [], tokensUsed: 0, errors: ['OpenAI client not configured'] }
  }
  const model = await getConfiguredModel()

  // Get round names for file tagging
  const roundIds = new Set<string>()
  for (const p of projects) {
    for (const f of (p as any).files || []) {
      if (f.roundId) roundIds.add(f.roundId)
    }
  }
  const roundNames = new Map<string, string>()
  if (roundIds.size > 0) {
    const rounds = await prisma.round.findMany({
      where: { id: { in: [...roundIds] } },
      select: { id: true, name: true },
    })
    for (const r of rounds) roundNames.set(r.id, r.name)
  }

  // Optionally extract file contents
  let fileContents: Map<string, string> | undefined
  if (aiParseFiles) {
    const allFiles = projects.flatMap((p: any) =>
      ((p.files || []) as Array<{ id: string; fileName: string; mimeType: string; objectKey: string }>),
    )
    const extractions = await extractMultipleFileContents(allFiles)
    fileContents = new Map()
    for (const e of extractions) {
      if (e.content) fileContents.set(e.fileId, e.content)
    }
  }

  // Summarize and anonymize in one pass. The database id only ever goes into
  // idMap, never into the payload sent to the model.
  const idMap = new Map<string, string>()
  const anonymized = projects.map((project: any, index: number) => {
    const anonymousId = `PROJECT_${String(index + 1).padStart(3, '0')}`
    idMap.set(anonymousId, project.id)

    const evaluations = project.assignments
      .map((a: any) => a.evaluation)
      .filter(Boolean)
      .filter((e: any) => e.status === 'SUBMITTED')

    // Evaluations without a global score are left out of the average rather
    // than counted as zero, which would drag the project's score down.
    const scores: number[] = evaluations
      .map((e: any) => e.globalScore)
      .filter((s: unknown) => s !== null && s !== undefined)
      .map((s: unknown) => Number(s))
      .filter((s: number) => Number.isFinite(s))
    const avgScore = scores.length > 0
      ? scores.reduce((sum, s) => sum + s, 0) / scores.length
      : 0

    const feedbacks = evaluations
      .map((e: any) => e.feedbackGeneral || e.feedbackText)
      .filter(Boolean)

    return {
      anonymousId,
      description: project.description,
      category: project.competitionCategory,
      tags: project.projectTags.map((pt: any) => pt.tag.name),
      avgScore,
      evaluationCount: evaluations.length,
      feedbackSamples: feedbacks.slice(0, 3),
      files: (project.files || []).map((f: any) => ({
        file_type: f.fileType ?? 'OTHER',
        page_count: f.pageCount ?? null,
        size_kb: Math.round((f.size ?? 0) / 1024),
        round_name: f.roundId ? (roundNames.get(f.roundId) || null) : null,
        is_current_round: f.roundId === roundId,
        ...(fileContents?.get(f.id) ? { text_content: fileContents.get(f.id) } : {}),
      })),
    }
  })

  const systemPrompt = buildShortlistPrompt(category, topN, rubric)
  // Compact JSON: pretty-printing only spends tokens.
  const userPrompt = [
    `Analyze these anonymized ${category} project evaluations and produce a ranked shortlist of the top ${topN}.`,
    '',
    `Projects (${anonymized.length} total):`,
    JSON.stringify(anonymized),
    '',
    `Return a JSON array following the format specified. Only include the top ${topN} projects. Rank by overall quality within this category.`,
  ].join('\n')

  // Call the model; retry only when the reply is not valid JSON.
  const MAX_PARSE_RETRIES = 2
  let tokensUsed = 0
  let parsedOk = false
  let items: unknown[] | null = null

  for (let attempt = 0; attempt <= MAX_PARSE_RETRIES && !parsedOk; attempt++) {
    const response = await openai.chat.completions.create(
      buildCompletionParams(model, {
        messages: [
          { role: 'system', content: systemPrompt },
          {
            role: 'user',
            content: attempt === 0
              ? userPrompt
              : userPrompt + '\n\nIMPORTANT: Please ensure valid JSON output.',
          },
        ],
        temperature: 0.1,
        jsonMode: true,
      }),
    )

    // Every call is billed, so every call (including retries) is logged and counted.
    const usage = extractTokenUsage(response)
    tokensUsed += usage.totalTokens
    await logAIUsage({
      action: 'SHORTLIST',
      model,
      promptTokens: usage.promptTokens,
      completionTokens: usage.completionTokens,
      totalTokens: usage.totalTokens,
      status: 'SUCCESS',
    })

    const content = response.choices[0]?.message?.content
    if (!content) {
      return { recommendations: [], tokensUsed, errors: ['Empty AI response'] }
    }

    try {
      items = extractShortlistItems(JSON.parse(content))
      parsedOk = true
    } catch {
      // JSON.parse failed: fall through to the next attempt, if any remain.
    }
  }

  if (!parsedOk) {
    return { recommendations: [], tokensUsed, errors: ['Failed to parse AI response'] }
  }
  if (items === null) {
    return { recommendations: [], tokensUsed, errors: ['AI response did not contain a ranking list'] }
  }

  // De-anonymize, validate and cap at topN
  const recommendations = normalizeShortlistItems(items, idMap, category, topN)

  return { recommendations, tokensUsed, errors: [] }
}
|
|
|
|
// ─── Main Function ──────────────────────────────────────────────────────────
|
|
|
|
/**
|
|
* Generate an AI shortlist for projects in a round, split by category.
|
|
* Runs independently for STARTUP and BUSINESS_CONCEPT.
|
|
*/
|
|
/**
 * Generate an AI shortlist for projects in a round, split by category.
 *
 * STARTUP and BUSINESS_CONCEPT are ranked independently and in parallel. A
 * failure in one category no longer discards the other category's finished
 * result: the failure is logged and reported in `errors`, and the completed
 * category is still returned.
 *
 * @param params.roundId - Round to generate the shortlist for.
 * @param params.competitionId - Accepted for interface compatibility; not read here.
 * @param params.category - If provided, only this category is processed.
 * @param params.topN - Shortlist size used when no per-category override is set (default 10).
 * @param params.startupTopN - Shortlist size override for STARTUP.
 * @param params.conceptTopN - Shortlist size override for every other category.
 * @param params.rubric - Optional custom rubric forwarded to the prompt.
 * @param params.aiParseFiles - Include extracted file text in the analysis (default false).
 * @param prisma - Prisma client (or compatible object) passed through to the category run.
 * @returns `success: false` with empty recommendations when every requested
 *   category failed; otherwise `success: true` with whatever was produced,
 *   the summed token usage, and any collected error messages. Never throws
 *   for category failures.
 */
export async function generateShortlist(
  params: {
    roundId: string
    competitionId: string
    category?: string // If provided, only run for this category
    topN?: number // Global fallback
    startupTopN?: number // Per-category override
    conceptTopN?: number // Per-category override
    rubric?: string
    aiParseFiles?: boolean
  },
  prisma: PrismaClient | any,
): Promise<ShortlistResult> {
  const {
    roundId,
    category,
    topN = 10,
    startupTopN,
    conceptTopN,
    rubric,
    aiParseFiles = false,
  } = params

  const emptyRecommendations = (): CategoryRecommendations => ({ STARTUP: [], BUSINESS_CONCEPT: [] })
  const describeFailure = (error: unknown): string =>
    error instanceof Error ? error.message : 'AI shortlist generation failed'

  try {
    const categories = category
      ? [category]
      : ['STARTUP', 'BUSINESS_CONCEPT']

    // allSettled (not all): each category's AI call is independent and paid
    // for, so one rejection must not throw away the other's result.
    const settled = await Promise.allSettled(
      categories.map(async (cat) => {
        const catTopN = cat === 'STARTUP'
          ? (startupTopN ?? topN)
          : (conceptTopN ?? topN)

        console.log(`[AI Shortlist] Generating top-${catTopN} for ${cat}`)

        return generateCategoryShortlist(
          { roundId, category: cat, topN: catTopN, rubric, aiParseFiles },
          prisma,
        )
      }),
    )

    const allRecommendations = emptyRecommendations()
    const allErrors: string[] = []
    let totalTokens = 0
    let completed = 0

    settled.forEach((outcome, index) => {
      const cat = categories[index]

      if (outcome.status === 'rejected') {
        logAIError('ai-shortlist', 'generateShortlist', classifyAIError(outcome.reason))
        console.error(`[AIShortlist] generateShortlist failed for ${cat}:`, outcome.reason)
        allErrors.push(`${cat}: ${describeFailure(outcome.reason)}`)
        return
      }

      completed++
      const result = outcome.value
      if (cat === 'STARTUP') {
        allRecommendations.STARTUP = result.recommendations
      } else {
        allRecommendations.BUSINESS_CONCEPT = result.recommendations
      }
      totalTokens += result.tokensUsed
      allErrors.push(...result.errors)
    })

    if (completed === 0) {
      return {
        success: false,
        recommendations: emptyRecommendations(),
        errors: allErrors,
      }
    }

    return {
      success: true,
      recommendations: allRecommendations,
      tokensUsed: totalTokens,
      errors: allErrors.length > 0 ? allErrors : undefined,
    }
  } catch (error) {
    // Safety net for anything unexpected outside the per-category runs.
    const classification = classifyAIError(error)
    logAIError('ai-shortlist', 'generateShortlist', classification)
    console.error('[AIShortlist] generateShortlist failed:', error)

    return {
      success: false,
      recommendations: emptyRecommendations(),
      errors: [describeFailure(error)],
    }
  }
}
|