AI category-aware evaluation: per-round config, file parsing, shortlist, advance flow
Some checks failed
Build and Push Docker Image / build (push) Has been cancelled
Some checks failed
Build and Push Docker Image / build (push) Has been cancelled
- Per-juror cap mode (HARD/SOFT/NONE) in add-member dialog and members table
- Jury invite flow: create user + add to group + send invitation from dialog
- Per-round config: notifyOnAdvance, aiParseFiles, startupAdvanceCount, conceptAdvanceCount
- Moved notify-on-advance from competition-level to per-round setting
- AI filtering: round-tagged files with newest-first sorting, optional file content extraction
- File content extractor service (pdf-parse for PDF, utf-8 for text files)
- AI shortlist runs independently per category (STARTUP / BUSINESS_CONCEPT)
- generateAIRecommendations tRPC endpoint with per-round config integration
- AI recommendations UI: trigger button, confirmation dialog, per-category results display
- Category-aware advance dialog: select/deselect projects by category with target caps
- STAGE_ACTIVE bug fix in assignment router

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -159,7 +159,7 @@ async function runAIAssignmentJob(jobId: string, roundId: string, userId: string
|
||||
type: NotificationTypes.AI_SUGGESTIONS_READY,
|
||||
title: 'AI Assignment Suggestions Ready',
|
||||
message: `AI generated ${result.suggestions.length} assignment suggestions for ${round.name || 'round'}${result.fallbackUsed ? ' (using fallback algorithm)' : ''}.`,
|
||||
linkUrl: `/admin/competitions/${round.competitionId}/assignments`,
|
||||
linkUrl: `/admin/rounds/${roundId}`,
|
||||
linkLabel: 'View Suggestions',
|
||||
priority: 'high',
|
||||
metadata: {
|
||||
|
||||
@@ -1206,4 +1206,285 @@ export const fileRouter = router({
|
||||
orderBy: [{ competition: { program: { year: 'desc' } } }, { sortOrder: 'asc' }],
|
||||
})
|
||||
}),
|
||||
|
||||
/**
 * Bulk-upload round selector.
 *
 * Returns every round that has at least one file requirement, together with
 * its competition/program labels and the ids of its requirements (so the
 * caller can count them). Results are ordered by program year, newest first,
 * then by the round's sortOrder.
 */
listRoundsForBulkUpload: adminProcedure.query(async ({ ctx }) => {
  const roundsWithRequirements = await ctx.prisma.round.findMany({
    // `some: {}` matches rounds that own at least one FileRequirement row.
    where: { fileRequirements: { some: {} } },
    select: {
      id: true,
      name: true,
      roundType: true,
      sortOrder: true,
      competition: {
        select: {
          id: true,
          name: true,
          program: { select: { name: true, year: true } },
        },
      },
      // Only ids are needed here; they serve as a requirement count.
      fileRequirements: { select: { id: true } },
    },
    orderBy: [{ competition: { program: { year: 'desc' } } }, { sortOrder: 'asc' }],
  })

  return roundsWithRequirements
}),
|
||||
|
||||
/**
 * Bulk-upload project table for one round.
 *
 * Lists the projects of the round's program together with their upload
 * status against each of the round's FileRequirements.
 *
 * Input:
 *  - roundId: round whose requirements are checked
 *  - search: optional case-insensitive match on project title or team name
 *  - status: 'all' | 'missing' | 'complete' (default 'all')
 *  - page / pageSize: 1-based pagination (pageSize 1-100, default 50)
 *
 * All matching projects are loaded first; the status filter and pagination
 * are then applied in memory, since completeness is derived per project.
 *
 * Returns the requested page of projects, the normalized requirements,
 * pagination info and overall counts. `completeCount` and `totalProjects`
 * are computed before the status filter is applied.
 */
listProjectsByRoundRequirements: adminProcedure
  .input(
    z.object({
      roundId: z.string(),
      search: z.string().optional(),
      status: z.enum(['all', 'missing', 'complete']).default('all'),
      page: z.number().int().min(1).default(1),
      pageSize: z.number().int().min(1).max(100).default(50),
    })
  )
  .query(async ({ ctx, input }) => {
    const { roundId, search, status, pageSize } = input

    const round = await ctx.prisma.round.findUniqueOrThrow({
      where: { id: roundId },
      include: {
        competition: { select: { id: true, programId: true, name: true } },
        fileRequirements: { orderBy: { sortOrder: 'asc' } },
      },
    })

    // Re-key each FileRequirement into the shape returned to the caller.
    const requirements = round.fileRequirements.map(
      ({ id, name, acceptedMimeTypes, isRequired, maxSizeMB, description }) => ({
        id,
        label: name,
        mimeTypes: acceptedMimeTypes,
        required: isRequired,
        maxSizeMb: maxSizeMB,
        description,
      })
    )

    // Projects are scoped to the round's program; the text search is optional.
    const projectWhere: Record<string, unknown> = {
      programId: round.competition.programId,
      ...(search
        ? {
            OR: [
              { title: { contains: search, mode: 'insensitive' } },
              { teamName: { contains: search, mode: 'insensitive' } },
            ],
          }
        : {}),
    }

    const allProjects = await ctx.prisma.project.findMany({
      where: projectWhere,
      select: {
        id: true,
        title: true,
        teamName: true,
        submittedByUserId: true,
        submittedBy: { select: { id: true, name: true, email: true } },
        files: {
          // Only files of this round that are attached to a requirement.
          where: { roundId, requirementId: { not: null } },
          select: {
            id: true,
            fileName: true,
            mimeType: true,
            size: true,
            createdAt: true,
            requirementId: true,
          },
        },
      },
      orderBy: { title: 'asc' },
    })

    // Pair every project with a per-requirement status row.
    const mapped = allProjects.map(({ id, title, teamName, submittedBy, files }) => {
      const reqStatus = requirements.map((req) => ({
        requirementId: req.id,
        label: req.label,
        mimeTypes: req.mimeTypes,
        required: req.required,
        file: files.find((candidate) => candidate.requirementId === req.id) ?? null,
      }))

      // Single pass over the status rows to collect all three counters.
      let requiredTotal = 0
      let requiredFilled = 0
      let filledCount = 0
      for (const entry of reqStatus) {
        const hasFile = Boolean(entry.file)
        if (hasFile) filledCount += 1
        if (entry.required) {
          requiredTotal += 1
          if (hasFile) requiredFilled += 1
        }
      }

      return {
        project: { id, title, teamName, submittedBy },
        requirements: reqStatus,
        // With no required items, a project is complete only if every
        // (optional) requirement has a file.
        isComplete:
          requiredTotal > 0 ? requiredFilled >= requiredTotal : filledCount === reqStatus.length,
        filledCount,
        totalCount: reqStatus.length,
      }
    })

    // Status filter
    let filtered = mapped
    if (status === 'missing') {
      filtered = mapped.filter((entry) => !entry.isComplete)
    } else if (status === 'complete') {
      filtered = mapped.filter((entry) => entry.isComplete)
    }

    // Pagination; an out-of-range page is clamped to the last available page.
    const total = filtered.length
    const totalPages = Math.ceil(total / pageSize)
    const lastPage = Math.max(totalPages, 1)
    const page = Math.min(input.page, lastPage)
    const offset = (page - 1) * pageSize
    const projects = filtered.slice(offset, offset + pageSize)

    const completeCount = mapped.reduce(
      (count, entry) => (entry.isComplete ? count + 1 : count),
      0
    )

    return {
      projects,
      requirements,
      total,
      page,
      totalPages,
      completeCount,
      totalProjects: mapped.length,
      competition: round.competition,
    }
  }),
|
||||
|
||||
/**
 * Admin bulk upload: register a file for one project against one of a
 * round's FileRequirements and return a presigned PUT URL for the bytes.
 *
 * Steps, in order:
 *  1. reject file names with a blocked extension (BAD_REQUEST)
 *  2. check the requirement exists and belongs to the round (NOT_FOUND)
 *  3. check the MIME type against the requirement's accepted list, where an
 *     entry ending in `/*` matches by prefix (BAD_REQUEST)
 *  4. load project title and round name (findUniqueOrThrow rejects if either
 *     is missing) and build the storage object key from them
 *  5. replace any existing file record for the same project + round +
 *     requirement with the new one, atomically
 *  6. write an audit log entry flagged `bulkUpload: true`
 *
 * Returns `{ uploadUrl, file }`. The URL is requested from getPresignedUrl
 * with an expiry argument of 3600 (presumably seconds — confirm against the
 * helper).
 */
adminUploadForRoundRequirement: adminProcedure
  .input(
    z.object({
      projectId: z.string(),
      fileName: z.string(),
      mimeType: z.string(),
      size: z.number().int().positive(),
      roundId: z.string(),
      requirementId: z.string(),
    })
  )
  .mutation(async ({ ctx, input }) => {
    // Block dangerous file extensions.
    // The dot position and the slice must come from the SAME string:
    // lowercasing can change a string's length (e.g. "İ" becomes two code
    // units), so an index taken from the original name can point at the
    // wrong place in the lowercased one and let a blocked extension through.
    const dangerousExtensions = ['.exe', '.sh', '.bat', '.cmd', '.ps1', '.php', '.jsp', '.cgi', '.dll', '.msi']
    const loweredName = input.fileName.toLowerCase()
    const dotIndex = loweredName.lastIndexOf('.')
    const ext = dotIndex === -1 ? '' : loweredName.slice(dotIndex)
    if (dangerousExtensions.includes(ext)) {
      throw new TRPCError({
        code: 'BAD_REQUEST',
        message: `File type "${ext}" is not allowed`,
      })
    }

    // Validate requirement exists and belongs to the round
    const requirement = await ctx.prisma.fileRequirement.findFirst({
      where: {
        id: input.requirementId,
        roundId: input.roundId,
      },
    })
    if (!requirement) {
      throw new TRPCError({
        code: 'NOT_FOUND',
        message: 'Requirement not found for this round',
      })
    }

    // Validate MIME type if requirement specifies allowed types
    if (requirement.acceptedMimeTypes.length > 0) {
      const isAllowed = requirement.acceptedMimeTypes.some((allowed) => {
        // "image/*" style entries match any subtype of that top-level type.
        if (allowed.endsWith('/*')) {
          return input.mimeType.startsWith(allowed.replace('/*', '/'))
        }
        return input.mimeType === allowed
      })
      if (!isAllowed) {
        throw new TRPCError({
          code: 'BAD_REQUEST',
          message: `File type "${input.mimeType}" is not allowed for this requirement. Accepted: ${requirement.acceptedMimeTypes.join(', ')}`,
        })
      }
    }

    // Infer fileType from mimeType
    let fileType: 'EXEC_SUMMARY' | 'PRESENTATION' | 'VIDEO' | 'OTHER' = 'OTHER'
    if (input.mimeType.startsWith('video/')) fileType = 'VIDEO'
    else if (input.mimeType === 'application/pdf') fileType = 'EXEC_SUMMARY'
    else if (input.mimeType.includes('presentation') || input.mimeType.includes('powerpoint'))
      fileType = 'PRESENTATION'

    // Fetch project title and round name for storage path
    const [project, round] = await Promise.all([
      ctx.prisma.project.findUniqueOrThrow({
        where: { id: input.projectId },
        select: { title: true },
      }),
      ctx.prisma.round.findUniqueOrThrow({
        where: { id: input.roundId },
        select: { name: true },
      }),
    ])

    const bucket = BUCKET_NAME
    const objectKey = generateObjectKey(project.title, input.fileName, round.name)
    const uploadUrl = await getPresignedUrl(bucket, objectKey, 'PUT', 3600)

    // Replace any existing file for this project+requirement combo.
    // Delete and create run in one transaction so that a failed create
    // cannot leave the project without its previous file record.
    const [, file] = await ctx.prisma.$transaction([
      ctx.prisma.projectFile.deleteMany({
        where: {
          projectId: input.projectId,
          roundId: input.roundId,
          requirementId: input.requirementId,
        },
      }),
      ctx.prisma.projectFile.create({
        data: {
          projectId: input.projectId,
          fileType,
          fileName: input.fileName,
          mimeType: input.mimeType,
          size: input.size,
          bucket,
          objectKey,
          roundId: input.roundId,
          requirementId: input.requirementId,
        },
      }),
    ])

    await logAudit({
      prisma: ctx.prisma,
      userId: ctx.user.id,
      action: 'UPLOAD_FILE',
      entityType: 'ProjectFile',
      entityId: file.id,
      detailsJson: {
        projectId: input.projectId,
        fileName: input.fileName,
        roundId: input.roundId,
        requirementId: input.requirementId,
        bulkUpload: true,
      },
      ipAddress: ctx.ip,
      userAgent: ctx.userAgent,
    })

    return { uploadUrl, file }
  }),
|
||||
})
|
||||
|
||||
@@ -43,6 +43,14 @@ export async function runFilteringJob(jobId: string, roundId: string, userId: st
|
||||
orderBy: { priority: 'asc' },
|
||||
})
|
||||
|
||||
// Get current round with config
|
||||
const currentRound = await prisma.round.findUniqueOrThrow({
|
||||
where: { id: roundId },
|
||||
select: { id: true, name: true, configJson: true },
|
||||
})
|
||||
const roundConfig = (currentRound.configJson as Record<string, unknown>) || {}
|
||||
const aiParseFiles = !!roundConfig.aiParseFiles
|
||||
|
||||
// Get projects in this round via ProjectRoundState
|
||||
const projectStates = await prisma.projectRoundState.findMany({
|
||||
where: {
|
||||
@@ -54,13 +62,67 @@ export async function runFilteringJob(jobId: string, roundId: string, userId: st
|
||||
project: {
|
||||
include: {
|
||||
files: {
|
||||
select: { id: true, fileName: true, fileType: true, size: true, pageCount: true },
|
||||
select: {
|
||||
id: true,
|
||||
fileName: true,
|
||||
fileType: true,
|
||||
mimeType: true,
|
||||
size: true,
|
||||
pageCount: true,
|
||||
objectKey: true,
|
||||
roundId: true,
|
||||
createdAt: true,
|
||||
},
|
||||
orderBy: { createdAt: 'desc' },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
const projects = projectStates.map((pss: any) => pss.project).filter(Boolean)
|
||||
|
||||
// Get round names for file tagging
|
||||
const roundIds = new Set<string>()
|
||||
for (const pss of projectStates) {
|
||||
for (const f of (pss as any).project?.files || []) {
|
||||
if (f.roundId) roundIds.add(f.roundId)
|
||||
}
|
||||
}
|
||||
const roundNames = new Map<string, string>()
|
||||
if (roundIds.size > 0) {
|
||||
const rounds = await prisma.round.findMany({
|
||||
where: { id: { in: [...roundIds] } },
|
||||
select: { id: true, name: true },
|
||||
})
|
||||
for (const r of rounds) roundNames.set(r.id, r.name)
|
||||
}
|
||||
|
||||
// Optionally extract file contents
|
||||
let fileContents: Map<string, string> | undefined
|
||||
if (aiParseFiles) {
|
||||
const { extractMultipleFileContents } = await import('@/server/services/file-content-extractor')
|
||||
const allFiles = projectStates.flatMap((pss: any) =>
|
||||
((pss.project?.files || []) as Array<{ id: string; fileName: string; mimeType: string; objectKey: string }>)
|
||||
)
|
||||
const extractions = await extractMultipleFileContents(allFiles)
|
||||
fileContents = new Map()
|
||||
for (const e of extractions) {
|
||||
if (e.content) fileContents.set(e.fileId, e.content)
|
||||
}
|
||||
}
|
||||
|
||||
// Enrich projects with round-tagged file data
|
||||
const projects = projectStates.map((pss: any) => {
|
||||
const project = pss.project
|
||||
if (project?.files) {
|
||||
project.files = project.files.map((f: any) => ({
|
||||
...f,
|
||||
roundName: f.roundId ? (roundNames.get(f.roundId) || 'Unknown Round') : null,
|
||||
isCurrentRound: f.roundId === roundId,
|
||||
textContent: fileContents?.get(f.id) || undefined,
|
||||
}))
|
||||
}
|
||||
return project
|
||||
}).filter(Boolean)
|
||||
|
||||
// Calculate batch info
|
||||
const BATCH_SIZE = 20
|
||||
@@ -149,10 +211,10 @@ export async function runFilteringJob(jobId: string, roundId: string, userId: st
|
||||
},
|
||||
})
|
||||
|
||||
// Get round name and competitionId for notification
|
||||
// Get round name for notification
|
||||
const round = await prisma.round.findUnique({
|
||||
where: { id: roundId },
|
||||
select: { name: true, competitionId: true },
|
||||
select: { name: true },
|
||||
})
|
||||
|
||||
// Notify admins that filtering is complete
|
||||
@@ -160,7 +222,7 @@ export async function runFilteringJob(jobId: string, roundId: string, userId: st
|
||||
type: NotificationTypes.FILTERING_COMPLETE,
|
||||
title: 'AI Filtering Complete',
|
||||
message: `Filtering complete for ${round?.name || 'round'}: ${passedCount} passed, ${flaggedCount} flagged, ${filteredCount} filtered out`,
|
||||
linkUrl: `/admin/competitions/${round?.competitionId}/rounds/${roundId}`,
|
||||
linkUrl: `/admin/rounds/${roundId}`,
|
||||
linkLabel: 'View Results',
|
||||
priority: 'high',
|
||||
metadata: {
|
||||
@@ -183,16 +245,11 @@ export async function runFilteringJob(jobId: string, roundId: string, userId: st
|
||||
},
|
||||
})
|
||||
|
||||
// Notify admins of failure - need to fetch round info for competitionId
|
||||
const round = await prisma.round.findUnique({
|
||||
where: { id: roundId },
|
||||
select: { competitionId: true },
|
||||
})
|
||||
await notifyAdmins({
|
||||
type: NotificationTypes.FILTERING_FAILED,
|
||||
title: 'AI Filtering Failed',
|
||||
message: `Filtering job failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
|
||||
linkUrl: round?.competitionId ? `/admin/competitions/${round.competitionId}/rounds/${roundId}` : `/admin/competitions`,
|
||||
linkUrl: `/admin/rounds/${roundId}`,
|
||||
linkLabel: 'View Details',
|
||||
priority: 'urgent',
|
||||
metadata: { roundId, jobId, error: error instanceof Error ? error.message : 'Unknown error' },
|
||||
|
||||
@@ -4,6 +4,7 @@ import { Prisma } from '@prisma/client'
|
||||
import { router, adminProcedure, protectedProcedure } from '../trpc'
|
||||
import { logAudit } from '@/server/utils/audit'
|
||||
import { validateRoundConfig, defaultRoundConfig } from '@/types/competition-configs'
|
||||
import { generateShortlist } from '../services/ai-shortlist'
|
||||
import {
|
||||
openWindow,
|
||||
closeWindow,
|
||||
@@ -358,6 +359,74 @@ export const roundRouter = router({
|
||||
}
|
||||
}),
|
||||
|
||||
// =========================================================================
|
||||
// AI Shortlist Recommendations
|
||||
// =========================================================================
|
||||
|
||||
/**
 * Generate AI-powered shortlist recommendations for a round.
 *
 * Reads the round's configJson for the per-category advancement targets
 * (`startupAdvanceCount`, `conceptAdvanceCount`) and the `aiParseFiles`
 * flag, passes them to generateShortlist together with the optional rubric,
 * records an AI_SHORTLIST audit entry, and returns the shortlist result
 * (recommendations keyed by STARTUP / BUSINESS_CONCEPT).
 *
 * Advancement targets fall back to 10 when the stored value is missing or
 * is not a positive integer. Rejects (via findUniqueOrThrow) when the round
 * does not exist.
 */
generateAIRecommendations: adminProcedure
  .input(
    z.object({
      roundId: z.string(),
      rubric: z.string().optional(),
    })
  )
  .mutation(async ({ ctx, input }) => {
    const round = await ctx.prisma.round.findUniqueOrThrow({
      where: { id: input.roundId },
      select: {
        id: true,
        name: true,
        competitionId: true,
        configJson: true,
      },
    })

    // configJson is untyped JSON: it may be null, an array or a primitive,
    // so only treat it as a key/value config when it really is a plain object.
    const rawConfig: unknown = round.configJson
    const config: Record<string, unknown> =
      rawConfig !== null && typeof rawConfig === 'object' && !Array.isArray(rawConfig)
        ? (rawConfig as Record<string, unknown>)
        : {}

    // A stored count is only trusted when it is a positive integer (numeric
    // strings are accepted); anything else — missing, 0, negative,
    // fractional, non-numeric — uses the default instead of being passed on
    // under a blind `as number` cast.
    const DEFAULT_ADVANCE_COUNT = 10
    const readAdvanceCount = (value: unknown): number => {
      const candidate =
        typeof value === 'string' && value.trim() !== '' ? Number(value) : value
      return typeof candidate === 'number' && Number.isInteger(candidate) && candidate > 0
        ? candidate
        : DEFAULT_ADVANCE_COUNT
    }

    const startupTopN = readAdvanceCount(config.startupAdvanceCount)
    const conceptTopN = readAdvanceCount(config.conceptAdvanceCount)
    const aiParseFiles = !!config.aiParseFiles

    const result = await generateShortlist(
      {
        roundId: input.roundId,
        competitionId: round.competitionId,
        startupTopN,
        conceptTopN,
        rubric: input.rubric,
        aiParseFiles,
      },
      ctx.prisma,
    )

    // The audit entry records the effective settings and the size of each
    // category's result, not the recommendations themselves.
    await logAudit({
      prisma: ctx.prisma,
      userId: ctx.user.id,
      action: 'AI_SHORTLIST',
      entityType: 'Round',
      entityId: input.roundId,
      detailsJson: {
        roundName: round.name,
        startupTopN,
        conceptTopN,
        aiParseFiles,
        success: result.success,
        startupCount: result.recommendations.STARTUP.length,
        conceptCount: result.recommendations.BUSINESS_CONCEPT.length,
        tokensUsed: result.tokensUsed,
        errors: result.errors,
      },
      ipAddress: ctx.ip,
      userAgent: ctx.userAgent,
    })

    return result
  }),
|
||||
|
||||
// =========================================================================
|
||||
// Submission Window Management
|
||||
// =========================================================================
|
||||
|
||||
@@ -112,7 +112,16 @@ interface ProjectForFiltering {
|
||||
institution?: string | null
|
||||
submissionSource?: SubmissionSource
|
||||
submittedAt?: Date | null
|
||||
files: Array<{ id: string; fileName: string; fileType?: FileType | null; size?: number; pageCount?: number | null }>
|
||||
files: Array<{
|
||||
id: string
|
||||
fileName: string
|
||||
fileType?: FileType | null
|
||||
size?: number
|
||||
pageCount?: number | null
|
||||
roundName?: string | null
|
||||
isCurrentRound?: boolean
|
||||
textContent?: string
|
||||
}>
|
||||
_count?: {
|
||||
teamMembers?: number
|
||||
files?: number
|
||||
@@ -170,9 +179,10 @@ Return a JSON object with this exact structure:
|
||||
- founded_year: when the company/initiative was founded (use for age checks)
|
||||
- ocean_issue: the ocean conservation area
|
||||
- file_count, file_types: uploaded documents summary
|
||||
- files[]: per-file details with file_type, page_count (if known), and size_kb
|
||||
- files[]: per-file details with file_type, page_count (if known), size_kb, round_name (which round the file was submitted for), and is_current_round flag
|
||||
- description: project summary text
|
||||
- tags: topic tags
|
||||
- If document content is provided (text_content field in files), use it for deeper analysis. Pay SPECIAL ATTENTION to files from the current round (is_current_round=true) as they are the most recent and relevant submissions.
|
||||
|
||||
## Guidelines
|
||||
- Evaluate ONLY against the provided criteria, not your own standards
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
* AI Shortlist Service
|
||||
*
|
||||
* Generates ranked recommendations at end of evaluation rounds.
|
||||
* Follows patterns from ai-filtering.ts and ai-evaluation-summary.ts.
|
||||
* Runs SEPARATELY for each category (STARTUP / BUSINESS_CONCEPT)
|
||||
* to produce independent rankings per the competition's advancement rules.
|
||||
*
|
||||
* GDPR Compliance:
|
||||
* - All project data is anonymized before AI processing
|
||||
@@ -12,124 +13,43 @@
|
||||
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
|
||||
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
|
||||
import { classifyAIError, logAIError } from './ai-errors'
|
||||
import { extractMultipleFileContents } from './file-content-extractor'
|
||||
import type { PrismaClient } from '@prisma/client'
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Result of a shortlist run.
 *
 * `recommendations` is keyed by competition category; the stale flat-array
 * declaration of the same member has been dropped so the property is
 * declared exactly once.
 */
export type ShortlistResult = {
  success: boolean
  /** Ranked recommendations, kept separately for each category. */
  recommendations: CategoryRecommendations
  /** Human-readable problems collected during the run, if any. */
  errors?: string[]
  /** Token usage reported for the run, when available. */
  tokensUsed?: number
}

/** One independent ranking per competition category. */
export type CategoryRecommendations = {
  STARTUP: ShortlistRecommendation[]
  BUSINESS_CONCEPT: ShortlistRecommendation[]
}

/** A single ranked project in a category's shortlist. */
export type ShortlistRecommendation = {
  projectId: string
  /** Position within the category's ranking. */
  rank: number
  /** Overall score; the shortlist prompt asks the model for a 0-100 value. */
  score: number
  category: string
  strengths: string[]
  concerns: string[]
  /** Short free-text recommendation for the project. */
  recommendation: string
}
|
||||
|
||||
// ─── Main Function ──────────────────────────────────────────────────────────
|
||||
// ─── Prompt Building ────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Generate an AI shortlist for projects in a round.
|
||||
* Only runs if EvaluationConfig.generateAiShortlist is true.
|
||||
*/
|
||||
export async function generateShortlist(
|
||||
params: {
|
||||
roundId: string
|
||||
competitionId: string
|
||||
category?: string
|
||||
topN?: number
|
||||
rubric?: string
|
||||
},
|
||||
prisma: PrismaClient | any,
|
||||
): Promise<ShortlistResult> {
|
||||
const { roundId, competitionId, category, topN = 10, rubric } = params
|
||||
function buildShortlistPrompt(category: string, topN: number, rubric?: string): string {
|
||||
const categoryLabel = category === 'STARTUP' ? 'Startup' : 'Business Concept'
|
||||
|
||||
try {
|
||||
// Load projects with evaluations
|
||||
const where: Record<string, unknown> = {
|
||||
assignments: { some: { roundId } },
|
||||
}
|
||||
if (category) {
|
||||
where.competitionCategory = category
|
||||
}
|
||||
|
||||
const projects = await prisma.project.findMany({
|
||||
where,
|
||||
include: {
|
||||
assignments: {
|
||||
where: { roundId },
|
||||
include: {
|
||||
evaluation: true,
|
||||
},
|
||||
},
|
||||
projectTags: { include: { tag: true } },
|
||||
files: { select: { id: true, type: true } },
|
||||
teamMembers: { select: { user: { select: { name: true } } } },
|
||||
},
|
||||
})
|
||||
|
||||
if (projects.length === 0) {
|
||||
return {
|
||||
success: true,
|
||||
recommendations: [],
|
||||
errors: ['No projects found for this round'],
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregate scores per project
|
||||
const projectSummaries = projects.map((project: any) => {
|
||||
const evaluations = project.assignments
|
||||
.map((a: any) => a.evaluation)
|
||||
.filter(Boolean)
|
||||
.filter((e: any) => e.status === 'SUBMITTED')
|
||||
|
||||
const scores = evaluations.map((e: any) => e.globalScore ?? 0)
|
||||
const avgScore = scores.length > 0
|
||||
? scores.reduce((sum: number, s: number) => sum + s, 0) / scores.length
|
||||
: 0
|
||||
|
||||
const feedbacks = evaluations
|
||||
.map((e: any) => e.feedbackGeneral)
|
||||
.filter(Boolean)
|
||||
|
||||
return {
|
||||
id: project.id,
|
||||
title: project.title,
|
||||
description: project.description,
|
||||
category: project.competitionCategory,
|
||||
tags: project.projectTags.map((pt: any) => pt.tag.name),
|
||||
avgScore,
|
||||
evaluationCount: evaluations.length,
|
||||
feedbackSamples: feedbacks.slice(0, 3), // Max 3 feedback samples
|
||||
}
|
||||
})
|
||||
|
||||
// Anonymize for AI
|
||||
const anonymized = projectSummaries.map((p: any, index: number) => ({
|
||||
anonymousId: `PROJECT_${String(index + 1).padStart(3, '0')}`,
|
||||
...p,
|
||||
// Strip identifying info
|
||||
title: undefined,
|
||||
id: undefined,
|
||||
}))
|
||||
|
||||
// Build idMap for de-anonymization
|
||||
const idMap = new Map<string, string>()
|
||||
projectSummaries.forEach((p: any, index: number) => {
|
||||
idMap.set(`PROJECT_${String(index + 1).padStart(3, '0')}`, p.id)
|
||||
})
|
||||
|
||||
// Build prompt
|
||||
const systemPrompt = `You are a senior jury advisor for the Monaco Ocean Protection Challenge.
|
||||
return `You are a senior jury advisor for the Monaco Ocean Protection Challenge.
|
||||
|
||||
## Your Role
|
||||
Analyze aggregated evaluation data to produce a ranked shortlist of top projects.
|
||||
Analyze aggregated evaluation data to produce a ranked shortlist of the top ${topN} ${categoryLabel} projects.
|
||||
You are evaluating ONLY ${categoryLabel} projects in this batch — rank them against each other within this category.
|
||||
|
||||
## Ranking Criteria (Weighted)
|
||||
- Evaluation Scores (40%): Average scores across all jury evaluations
|
||||
@@ -137,6 +57,12 @@ Analyze aggregated evaluation data to produce a ranked shortlist of top projects
|
||||
- Feasibility (20%): Likelihood of successful implementation
|
||||
- Alignment (15%): Fit with ocean protection mission and competition goals
|
||||
|
||||
## Document Analysis
|
||||
If document content is provided (text_content field in files), use it for deeper qualitative analysis.
|
||||
Pay SPECIAL ATTENTION to files marked with is_current_round=true — these are the most recent submissions.
|
||||
Older documents provide context, but recent ones should carry more weight in your assessment.
|
||||
|
||||
${rubric ? `## Custom Evaluation Rubric\n${rubric}\n` : ''}
|
||||
## Output Format
|
||||
Return a JSON array:
|
||||
[
|
||||
@@ -146,129 +72,305 @@ Return a JSON array:
|
||||
"score": 0-100,
|
||||
"strengths": ["strength 1", "strength 2"],
|
||||
"concerns": ["concern 1"],
|
||||
"recommendation": "1-2 sentence recommendation",
|
||||
"criterionBreakdown": {
|
||||
"evaluationScores": 38,
|
||||
"innovationImpact": 22,
|
||||
"feasibility": 18,
|
||||
"alignment": 14
|
||||
}
|
||||
"recommendation": "1-2 sentence recommendation"
|
||||
}
|
||||
]
|
||||
|
||||
## Guidelines
|
||||
- Only include the requested number of top projects
|
||||
- Only include the top ${topN} projects in your ranking
|
||||
- Score should reflect weighted combination of all criteria
|
||||
- Be specific in strengths and concerns — avoid generic statements
|
||||
- Consider feedback themes and evaluator consensus
|
||||
- Higher evaluator consensus should boost confidence in ranking`
|
||||
- Higher evaluator consensus should boost confidence in ranking
|
||||
- Do not include any personal identifiers`
|
||||
}
|
||||
|
||||
const userPrompt = `Analyze these anonymized project evaluations and produce a ranked shortlist of the top ${topN} projects.
|
||||
// ─── Single Category Processing ─────────────────────────────────────────────
|
||||
|
||||
${rubric ? `Evaluation rubric:\n${rubric}\n\n` : ''}Projects:
|
||||
async function generateCategoryShortlist(
|
||||
params: {
|
||||
roundId: string
|
||||
category: string
|
||||
topN: number
|
||||
rubric?: string
|
||||
aiParseFiles: boolean
|
||||
},
|
||||
prisma: PrismaClient | any,
|
||||
): Promise<{ recommendations: ShortlistRecommendation[]; tokensUsed: number; errors: string[] }> {
|
||||
const { roundId, category, topN, rubric, aiParseFiles } = params
|
||||
|
||||
// Load projects with evaluations for this category
|
||||
const projects = await prisma.project.findMany({
|
||||
where: {
|
||||
competitionCategory: category,
|
||||
assignments: { some: { roundId } },
|
||||
},
|
||||
include: {
|
||||
assignments: {
|
||||
where: { roundId },
|
||||
include: { evaluation: true },
|
||||
},
|
||||
projectTags: { include: { tag: true } },
|
||||
files: {
|
||||
select: {
|
||||
id: true,
|
||||
fileName: true,
|
||||
fileType: true,
|
||||
mimeType: true,
|
||||
size: true,
|
||||
pageCount: true,
|
||||
objectKey: true,
|
||||
roundId: true,
|
||||
createdAt: true,
|
||||
},
|
||||
orderBy: { createdAt: 'desc' as const },
|
||||
},
|
||||
teamMembers: { select: { user: { select: { name: true } } } },
|
||||
},
|
||||
})
|
||||
|
||||
if (projects.length === 0) {
|
||||
return {
|
||||
recommendations: [],
|
||||
tokensUsed: 0,
|
||||
errors: [`No ${category} projects found for this round`],
|
||||
}
|
||||
}
|
||||
|
||||
// Get round names for file tagging
|
||||
const roundIds = new Set<string>()
|
||||
for (const p of projects) {
|
||||
for (const f of (p as any).files || []) {
|
||||
if (f.roundId) roundIds.add(f.roundId)
|
||||
}
|
||||
}
|
||||
const roundNames = new Map<string, string>()
|
||||
if (roundIds.size > 0) {
|
||||
const rounds = await prisma.round.findMany({
|
||||
where: { id: { in: [...roundIds] } },
|
||||
select: { id: true, name: true },
|
||||
})
|
||||
for (const r of rounds) roundNames.set(r.id, r.name)
|
||||
}
|
||||
|
||||
// Optionally extract file contents
|
||||
let fileContents: Map<string, string> | undefined
|
||||
if (aiParseFiles) {
|
||||
const allFiles = projects.flatMap((p: any) =>
|
||||
((p.files || []) as Array<{ id: string; fileName: string; mimeType: string; objectKey: string }>)
|
||||
)
|
||||
const extractions = await extractMultipleFileContents(allFiles)
|
||||
fileContents = new Map()
|
||||
for (const e of extractions) {
|
||||
if (e.content) fileContents.set(e.fileId, e.content)
|
||||
}
|
||||
}
|
||||
|
||||
// Aggregate scores per project
|
||||
const projectSummaries = projects.map((project: any) => {
|
||||
const evaluations = project.assignments
|
||||
.map((a: any) => a.evaluation)
|
||||
.filter(Boolean)
|
||||
.filter((e: any) => e.status === 'SUBMITTED')
|
||||
|
||||
const scores = evaluations.map((e: any) => e.globalScore ?? 0)
|
||||
const avgScore = scores.length > 0
|
||||
? scores.reduce((sum: number, s: number) => sum + s, 0) / scores.length
|
||||
: 0
|
||||
|
||||
const feedbacks = evaluations
|
||||
.map((e: any) => e.feedbackGeneral || e.feedbackText)
|
||||
.filter(Boolean)
|
||||
|
||||
return {
|
||||
id: project.id,
|
||||
description: project.description,
|
||||
category: project.competitionCategory,
|
||||
tags: project.projectTags.map((pt: any) => pt.tag.name),
|
||||
avgScore,
|
||||
evaluationCount: evaluations.length,
|
||||
feedbackSamples: feedbacks.slice(0, 3),
|
||||
files: (project.files || []).map((f: any) => ({
|
||||
file_type: f.fileType ?? 'OTHER',
|
||||
page_count: f.pageCount ?? null,
|
||||
size_kb: Math.round((f.size ?? 0) / 1024),
|
||||
round_name: f.roundId ? (roundNames.get(f.roundId) || null) : null,
|
||||
is_current_round: f.roundId === roundId,
|
||||
...(fileContents?.get(f.id) ? { text_content: fileContents.get(f.id) } : {}),
|
||||
})),
|
||||
}
|
||||
})
|
||||
|
||||
// Anonymize for AI
|
||||
const anonymized = projectSummaries.map((p: any, index: number) => ({
|
||||
anonymousId: `PROJECT_${String(index + 1).padStart(3, '0')}`,
|
||||
...p,
|
||||
id: undefined,
|
||||
}))
|
||||
|
||||
// Build idMap for de-anonymization
|
||||
const idMap = new Map<string, string>()
|
||||
projectSummaries.forEach((p: any, index: number) => {
|
||||
idMap.set(`PROJECT_${String(index + 1).padStart(3, '0')}`, p.id)
|
||||
})
|
||||
|
||||
// Call AI
|
||||
const openai = await getOpenAI()
|
||||
const model = await getConfiguredModel()
|
||||
|
||||
if (!openai) {
|
||||
return { recommendations: [], tokensUsed: 0, errors: ['OpenAI client not configured'] }
|
||||
}
|
||||
|
||||
const systemPrompt = buildShortlistPrompt(category, topN, rubric)
|
||||
const userPrompt = `Analyze these anonymized ${category} project evaluations and produce a ranked shortlist of the top ${topN}.
|
||||
|
||||
Projects (${anonymized.length} total):
|
||||
${JSON.stringify(anonymized, null, 2)}
|
||||
|
||||
Return a JSON array following the format specified in your instructions. Only include the top ${topN} projects. Rank by overall quality considering scores and feedback.`
|
||||
Return a JSON array following the format specified. Only include the top ${topN} projects. Rank by overall quality within this category.`
|
||||
|
||||
const openai = await getOpenAI()
|
||||
const model = await getConfiguredModel()
|
||||
const MAX_PARSE_RETRIES = 2
|
||||
let parseAttempts = 0
|
||||
let response = await openai.chat.completions.create(
|
||||
buildCompletionParams(model, {
|
||||
messages: [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
temperature: 0.1,
|
||||
jsonMode: true,
|
||||
}),
|
||||
)
|
||||
|
||||
if (!openai) {
|
||||
return {
|
||||
success: false,
|
||||
recommendations: [],
|
||||
errors: ['OpenAI client not configured'],
|
||||
let tokenUsage = extractTokenUsage(response)
|
||||
|
||||
await logAIUsage({
|
||||
action: 'SHORTLIST',
|
||||
model,
|
||||
promptTokens: tokenUsage.promptTokens,
|
||||
completionTokens: tokenUsage.completionTokens,
|
||||
totalTokens: tokenUsage.totalTokens,
|
||||
status: 'SUCCESS',
|
||||
})
|
||||
|
||||
// Parse response
|
||||
let parsed: any[]
|
||||
while (true) {
|
||||
try {
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (!content) {
|
||||
return { recommendations: [], tokensUsed: tokenUsage.totalTokens, errors: ['Empty AI response'] }
|
||||
}
|
||||
}
|
||||
|
||||
const MAX_PARSE_RETRIES = 2
|
||||
let parseAttempts = 0
|
||||
let response = await openai.chat.completions.create(
|
||||
buildCompletionParams(model, {
|
||||
messages: [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
temperature: 0.1,
|
||||
jsonMode: true,
|
||||
}),
|
||||
)
|
||||
|
||||
let tokenUsage = extractTokenUsage(response)
|
||||
|
||||
await logAIUsage({
|
||||
action: 'FILTERING',
|
||||
model,
|
||||
promptTokens: tokenUsage.promptTokens,
|
||||
completionTokens: tokenUsage.completionTokens,
|
||||
totalTokens: tokenUsage.totalTokens,
|
||||
status: 'SUCCESS',
|
||||
})
|
||||
|
||||
// Parse response with retry logic
|
||||
let parsed: any[]
|
||||
while (true) {
|
||||
try {
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (!content) {
|
||||
return {
|
||||
success: false,
|
||||
recommendations: [],
|
||||
errors: ['Empty AI response'],
|
||||
tokensUsed: tokenUsage.totalTokens,
|
||||
}
|
||||
}
|
||||
|
||||
const json = JSON.parse(content)
|
||||
parsed = Array.isArray(json) ? json : json.rankings ?? json.projects ?? json.shortlist ?? []
|
||||
break
|
||||
} catch (parseError) {
|
||||
if (parseError instanceof SyntaxError && parseAttempts < MAX_PARSE_RETRIES) {
|
||||
parseAttempts++
|
||||
console.warn(`[AI Shortlist] JSON parse failed, retrying (${parseAttempts}/${MAX_PARSE_RETRIES})`)
|
||||
|
||||
// Retry the API call with hint
|
||||
response = await openai.chat.completions.create(
|
||||
buildCompletionParams(model, {
|
||||
messages: [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: userPrompt + '\n\nIMPORTANT: Please ensure valid JSON output.' },
|
||||
],
|
||||
temperature: 0.1,
|
||||
jsonMode: true,
|
||||
}),
|
||||
)
|
||||
const retryUsage = extractTokenUsage(response)
|
||||
tokenUsage.totalTokens += retryUsage.totalTokens
|
||||
continue
|
||||
}
|
||||
|
||||
return {
|
||||
success: false,
|
||||
recommendations: [],
|
||||
errors: ['Failed to parse AI response as JSON'],
|
||||
tokensUsed: tokenUsage.totalTokens,
|
||||
}
|
||||
const json = JSON.parse(content)
|
||||
parsed = Array.isArray(json) ? json : json.rankings ?? json.projects ?? json.shortlist ?? []
|
||||
break
|
||||
} catch (parseError) {
|
||||
if (parseError instanceof SyntaxError && parseAttempts < MAX_PARSE_RETRIES) {
|
||||
parseAttempts++
|
||||
response = await openai.chat.completions.create(
|
||||
buildCompletionParams(model, {
|
||||
messages: [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: userPrompt + '\n\nIMPORTANT: Please ensure valid JSON output.' },
|
||||
],
|
||||
temperature: 0.1,
|
||||
jsonMode: true,
|
||||
}),
|
||||
)
|
||||
const retryUsage = extractTokenUsage(response)
|
||||
tokenUsage.totalTokens += retryUsage.totalTokens
|
||||
continue
|
||||
}
|
||||
return { recommendations: [], tokensUsed: tokenUsage.totalTokens, errors: ['Failed to parse AI response'] }
|
||||
}
|
||||
}
|
||||
|
||||
// De-anonymize and build recommendations
|
||||
const recommendations: ShortlistRecommendation[] = parsed
|
||||
.filter((item: any) => item.anonymousId && idMap.has(item.anonymousId))
|
||||
.map((item: any) => ({
|
||||
projectId: idMap.get(item.anonymousId)!,
|
||||
rank: item.rank ?? 0,
|
||||
score: item.score ?? 0,
|
||||
strengths: item.strengths ?? [],
|
||||
concerns: item.concerns ?? [],
|
||||
recommendation: item.recommendation ?? '',
|
||||
}))
|
||||
.sort((a: ShortlistRecommendation, b: ShortlistRecommendation) => a.rank - b.rank)
|
||||
// De-anonymize
|
||||
const recommendations: ShortlistRecommendation[] = parsed
|
||||
.filter((item: any) => item.anonymousId && idMap.has(item.anonymousId))
|
||||
.map((item: any) => ({
|
||||
projectId: idMap.get(item.anonymousId)!,
|
||||
rank: item.rank ?? 0,
|
||||
score: item.score ?? 0,
|
||||
category,
|
||||
strengths: item.strengths ?? [],
|
||||
concerns: item.concerns ?? [],
|
||||
recommendation: item.recommendation ?? '',
|
||||
}))
|
||||
.sort((a: ShortlistRecommendation, b: ShortlistRecommendation) => a.rank - b.rank)
|
||||
|
||||
return { recommendations, tokensUsed: tokenUsage.totalTokens, errors: [] }
|
||||
}
|
||||
|
||||
// ─── Main Function ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Generate an AI shortlist for projects in a round, split by category.
|
||||
* Runs independently for STARTUP and BUSINESS_CONCEPT.
|
||||
*/
|
||||
export async function generateShortlist(
|
||||
params: {
|
||||
roundId: string
|
||||
competitionId: string
|
||||
category?: string // If provided, only run for this category
|
||||
topN?: number // Global fallback
|
||||
startupTopN?: number // Per-category override
|
||||
conceptTopN?: number // Per-category override
|
||||
rubric?: string
|
||||
aiParseFiles?: boolean
|
||||
},
|
||||
prisma: PrismaClient | any,
|
||||
): Promise<ShortlistResult> {
|
||||
const {
|
||||
roundId,
|
||||
category,
|
||||
topN = 10,
|
||||
startupTopN,
|
||||
conceptTopN,
|
||||
rubric,
|
||||
aiParseFiles = false,
|
||||
} = params
|
||||
|
||||
try {
|
||||
const categories = category
|
||||
? [category]
|
||||
: ['STARTUP', 'BUSINESS_CONCEPT']
|
||||
|
||||
const allRecommendations: CategoryRecommendations = {
|
||||
STARTUP: [],
|
||||
BUSINESS_CONCEPT: [],
|
||||
}
|
||||
let totalTokens = 0
|
||||
const allErrors: string[] = []
|
||||
|
||||
// Run each category independently
|
||||
for (const cat of categories) {
|
||||
const catTopN = cat === 'STARTUP'
|
||||
? (startupTopN ?? topN)
|
||||
: (conceptTopN ?? topN)
|
||||
|
||||
console.log(`[AI Shortlist] Generating top-${catTopN} for ${cat}`)
|
||||
|
||||
const result = await generateCategoryShortlist(
|
||||
{ roundId, category: cat, topN: catTopN, rubric, aiParseFiles },
|
||||
prisma,
|
||||
)
|
||||
|
||||
if (cat === 'STARTUP') {
|
||||
allRecommendations.STARTUP = result.recommendations
|
||||
} else {
|
||||
allRecommendations.BUSINESS_CONCEPT = result.recommendations
|
||||
}
|
||||
totalTokens += result.tokensUsed
|
||||
allErrors.push(...result.errors)
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
recommendations,
|
||||
tokensUsed: tokenUsage.totalTokens,
|
||||
recommendations: allRecommendations,
|
||||
tokensUsed: totalTokens,
|
||||
errors: allErrors.length > 0 ? allErrors : undefined,
|
||||
}
|
||||
} catch (error) {
|
||||
const classification = classifyAIError(error)
|
||||
@@ -277,7 +379,7 @@ Return a JSON array following the format specified in your instructions. Only in
|
||||
|
||||
return {
|
||||
success: false,
|
||||
recommendations: [],
|
||||
recommendations: { STARTUP: [], BUSINESS_CONCEPT: [] },
|
||||
errors: [error instanceof Error ? error.message : 'AI shortlist generation failed'],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,6 +83,9 @@ export interface AnonymizedFileInfo {
|
||||
file_type: string // FileType enum value
|
||||
page_count: number | null // Number of pages if known
|
||||
size_kb: number // File size in KB
|
||||
round_name?: string | null // Which round the file was submitted for
|
||||
is_current_round?: boolean // Whether this file belongs to the current filtering/evaluation round
|
||||
text_content?: string // Extracted text content (when aiParseFiles is enabled)
|
||||
}
|
||||
|
||||
export interface AnonymizedProjectForAI {
|
||||
@@ -299,10 +302,13 @@ export function anonymizeProjectForAI(
|
||||
file_types: project.files
|
||||
?.map((f) => f.fileType)
|
||||
.filter((ft): ft is FileType => ft !== null) ?? [],
|
||||
files: project.files?.map((f) => ({
|
||||
files: project.files?.map((f: any) => ({
|
||||
file_type: f.fileType ?? 'OTHER',
|
||||
page_count: f.pageCount ?? null,
|
||||
size_kb: Math.round((f.size ?? 0) / 1024),
|
||||
...(f.roundName ? { round_name: f.roundName } : {}),
|
||||
...(f.isCurrentRound !== undefined ? { is_current_round: f.isCurrentRound } : {}),
|
||||
...(f.textContent ? { text_content: f.textContent } : {}),
|
||||
})) ?? [],
|
||||
wants_mentorship: project.wantsMentorship ?? false,
|
||||
submission_source: project.submissionSource,
|
||||
|
||||
112
src/server/services/file-content-extractor.ts
Normal file
112
src/server/services/file-content-extractor.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
/**
 * File Content Extractor
 *
 * Downloads files from storage and extracts text content for AI analysis.
 * Supports PDF and plain text files. Used when round config has aiParseFiles=true.
 *
 * Limits:
 * - Max 50,000 characters (~50KB) of extracted text per file (to stay within AI token limits)
 * - Only PDF and text-based files are parsed
 * - Extraction failures are non-fatal (the file is reported with null content)
 */
|
||||
|
||||
import { getStorageProvider } from '@/lib/storage'
|
||||
|
||||
/** Maximum number of characters of extracted text kept per file (~50KB of text). */
const MAX_TEXT_PER_FILE = 50_000

/** Mime types whose content can be extracted: PDF plus text-based formats. */
const PARSEABLE_MIME_TYPES: readonly string[] = [
  'application/pdf',
  'text/plain',
  'text/csv',
  'text/markdown',
  'text/html',
  'application/rtf',
]
|
||||
|
||||
/** Outcome of attempting to extract text from one stored file. */
export type ExtractedFileContent = {
  /** Identifier of the file this result belongs to. */
  fileId: string
  /** Name of the file, echoed back from the caller's input. */
  fileName: string
  /** Extracted text, or null when the mime type is unsupported or extraction failed. */
  content: string | null
  /** Human-readable reason why no content was produced. */
  error?: string
}
|
||||
|
||||
/**
 * Check if a file's mime type supports content extraction.
 *
 * Only the bare media type is compared: parameters such as `; charset=utf-8`
 * are ignored and the comparison is case-insensitive. The match is exact, so
 * an unrelated type that merely shares a prefix with a supported one
 * (e.g. `application/pdfx`) is rejected.
 *
 * @param mimeType - Mime type as stored with the file, optionally with parameters.
 * @returns true when the media type is listed in PARSEABLE_MIME_TYPES.
 */
export function isParseableMimeType(mimeType: string): boolean {
  // Stored metadata can be missing at runtime despite the declared type;
  // treat that as "not parseable" instead of throwing.
  if (typeof mimeType !== 'string') return false

  const mediaType = (mimeType.split(';', 1)[0] ?? '').trim().toLowerCase()
  return PARSEABLE_MIME_TYPES.includes(mediaType)
}
|
||||
|
||||
/**
 * Extract text content from a single file held by the storage provider.
 *
 * PDFs are parsed with `pdf-parse`; every other supported type is decoded as
 * UTF-8 text. The result is capped at MAX_TEXT_PER_FILE characters, with a
 * truncation marker appended when the cap is hit.
 *
 * Never rejects for download/parse problems: those are logged and reported
 * through a null `content` plus an `error` message.
 *
 * @param objectKey - Storage key of the file to download.
 * @param mimeType - Mime type of the file (parameters and letter case are ignored).
 * @param fileName - File name, used for logging and echoed in the result.
 * @param fileId - File identifier, echoed in the result.
 * @returns The extracted text, or null content with an error description.
 */
export async function extractFileContent(
  objectKey: string,
  mimeType: string,
  fileName: string,
  fileId: string,
): Promise<ExtractedFileContent> {
  if (!isParseableMimeType(mimeType)) {
    return { fileId, fileName, content: null, error: 'Unsupported mime type' }
  }

  // Compare the bare media type so values like "application/pdf; qs=0.9" or
  // "Application/PDF" are routed to the PDF parser instead of being decoded
  // as UTF-8 text (which would feed binary garbage to the AI).
  const mediaType = (mimeType.split(';', 1)[0] ?? '').trim().toLowerCase()

  try {
    const storage = await getStorageProvider()
    const buffer = await storage.getObject(objectKey)

    let text: string

    if (mediaType === 'application/pdf') {
      // Dynamic import to avoid loading pdf-parse when not needed
      const pdfParseModule = await import('pdf-parse')
      // The parser may be the module itself or its default export depending on module interop
      const pdfParse = typeof pdfParseModule === 'function' ? pdfParseModule : (pdfParseModule as any).default ?? pdfParseModule
      const pdf = await pdfParse(buffer)
      text = pdf.text
    } else {
      // Text-based files
      text = buffer.toString('utf-8')
    }

    // Truncate to limit
    if (text.length > MAX_TEXT_PER_FILE) {
      let cutoff = MAX_TEXT_PER_FILE
      // Do not cut a surrogate pair in half: a dangling high surrogate is not valid text
      const lastUnit = text.charCodeAt(cutoff - 1)
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        cutoff -= 1
      }
      text = text.slice(0, cutoff) + '\n[... content truncated ...]'
    }

    return { fileId, fileName, content: text }
  } catch (error) {
    // Best-effort: a file that cannot be read must not abort the surrounding AI run
    console.warn(`[FileExtractor] Failed to extract content from ${fileName}:`, error)
    return {
      fileId,
      fileName,
      content: null,
      error: error instanceof Error ? error.message : 'Extraction failed',
    }
  }
}
|
||||
|
||||
/**
 * Extract content from multiple files in parallel.
 *
 * Files whose mime type is not parseable are left out of the result entirely.
 * Non-fatal: a file whose extraction fails is still returned, with null
 * content and the failure message in `error`.
 *
 * @param files - File descriptors (id, name, mime type, storage key) to process.
 * @returns One result per parseable file, in input order; an empty array when none are parseable.
 */
export async function extractMultipleFileContents(
  files: Array<{
    id: string
    fileName: string
    mimeType: string
    objectKey: string
  }>,
): Promise<ExtractedFileContent[]> {
  const parseableFiles = files.filter((f) => isParseableMimeType(f.mimeType))

  if (parseableFiles.length === 0) return []

  // Each file carries its own error handling, so one failure cannot reject the
  // batch and the real failure reason is kept in the result.
  return Promise.all(
    parseableFiles.map(async (file): Promise<ExtractedFileContent> => {
      try {
        return await extractFileContent(file.objectKey, file.mimeType, file.fileName, file.id)
      } catch (error) {
        return {
          fileId: file.id,
          fileName: file.fileName,
          content: null,
          error: error instanceof Error ? error.message : 'Promise rejected',
        }
      }
    }),
  )
}
|
||||
@@ -19,6 +19,7 @@ export type AIAction =
|
||||
| 'PROJECT_TAGGING'
|
||||
| 'EVALUATION_SUMMARY'
|
||||
| 'ROUTING'
|
||||
| 'SHORTLIST'
|
||||
|
||||
export type AIStatus = 'SUCCESS' | 'PARTIAL' | 'ERROR'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user