AI category-aware evaluation: per-round config, file parsing, shortlist, advance flow
Some checks failed
Build and Push Docker Image / build (push) Has been cancelled

- Per-juror cap mode (HARD/SOFT/NONE) in add-member dialog and members table
- Jury invite flow: create user + add to group + send invitation from dialog
- Per-round config: notifyOnAdvance, aiParseFiles, startupAdvanceCount, conceptAdvanceCount
- Moved notify-on-advance from competition-level to per-round setting
- AI filtering: round-tagged files with newest-first sorting, optional file content extraction
- File content extractor service (pdf-parse for PDF, utf-8 for text files)
- AI shortlist runs independently per category (STARTUP / BUSINESS_CONCEPT)
- generateAIRecommendations tRPC endpoint with per-round config integration
- AI recommendations UI: trigger button, confirmation dialog, per-category results display
- Category-aware advance dialog: select/deselect projects by category with target caps
- STAGE_ACTIVE bug fix in assignment router

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-16 10:09:52 +01:00
parent 93f4ad4b31
commit 80c9e35971
21 changed files with 1886 additions and 1381 deletions

View File

@@ -0,0 +1,112 @@
/**
* File Content Extractor
*
* Downloads files from storage and extracts text content for AI analysis.
* Supports PDF and plain text files. Used when round config has aiParseFiles=true.
*
* Limits:
* - Max 50,000 characters (~50KB) of extracted text per file, plus a short truncation marker (to stay within AI token limits)
* - Only PDF and text-based files are parsed
* - Extraction failures are non-fatal (file is skipped)
*/
import { getStorageProvider } from '@/lib/storage'
/**
 * Upper bound on the extracted text kept per file, measured in string
 * characters (UTF-16 code units) — roughly 50KB for ASCII-heavy content.
 */
const MAX_TEXT_PER_FILE = 50_000

/**
 * Allow-list of mime types (lowercase, without parameters) whose content can
 * be extracted. Stored as a Set so membership is an exact match rather than a
 * prefix match.
 */
const PARSEABLE_MIME_TYPES: ReadonlySet<string> = new Set([
  'application/pdf',
  'text/plain',
  'text/csv',
  'text/markdown',
  'text/html',
  'application/rtf',
])

/**
 * Outcome of extracting one file.
 *
 * `content` is the extracted text, or `null` when the file was not parsed;
 * in that case `error` carries a short human-readable reason.
 */
export type ExtractedFileContent = {
  fileId: string
  fileName: string
  content: string | null
  error?: string
}

/**
 * Check if a file's mime type supports content extraction.
 *
 * Parameters (e.g. `; charset=utf-8`) and surrounding whitespace are ignored
 * and the comparison is case-insensitive, because mime type names are
 * case-insensitive. The remaining type/subtype must match an allow-list entry
 * exactly, so look-alike types such as `text/plainfoo` are rejected.
 *
 * @param mimeType - Raw mime type / Content-Type value of the file.
 * @returns `true` when the type is on the allow-list.
 */
export function isParseableMimeType(mimeType: string): boolean {
  // Everything after the first ';' is a parameter list, not part of the type.
  const [essence = ''] = mimeType.split(';', 1)
  return PARSEABLE_MIME_TYPES.has(essence.trim().toLowerCase())
}
/** Minimal call shape of the `pdf-parse` entry point relied on below. */
type PdfTextParser = (data: unknown) => Promise<{ text: string }>

/**
 * Pick the callable out of the dynamically imported `pdf-parse` module:
 * the module itself when it is a function, otherwise its `default` export,
 * otherwise the module object as a last resort.
 */
function resolvePdfTextParser(mod: unknown): PdfTextParser {
  if (typeof mod === 'function') return mod as PdfTextParser
  const candidate = (mod as { default?: unknown }).default ?? mod
  return candidate as PdfTextParser
}

/**
 * Cap `text` at `limit` characters and append a truncation marker when it was
 * cut. Never leaves a dangling high surrogate at the cut point.
 */
function truncateExtractedText(text: string, limit: number): string {
  if (text.length <= limit) return text
  let end = limit
  const lastUnit = text.charCodeAt(end - 1)
  // A high surrogate at the boundary means the cut would split one code point
  // in two; drop it so the result stays well-formed.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1
  return text.slice(0, end) + '\n[... content truncated ...]'
}

/**
 * Extract text content from a single stored file.
 *
 * The object is fetched through the storage provider. PDFs are run through
 * `pdf-parse`; every other supported type is decoded as UTF-8. The result is
 * truncated to `MAX_TEXT_PER_FILE` characters.
 *
 * @param objectKey - Storage key of the object to download.
 * @param mimeType - Mime type of the file; parameters and letter case are
 *   ignored when deciding whether it is a PDF.
 * @param fileName - File name, echoed in the result and used in log output.
 * @param fileId - File id, echoed in the result.
 * @returns The extracted text, or `content: null` plus an `error` message when
 *   the type is unsupported or extraction fails. Never rejects on download or
 *   parse errors.
 */
export async function extractFileContent(
  objectKey: string,
  mimeType: string,
  fileName: string,
  fileId: string,
): Promise<ExtractedFileContent> {
  if (!isParseableMimeType(mimeType)) {
    return { fileId, fileName, content: null, error: 'Unsupported mime type' }
  }

  // Compare on the bare type so that e.g. "application/pdf; charset=binary"
  // is still routed to the PDF parser instead of being decoded as UTF-8 text.
  const [essence = ''] = mimeType.split(';', 1)
  const isPdf = essence.trim().toLowerCase() === 'application/pdf'

  try {
    const storage = await getStorageProvider()
    const buffer = await storage.getObject(objectKey)

    let text: string
    if (isPdf) {
      // Dynamic import to avoid loading pdf-parse when not needed
      const pdfParseModule: unknown = await import('pdf-parse')
      const pdfParse = resolvePdfTextParser(pdfParseModule)
      const pdf = await pdfParse(buffer)
      text = pdf.text
    } else {
      // Text-based files
      text = buffer.toString('utf-8')
    }

    return { fileId, fileName, content: truncateExtractedText(text, MAX_TEXT_PER_FILE) }
  } catch (error) {
    // Extraction is best-effort: log and report the failure in the result.
    console.warn(`[FileExtractor] Failed to extract content from ${fileName}:`, error)
    return {
      fileId,
      fileName,
      content: null,
      error: error instanceof Error ? error.message : 'Extraction failed',
    }
  }
}
/**
 * Extract content from multiple files in parallel.
 *
 * Files whose mime type is not parseable are dropped from the result
 * entirely. Every remaining file yields exactly one entry, in input order.
 * Non-fatal: a file whose extraction fails is returned with `content: null`
 * and the failure message in `error`.
 *
 * @param files - Candidate files (id, name, mime type, storage key).
 * @returns One result per parseable file; an empty array when none qualify.
 */
export async function extractMultipleFileContents(
  files: Array<{
    id: string
    fileName: string
    mimeType: string
    objectKey: string
  }>,
): Promise<ExtractedFileContent[]> {
  const parseableFiles = files.filter((f) => isParseableMimeType(f.mimeType))
  if (parseableFiles.length === 0) return []

  // Each task handles its own failure, so one bad file cannot reject the
  // batch and the original error message is kept instead of being replaced by
  // a generic placeholder.
  return Promise.all(
    parseableFiles.map(async (file): Promise<ExtractedFileContent> => {
      try {
        return await extractFileContent(file.objectKey, file.mimeType, file.fileName, file.id)
      } catch (error) {
        return {
          fileId: file.id,
          fileName: file.fileName,
          content: null,
          error: error instanceof Error ? error.message : 'Promise rejected',
        }
      }
    }),
  )
}