Add document analysis: page count, text extraction & language detection
All checks were successful
Build and Push Docker Image / build (push) Successful in 11m7s

Introduces a document analyzer service that extracts page count (via pdf-parse),
text preview, and detected language (via franc) from uploaded files. Analysis runs
automatically on upload (configurable via SystemSettings) and can be triggered
retroactively for existing files. Results are displayed as badges in the FileViewer
and fed to AI screening for language-based filtering criteria.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt
2026-02-17 10:08:04 +01:00
parent 771f35c695
commit c9640c6086
13 changed files with 565 additions and 10 deletions

View File

@@ -49,7 +49,10 @@ import {
Heart,
Crown,
UserPlus,
Loader2,
ScanSearch,
} from 'lucide-react'
import { toast } from 'sonner'
import { formatDate, formatDateOnly } from '@/lib/utils'
interface PageProps {
@@ -529,15 +532,20 @@ function ProjectDetailContent({ projectId }: { projectId: string }) {
<AnimatedCard index={4}>
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2.5 text-lg">
<div className="rounded-lg bg-rose-500/10 p-1.5">
<FileText className="h-4 w-4 text-rose-500" />
<div className="flex items-center justify-between">
<div>
<CardTitle className="flex items-center gap-2.5 text-lg">
<div className="rounded-lg bg-rose-500/10 p-1.5">
<FileText className="h-4 w-4 text-rose-500" />
</div>
Files
</CardTitle>
<CardDescription>
Project documents and materials organized by competition round
</CardDescription>
</div>
Files
</CardTitle>
<CardDescription>
Project documents and materials organized by competition round
</CardDescription>
<AnalyzeDocumentsButton projectId={projectId} onComplete={() => utils.file.listByProject.invalidate({ projectId })} />
</div>
</CardHeader>
<CardContent className="space-y-6">
{/* Requirements organized by round */}
@@ -664,6 +672,11 @@ function ProjectDetailContent({ projectId }: { projectId: string }) {
size: f.size,
bucket: f.bucket,
objectKey: f.objectKey,
pageCount: f.pageCount,
textPreview: f.textPreview,
detectedLang: f.detectedLang,
langConfidence: f.langConfidence,
analyzedAt: f.analyzedAt ? String(f.analyzedAt) : null,
}))}
/>
</div>
@@ -847,6 +860,36 @@ function ProjectDetailSkeleton() {
)
}
/**
 * Small outline button that runs the `file.analyzeProjectFiles` mutation for
 * one project.
 *
 * While the mutation is pending the button is disabled and shows a spinner
 * with an "Analyzing..." label. On success a toast reports the `analyzed` and
 * `failed` counts from the mutation result and `onComplete` is invoked; on
 * error a toast shows the error message, or a generic fallback when the
 * message is empty.
 *
 * @param projectId - Passed as the mutation input.
 * @param onComplete - Invoked after a successful run, once the success toast
 *   has been shown.
 */
function AnalyzeDocumentsButton({ projectId, onComplete }: { projectId: string; onComplete: () => void }) {
  const analyzeMutation = trpc.file.analyzeProjectFiles.useMutation({
    onSuccess: ({ analyzed, failed }) => {
      // Build the message parts separately: pluralize "file" and only mention
      // failures when there were any.
      const pluralSuffix = analyzed === 1 ? '' : 's'
      const failureNote = failed > 0 ? ` (${failed} failed)` : ''
      toast.success(`Analyzed ${analyzed} file${pluralSuffix}${failureNote}`)
      onComplete()
    },
    onError: (err) => {
      // `||` rather than `??` so an empty message also falls back to the generic text.
      toast.error(err.message || 'Analysis failed')
    },
  })

  const busy = analyzeMutation.isPending
  const startAnalysis = () => analyzeMutation.mutate({ projectId })

  // Icon and label always change together, so pick them as a pair.
  const { icon, label } = busy
    ? { icon: <Loader2 className="mr-2 h-4 w-4 animate-spin" />, label: 'Analyzing...' }
    : { icon: <ScanSearch className="mr-2 h-4 w-4" />, label: 'Analyze Documents' }

  return (
    <Button variant="outline" size="sm" onClick={startAnalysis} disabled={busy}>
      {icon}
      {label}
    </Button>
  )
}
export default function ProjectDetailPage({ params }: PageProps) {
const { id } = use(params)