Add document analysis: page count, text extraction & language detection
All checks were successful
Build and Push Docker Image / build (push) Successful in 11m7s
All checks were successful
Build and Push Docker Image / build (push) Successful in 11m7s
Introduces a document analyzer service that extracts the page count (via pdf-parse), a text preview, and the detected language (via franc) from uploaded files. Analysis runs automatically on upload (configurable via SystemSettings) and can be triggered retroactively for existing files. Results are displayed as badges in the FileViewer and fed to AI screening for language-based filtering criteria.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -206,6 +206,14 @@ export const fileRouter = router({
|
||||
userAgent: ctx.userAgent,
|
||||
})
|
||||
|
||||
// Auto-analyze document (fire-and-forget, delayed for presigned upload).
// The promise is intentionally not awaited, so the handler returns without
// waiting for analysis. Every failure path (module load, settings lookup,
// the analysis itself) ends in the single catch below and is logged rather
// than silently discarded; nothing here can reject into the request.
void import('../services/document-analyzer')
  .then(async ({ analyzeFileDelayed, isAutoAnalysisEnabled }) => {
    // Skip quietly when auto-analysis is switched off.
    if (!(await isAutoAnalysisEnabled())) return
    await analyzeFileDelayed(file.id)
  })
  .catch((err) =>
    console.warn('[DocAnalyzer] Post-upload analysis failed:', err))
|
||||
|
||||
return {
|
||||
uploadUrl,
|
||||
file,
|
||||
@@ -1201,6 +1209,14 @@ export const fileRouter = router({
|
||||
userAgent: ctx.userAgent,
|
||||
})
|
||||
|
||||
// Auto-analyze document (fire-and-forget, delayed for presigned upload).
// The promise is intentionally not awaited, so the handler returns without
// waiting for analysis. Every failure path (module load, settings lookup,
// the analysis itself) ends in the single catch below and is logged rather
// than silently discarded; nothing here can reject into the request.
void import('../services/document-analyzer')
  .then(async ({ analyzeFileDelayed, isAutoAnalysisEnabled }) => {
    // Skip quietly when auto-analysis is switched off.
    if (!(await isAutoAnalysisEnabled())) return
    await analyzeFileDelayed(file.id)
  })
  .catch((err) =>
    console.warn('[DocAnalyzer] Post-upload analysis failed:', err))
|
||||
|
||||
return { uploadUrl, file }
|
||||
}),
|
||||
|
||||
@@ -1510,6 +1526,14 @@ export const fileRouter = router({
|
||||
ctx.prisma,
|
||||
)
|
||||
|
||||
// Auto-analyze document (fire-and-forget, delayed for presigned upload).
// The promise is intentionally not awaited, so the handler returns without
// waiting for analysis. Every failure path (module load, settings lookup,
// the analysis itself) ends in the single catch below and is logged rather
// than silently discarded; nothing here can reject into the request.
void import('../services/document-analyzer')
  .then(async ({ analyzeFileDelayed, isAutoAnalysisEnabled }) => {
    // Skip quietly when auto-analysis is switched off.
    if (!(await isAutoAnalysisEnabled())) return
    await analyzeFileDelayed(file.id)
  })
  .catch((err) =>
    console.warn('[DocAnalyzer] Post-upload analysis failed:', err))
|
||||
|
||||
return { uploadUrl, file }
|
||||
}),
|
||||
|
||||
@@ -1545,4 +1569,25 @@ export const fileRouter = router({
|
||||
)
|
||||
return results
|
||||
}),
|
||||
|
||||
/**
 * Run document analysis (page count, language, text preview) over every
 * file belonging to one project.
 *
 * Retroactive: files that were analyzed before are analyzed again.
 *
 * Input: `{ projectId: string }`. The analyzer service is loaded on demand
 * and the result of its `analyzeProjectFiles(projectId)` call is returned
 * unchanged.
 */
analyzeProjectFiles: adminProcedure
  .input(z.object({ projectId: z.string() }))
  .mutation(async ({ input: { projectId } }) => {
    // Dynamic import: the service module is only loaded when this runs.
    const { analyzeProjectFiles: runProjectAnalysis } =
      await import('../services/document-analyzer')
    return runProjectAnalysis(projectId)
  }),
|
||||
|
||||
/**
 * Platform-wide batch analysis of files that have not been analyzed yet,
 * meant for files uploaded before document analysis existed.
 *
 * Takes no input. The analyzer service is loaded on demand and the result
 * of its `analyzeAllUnanalyzed()` call is returned unchanged.
 */
analyzeAllFiles: adminProcedure
  .mutation(async () => {
    // Dynamic import: the service module is only loaded when this runs.
    const { analyzeAllUnanalyzed: runBatchAnalysis } =
      await import('../services/document-analyzer')
    return runBatchAnalysis()
  }),
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user