Fix document analysis: switch to unpdf + mammoth for PDF/Word parsing

pdf-parse v2 requires DOMMatrix (a browser API), which fails in Node.js. Replaced it with unpdf (a serverless PDF.js build) for PDFs and mammoth for Word .docx files. Also fixed the same broken pdf-parse usage in file-content-extractor.ts, which is used by AI filtering.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 10:27:36 +01:00
parent c9640c6086
commit ed5e782f61
4 changed files with 298 additions and 26 deletions
--- a/package.json
+++ b/package.json
@@ -68,6 +68,7 @@
    "jspdf-autotable": "^5.0.7",
    "leaflet": "^1.9.4",
    "lucide-react": "^0.563.0",
+    "mammoth": "^1.11.0",
    "minio": "^8.0.2",
    "motion": "^11.15.0",
    "next": "^15.1.0",
@@ -88,6 +89,7 @@
    "sonner": "^2.0.7",
    "superjson": "^2.2.2",
    "tailwind-merge": "^3.4.0",
+    "unpdf": "^1.4.0",
    "use-debounce": "^10.0.4",
    "zod": "^3.24.1"
  },