Files
MOPC-Portal/src/server/services/anonymization.ts
Matt 6743119c4d
All checks were successful
Build and Push Docker Image / build (push) Successful in 8m19s
AI-powered assignment generation with enriched data and streaming UI
- Add aiPreview mutation with full project/juror data (bios, descriptions,
  documents, categories, ocean issues, countries, team sizes)
- Increase AI description limit from 300 to 2000 chars for richer context
- Update GPT system prompt to use all available data fields
- Add mode toggle (AI default / Algorithm fallback) in assignment preview
- Lift AI mutation to parent page for background generation persistence
- Show visual indicator on page while AI generates (spinner + progress card)
- Toast notification with "Review" action when AI completes
- Staggered reveal animation for assignment results (streaming feel)
- Fix assignment balance with dynamic penalty (25pts per existing assignment)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 14:45:57 +01:00

591 lines
18 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Data Anonymization Service
*
* Strips PII (names, emails, etc.) from data before sending to AI services.
* Returns ID mappings for de-anonymization of results.
*
* GDPR Compliance:
* - All personal identifiers are stripped before AI processing
* - Project/user IDs are replaced with sequential anonymous IDs
* - Text content is sanitized to remove emails, phones, URLs
* - Validation ensures no PII leakage before each AI call
*/
import type {
CompetitionCategory,
OceanIssue,
FileType,
SubmissionSource,
} from '@prisma/client'
// ─── Description Limits ──────────────────────────────────────────────────────
/**
 * Maximum description length, in characters, per AI use case.
 * The selected value is handed to truncateAndSanitize as its `maxLength`.
 */
export const DESCRIPTION_LIMITS = {
// Used by anonymizeForAI for project descriptions
ASSIGNMENT: 2000,
// Default context of anonymizeProjectForAI and anonymizeProjectsForAI
FILTERING: 500,
// Not referenced in the visible part of this file; presumably award eligibility (confirm with callers)
ELIGIBILITY: 400,
// Not referenced in the visible part of this file; presumably mentor matching (confirm with callers)
MENTOR: 350,
} as const
/** Name of a description-length context: one of the DESCRIPTION_LIMITS keys. */
export type DescriptionContext = keyof typeof DESCRIPTION_LIMITS
// ─── PII Patterns ────────────────────────────────────────────────────────────
/**
 * Regular expressions for PII-like substrings, used both to replace matches
 * (sanitizeText) and to detect leftovers (the validate* functions).
 *
 * Every pattern carries the `g` flag, so `test()` advances `lastIndex` between
 * calls; the validation code resets `lastIndex` to 0 before each `test()`.
 */
const PII_PATTERNS = {
// Email address: local part, "@", domain, and a TLD of at least two letters
email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
// Phone number: optional 1-3 digit prefix (with optional "+"), then 3-3-4 digit groups with optional separators
phone: /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,
// http/https URL, running up to the next whitespace character
url: /https?:\/\/[^\s]+/g,
// Dash-separated 3-2-4 digit identifier (SSN-like)
ssn: /\d{3}-\d{2}-\d{4}/g,
// Four dot-separated groups of 1-3 digits; the 0-255 range is not validated
ipv4: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
} as const
// ─── Basic Anonymization Types (Assignment Service) ──────────────────────────
/**
 * Juror record as produced by anonymizeForAI: no name or email, only an
 * anonymous ID plus the attributes used for matching.
 */
export interface AnonymizedJuror {
anonymousId: string // Sequential ID of the form `juror_001`
expertiseTags: string[]
currentAssignmentCount: number // Taken from `_count.assignments`; 0 when absent
maxAssignments: number | null
bio?: string | null // Sanitized and truncated by anonymizeForAI; null when the juror has no bio
country?: string | null
}
/**
 * Project record as produced by anonymizeForAI for the assignment service.
 */
export interface AnonymizedProject {
anonymousId: string // Sequential ID of the form `project_001`
title: string // Passed through sanitizeText
description: string | null // Sanitized and truncated; null when the project has no description
tags: Array<{ name: string; confidence: number }> // Plain tags get confidence 1.0 when no tag confidences are supplied
teamName: string | null // Placeholder `Team N`, never the real team name; null when the project has none
category?: string | null
oceanIssue?: string | null
country?: string | null
institution?: string | null // Passed through sanitizeText when present
teamSize?: number
fileTypes?: string[]
}
/** Links an anonymous juror ID back to the real juror ID. */
export interface JurorMapping {
anonymousId: string // ID that was sent to the AI
realId: string // Original juror `id`
}
/** Links an anonymous project ID back to the real project ID. */
export interface ProjectMapping {
anonymousId: string // ID that was sent to the AI
realId: string // Original project `id`
}
/**
 * Output of anonymizeForAI: the anonymized records plus the mapping tables
 * that deanonymizeResults needs to translate AI output back to real IDs.
 */
export interface AnonymizationResult {
jurors: AnonymizedJuror[]
projects: AnonymizedProject[]
jurorMappings: JurorMapping[]
projectMappings: ProjectMapping[]
}
// ─── Enhanced Project Types (Filtering/Awards) ───────────────────────────────
/**
 * Per-file details attached to an anonymized project for AI filtering
 * (the `files` entries of AnonymizedProjectForAI).
 * Optional fields are only present when the source file record provides them.
 */
export interface AnonymizedFileInfo {
file_type: string // FileType enum value
page_count: number | null // Number of pages if known
size_kb: number // File size in KB
detected_lang?: string | null // ISO 639-3 language code (e.g. 'eng', 'fra')
lang_confidence?: number | null // 0.0-1.0 confidence score
round_name?: string | null // Which round the file was submitted for
is_current_round?: boolean // Whether this file belongs to the current filtering/evaluation round
text_content?: string // Extracted text content (when aiParseFiles is enabled)
}
/**
 * Comprehensive anonymized project data for AI filtering and awards.
 * Field names are snake_case because this object is the payload given to the AI;
 * it is produced by anonymizeProjectForAI.
 */
export interface AnonymizedProjectForAI {
project_id: string // P1, P2, etc.
title: string // Sanitized
description: string // Truncated + PII stripped
category: CompetitionCategory | null // STARTUP | BUSINESS_CONCEPT
ocean_issue: OceanIssue | null // Enum value
country: string | null
region: string | null // geographicZone
institution: string | null
tags: string[]
founded_year: number | null // Just the year
team_size: number
has_description: boolean
file_count: number
file_types: string[] // FileType values
files: AnonymizedFileInfo[] // Per-file details for document analysis
wants_mentorship: boolean
submission_source: SubmissionSource
submitted_date: string | null // YYYY-MM-DD only
}
/**
 * Project input with all relations needed for comprehensive anonymization.
 * This is the shape consumed by anonymizeProjectForAI / anonymizeProjectsForAI
 * and produced by toProjectWithRelations.
 */
export interface ProjectWithRelations {
id: string // Real project ID; only ever stored in the mapping, never in the AI payload
title: string
description?: string | null
teamName?: string | null
competitionCategory?: CompetitionCategory | null
oceanIssue?: OceanIssue | null
country?: string | null
geographicZone?: string | null
institution?: string | null
tags: string[]
foundedAt?: Date | null
wantsMentorship?: boolean
submissionSource: SubmissionSource
submittedAt?: Date | null
// Relation counts; anonymizeProjectForAI falls back to 0 when these are missing
_count?: {
teamMembers?: number
files?: number
}
// NOTE: anonymizeProjectForAI also reads optional per-file keys that are not declared here
// (detectedLang, langConfidence, roundName, isCurrentRound, textContent)
files?: Array<{ fileType: FileType | null; size?: number; pageCount?: number | null }>
}
/**
 * Mapping for de-anonymization: pairs the `P<n>` ID given to the AI with the
 * real project ID. Consumed by deanonymizeProjectResults.
 */
export interface ProjectAIMapping {
anonymousId: string // e.g. `P1`
realId: string // Original project `id`
}
// ─── Project Conversion Helper ──────────────────────────────────────────────
/**
 * Normalize a loosely-typed Prisma project result into ProjectWithRelations.
 * Used by the ai-tagging, ai-filtering, and ai-award-eligibility services.
 *
 * Defaults applied: `wantsMentorship` -> false, `submissionSource` -> 'MANUAL',
 * missing counts -> 0 (the file count falls back to `files.length`), and a
 * missing `files` list -> []. Each file is reduced to fileType / size / pageCount;
 * any other per-file keys are not carried over.
 *
 * @param project Raw project row; enum-like fields arrive as plain strings.
 * @returns The same data typed as ProjectWithRelations.
 */
export function toProjectWithRelations(project: {
  id: string
  title: string
  description?: string | null
  competitionCategory?: string | null
  oceanIssue?: string | null
  country?: string | null
  geographicZone?: string | null
  institution?: string | null
  tags: string[]
  foundedAt?: Date | null
  wantsMentorship?: boolean | null
  submissionSource?: string
  submittedAt?: Date | null
  _count?: { teamMembers?: number; files?: number }
  files?: Array<{ fileType?: string | null; size?: number; pageCount?: number | null; [key: string]: unknown }>
}): ProjectWithRelations {
  const { _count: counts, files: rawFiles } = project

  // Keep only the three per-file fields that ProjectWithRelations declares
  const files =
    rawFiles == null
      ? []
      : rawFiles.map((file) => {
          const fileType = (file.fileType as FileType) ?? null
          const pageCount = file.pageCount ?? null
          return { fileType, size: file.size, pageCount }
        })

  const teamMemberCount = counts?.teamMembers ?? 0
  // Prefer the explicit relation count; otherwise count the loaded file rows
  const fileCount = counts?.files ?? rawFiles?.length ?? 0

  const competitionCategory =
    project.competitionCategory as ProjectWithRelations['competitionCategory']
  const oceanIssue = project.oceanIssue as ProjectWithRelations['oceanIssue']
  const submissionSource =
    (project.submissionSource as ProjectWithRelations['submissionSource']) ?? 'MANUAL'

  return {
    id: project.id,
    title: project.title,
    description: project.description,
    competitionCategory,
    oceanIssue,
    country: project.country,
    geographicZone: project.geographicZone,
    institution: project.institution,
    tags: project.tags,
    foundedAt: project.foundedAt,
    wantsMentorship: project.wantsMentorship ?? false,
    submissionSource,
    submittedAt: project.submittedAt,
    _count: { teamMembers: teamMemberCount, files: fileCount },
    files,
  }
}
// ─── Basic Anonymization (Assignment Service) ────────────────────────────────
/**
 * Raw juror record accepted by anonymizeForAI.
 * `name` and `email` are part of the input shape but are never copied into the
 * anonymized output; `id` is only stored in the juror mapping.
 */
interface JurorInput {
id: string
name?: string | null
email: string
expertiseTags: string[]
bio?: string | null
country?: string | null
maxAssignments?: number | null
// Relation count; a missing value is treated as 0 current assignments
_count?: {
assignments: number
}
}
/**
 * Raw project record accepted by anonymizeForAI.
 * `id` is only stored in the project mapping; `teamName` is replaced by a
 * positional placeholder in the output.
 */
interface ProjectInput {
id: string
title: string
description?: string | null
tags: string[]
// When present and non-empty, used instead of `tags` (which would get confidence 1.0)
tagConfidences?: Array<{ name: string; confidence: number }>
teamName?: string | null
competitionCategory?: string | null
oceanIssue?: string | null
country?: string | null
institution?: string | null
teamSize?: number
fileTypes?: string[]
}
/**
 * Build the anonymized juror/project payload for the assignment AI.
 *
 * Real IDs are swapped for positional IDs (`juror_001`, `project_001`, ...) and
 * recorded in the returned mapping tables. Free-text fields (bio, title,
 * description, institution) go through the sanitizers; team names are replaced
 * by `Team N`.
 *
 * @param jurors Jurors in the order that determines their anonymous IDs.
 * @param projects Projects in the order that determines their anonymous IDs.
 * @returns Anonymized records plus the mappings needed by deanonymizeResults.
 */
export function anonymizeForAI(
  jurors: JurorInput[],
  projects: ProjectInput[]
): AnonymizationResult {
  // Positional IDs are 1-based and zero-padded to three digits
  const sequence = (position: number): string => (position + 1).toString().padStart(3, '0')

  const jurorMappings: JurorMapping[] = []
  const anonymizedJurors: AnonymizedJuror[] = []
  jurors.forEach((juror, position) => {
    const anonymousId = `juror_${sequence(position)}`
    jurorMappings.push({ anonymousId, realId: juror.id })

    const bio = juror.bio ? truncateAndSanitize(juror.bio, 500) : null
    anonymizedJurors.push({
      anonymousId,
      expertiseTags: juror.expertiseTags,
      currentAssignmentCount: juror._count?.assignments ?? 0,
      maxAssignments: juror.maxAssignments ?? null,
      bio,
      country: juror.country ?? null,
    })
  })

  const projectMappings: ProjectMapping[] = []
  const anonymizedProjects: AnonymizedProject[] = []
  projects.forEach((project, position) => {
    const anonymousId = `project_${sequence(position)}`
    projectMappings.push({ anonymousId, realId: project.id })

    const title = sanitizeText(project.title)
    const description = project.description
      ? truncateAndSanitize(project.description, DESCRIPTION_LIMITS.ASSIGNMENT)
      : null

    // Prefer scored tags; plain tags are treated as fully confident
    const hasScoredTags = !!project.tagConfidences && project.tagConfidences.length > 0
    const tags =
      hasScoredTags && project.tagConfidences
        ? project.tagConfidences
        : project.tags.map((name) => ({ name, confidence: 1.0 }))

    anonymizedProjects.push({
      anonymousId,
      title,
      description,
      tags,
      teamName: project.teamName ? `Team ${position + 1}` : null,
      category: project.competitionCategory ?? null,
      oceanIssue: project.oceanIssue ?? null,
      country: project.country ?? null,
      institution: project.institution ? sanitizeText(project.institution) : null,
      teamSize: project.teamSize,
      fileTypes: project.fileTypes,
    })
  })

  return {
    jurors: anonymizedJurors,
    projects: anonymizedProjects,
    jurorMappings,
    projectMappings,
  }
}
// ─── Enhanced Anonymization (Filtering/Awards) ───────────────────────────────
/**
 * Anonymize a single project with comprehensive data for AI filtering.
 *
 * GDPR compliance:
 * - The real project ID is replaced with the positional ID `P<index + 1>`
 * - The team name is not included in the output
 * - Title, institution, and extracted file text are passed through sanitizeText
 * - The description is sanitized and truncated to the limit for `context`
 * - Dates are reduced to a year (founded) or a YYYY-MM-DD string (submitted)
 *
 * @param project Project with its relations loaded.
 * @param index Zero-based position of the project in the batch.
 * @param context Selects the description length limit from DESCRIPTION_LIMITS.
 * @returns The snake_case payload object handed to the AI.
 */
export function anonymizeProjectForAI(
  project: ProjectWithRelations,
  index: number,
  context: DescriptionContext = 'FILTERING'
): AnonymizedProjectForAI {
  // Optional per-file keys that callers may attach on top of the declared file shape
  type FileWithExtras = NonNullable<ProjectWithRelations['files']>[number] & {
    detectedLang?: string | null
    langConfidence?: number | null
    roundName?: string | null
    isCurrentRound?: boolean
    textContent?: string | null
  }

  const descriptionLimit = DESCRIPTION_LIMITS[context]
  const files: FileWithExtras[] = project.files ?? []

  return {
    project_id: `P${index + 1}`,
    title: sanitizeText(project.title),
    description: truncateAndSanitize(project.description, descriptionLimit),
    category: project.competitionCategory ?? null,
    ocean_issue: project.oceanIssue ?? null,
    country: project.country ?? null,
    region: project.geographicZone ?? null,
    // Free text: sanitize it like the title (validateAnonymizedProjects checks this field)
    institution: project.institution != null ? sanitizeText(project.institution) : null,
    tags: project.tags,
    founded_year: project.foundedAt?.getFullYear() ?? null,
    team_size: project._count?.teamMembers ?? 0,
    has_description: !!project.description?.trim(),
    file_count: project._count?.files ?? 0,
    file_types: files
      .map((f) => f.fileType)
      .filter((ft): ft is FileType => ft !== null),
    files: files.map((f) => ({
      file_type: f.fileType ?? 'OTHER',
      page_count: f.pageCount ?? null,
      size_kb: Math.round((f.size ?? 0) / 1024),
      ...(f.detectedLang ? { detected_lang: f.detectedLang } : {}),
      ...(f.langConfidence != null ? { lang_confidence: f.langConfidence } : {}),
      ...(f.roundName ? { round_name: f.roundName } : {}),
      ...(f.isCurrentRound !== undefined ? { is_current_round: f.isCurrentRound } : {}),
      // Extracted document text is the most likely place for contact details, so strip PII here too
      ...(f.textContent ? { text_content: sanitizeText(f.textContent) } : {}),
    })),
    wants_mentorship: project.wantsMentorship ?? false,
    submission_source: project.submissionSource,
    // toISOString is UTC-based; keep only the date part
    submitted_date: project.submittedAt?.toISOString().split('T')[0] ?? null,
  }
}
/**
 * Anonymize a batch of projects and record how to map the results back.
 *
 * Each project receives the ID `P<position + 1>`; the same ID is stored in the
 * returned mapping next to the project's real `id`.
 *
 * @param projects Projects to anonymize, in batch order.
 * @param context Description-length context forwarded to anonymizeProjectForAI.
 * @returns The anonymized payloads and the anonymous-to-real ID mappings.
 */
export function anonymizeProjectsForAI(
  projects: ProjectWithRelations[],
  context: DescriptionContext = 'FILTERING'
): {
  anonymized: AnonymizedProjectForAI[]
  mappings: ProjectAIMapping[]
} {
  const anonymized: AnonymizedProjectForAI[] = []
  const mappings: ProjectAIMapping[] = []

  projects.forEach((source, position) => {
    const anonymousId = `P${position + 1}`
    mappings.push({ anonymousId, realId: source.id })
    anonymized.push(anonymizeProjectForAI(source, position, context))
  })

  return { anonymized, mappings }
}
// ─── De-anonymization ────────────────────────────────────────────────────────
/**
 * Translate AI assignment results from anonymous IDs back to real IDs.
 *
 * Every result is copied and extended with `realJurorId` / `realProjectId`.
 * When an ID has no mapping entry, the ID from the result is used unchanged.
 *
 * @param results AI output rows carrying anonymous `jurorId` / `projectId`.
 * @param jurorMappings Juror mappings returned by anonymizeForAI.
 * @param projectMappings Project mappings returned by anonymizeForAI.
 * @returns The rows with the two real-ID fields added.
 */
export function deanonymizeResults<
  T extends { jurorId: string; projectId: string }
>(
  results: T[],
  jurorMappings: JurorMapping[],
  projectMappings: ProjectMapping[]
): (T & { realJurorId: string; realProjectId: string })[] {
  const realJurorIds = new Map<string, string>()
  for (const { anonymousId, realId } of jurorMappings) {
    realJurorIds.set(anonymousId, realId)
  }

  const realProjectIds = new Map<string, string>()
  for (const { anonymousId, realId } of projectMappings) {
    realProjectIds.set(anonymousId, realId)
  }

  return results.map((row) => {
    const realJurorId = realJurorIds.get(row.jurorId) || row.jurorId
    const realProjectId = realProjectIds.get(row.projectId) || row.projectId
    return { ...row, realJurorId, realProjectId }
  })
}
/**
 * Translate project-only AI results (filtering/awards) back to real project IDs.
 *
 * Every result is copied and extended with `realProjectId`. When a `project_id`
 * has no mapping entry, the `project_id` itself is used.
 *
 * @param results AI output rows carrying an anonymous `project_id`.
 * @param mappings Mappings returned by anonymizeProjectsForAI.
 * @returns The rows with `realProjectId` added.
 */
export function deanonymizeProjectResults<T extends { project_id: string }>(
  results: T[],
  mappings: ProjectAIMapping[]
): (T & { realProjectId: string })[] {
  const realIds = new Map<string, string>()
  for (const { anonymousId, realId } of mappings) {
    realIds.set(anonymousId, realId)
  }

  return results.map((row) => {
    const realProjectId = realIds.get(row.project_id) || row.project_id
    return { ...row, realProjectId }
  })
}
// ─── Text Sanitization ───────────────────────────────────────────────────────
/**
 * Replace PII-like substrings in `text` with neutral placeholders.
 *
 * Covers every pattern in PII_PATTERNS, so text that has been sanitized here
 * also passes the pattern checks performed by the validation functions:
 * URLs, email addresses, phone numbers, SSN-like IDs, and IPv4 addresses.
 *
 * @param text Raw free text.
 * @returns The text with each match replaced by a `[... removed]` marker.
 */
export function sanitizeText(text: string): string {
  let sanitized = text
  // URLs go first: they can embed emails, hosts, or digit runs (e.g. https://user@host/path).
  // Replacing those inner parts first would break the URL match and leave fragments behind.
  sanitized = sanitized.replace(PII_PATTERNS.url, '[url removed]')
  // Remove email addresses
  sanitized = sanitized.replace(PII_PATTERNS.email, '[email removed]')
  // Remove phone numbers (various formats)
  sanitized = sanitized.replace(PII_PATTERNS.phone, '[phone removed]')
  // Remove SSN-like patterns
  sanitized = sanitized.replace(PII_PATTERNS.ssn, '[id removed]')
  // Remove IPv4 addresses; the validators reject them, so they must not survive sanitization
  sanitized = sanitized.replace(PII_PATTERNS.ipv4, '[ip removed]')
  return sanitized
}
/**
 * Sanitize `text` and cap it at `maxLength` characters.
 *
 * Sanitization runs before truncation, so a cut can never expose part of a
 * PII match. Text that is too long is cut and ends in `...`; the ellipsis
 * counts toward `maxLength`.
 *
 * @param text Raw text; null, undefined, and '' all yield ''.
 * @param maxLength Maximum length of the returned string.
 * @returns The sanitized text, at most `maxLength` characters long.
 */
export function truncateAndSanitize(
  text: string | null | undefined,
  maxLength: number
): string {
  if (!text) return ''
  const sanitized = sanitizeText(text)
  if (sanitized.length <= maxLength) {
    return sanitized
  }
  const ellipsis = '...'
  if (maxLength < ellipsis.length) {
    // No room for the ellipsis. A negative slice end would count from the end of the
    // string and return far more than maxLength, so hard-cut instead.
    return sanitized.slice(0, Math.max(0, maxLength))
  }
  return sanitized.slice(0, maxLength - ellipsis.length) + ellipsis
}
// ─── GDPR Compliance Validation ──────────────────────────────────────────────
/** Outcome of validateNoPersonalData. */
export interface PIIValidationResult {
valid: boolean // true exactly when `violations` is empty
violations: string[] // One human-readable message per detected problem
}
/**
 * Validate that data contains no personal information.
 * Used for GDPR compliance before sending data to AI.
 *
 * Two checks are performed:
 * 1. The JSON serialization of `data` is scanned with every PII_PATTERNS regex.
 * 2. The top-level keys of `data` are compared, case-insensitively, against a
 *    list of sensitive field names. Nested keys are not inspected by this check.
 *
 * @param data Object about to be sent to the AI.
 * @returns `valid: true` when nothing was found; otherwise the list of violations.
 */
export function validateNoPersonalData(
  data: Record<string, unknown>
): PIIValidationResult {
  const violations: string[] = []
  const textContent = JSON.stringify(data)
  // Check each PII pattern
  for (const [type, pattern] of Object.entries(PII_PATTERNS)) {
    // Reset regex state (global flag)
    pattern.lastIndex = 0
    if (pattern.test(textContent)) {
      violations.push(`Potential ${type} detected in data`)
    }
  }
  // Additional checks for common PII fields
  const sensitiveFields = [
    'email',
    'phone',
    'password',
    'ssn',
    'socialSecurity',
    'creditCard',
    'bankAccount',
    'drivingLicense',
  ]
  const presentKeys = new Set(Object.keys(data).map((key) => key.toLowerCase()))
  for (const field of sensitiveFields) {
    // Both sides are lower-cased; otherwise the camelCase names could never match
    if (presentKeys.has(field.toLowerCase())) {
      violations.push(`Sensitive field "${field}" present in data`)
    }
  }
  return {
    valid: violations.length === 0,
    violations,
  }
}
/**
 * Gatekeeper to run before EVERY AI call: rejects the payload if any item
 * fails validateNoPersonalData.
 *
 * Items that are not objects (primitives, null) are skipped. Processing stops
 * at the first failing item: its violations are logged, then an error is thrown.
 *
 * @param data Items about to be sent to the AI.
 * @throws Error listing the violations of the first item that fails validation.
 */
export function enforceGDPRCompliance(data: unknown[]): void {
  data.forEach((entry, position) => {
    if (entry === null || typeof entry !== 'object') return

    const report = validateNoPersonalData(entry as Record<string, unknown>)
    if (report.valid) return

    console.error(
      `[GDPR] PII validation failed for item ${position}:`,
      report.violations
    )
    throw new Error(
      `GDPR compliance check failed: ${report.violations.join(', ')}`
    )
  })
}
/**
 * Validate that an assignment payload has been properly anonymized.
 *
 * Scans every free-text field of the anonymized records against all
 * PII_PATTERNS: juror bios and expertise tags, and project titles,
 * descriptions, institutions, and tag names.
 *
 * @param data Result of anonymizeForAI.
 * @returns true if no PII pattern matches any checked field, false otherwise.
 */
export function validateAnonymization(data: AnonymizationResult): boolean {
  const isClean = (text: string | null | undefined): boolean => {
    if (!text) return true
    for (const pattern of Object.values(PII_PATTERNS)) {
      // Global regexes keep state between test() calls; start from the beginning each time
      pattern.lastIndex = 0
      if (pattern.test(text)) return false
    }
    return true
  }
  // Check jurors
  for (const juror of data.jurors) {
    // The bio is free text that is sent to the AI, so it must be validated as well
    if (!isClean(juror.bio)) return false
    for (const tag of juror.expertiseTags) {
      if (!isClean(tag)) return false
    }
  }
  // Check projects
  for (const project of data.projects) {
    if (!isClean(project.title)) return false
    if (!isClean(project.description)) return false
    // Same reasoning as the bio: institution names are free text in the payload
    if (!isClean(project.institution)) return false
    for (const tag of project.tags) {
      if (!isClean(typeof tag === 'string' ? tag : tag.name)) return false
    }
  }
  return true
}
/**
 * Validate a batch of filtering/awards payloads (enhanced project shape).
 *
 * For each project, the title, description, institution, and every tag are
 * tested against all PII_PATTERNS.
 *
 * @param projects Output of anonymizeProjectForAI / anonymizeProjectsForAI.
 * @returns false as soon as any checked field matches a PII pattern, else true.
 */
export function validateAnonymizedProjects(
  projects: AnonymizedProjectForAI[]
): boolean {
  // Empty/null values cannot contain PII; otherwise try each pattern from the start of the text
  const containsPII = (value: string | null | undefined): boolean => {
    if (!value) return false
    return Object.values(PII_PATTERNS).some((pattern) => {
      pattern.lastIndex = 0
      return pattern.test(value)
    })
  }

  for (const candidate of projects) {
    const scalarFields = [candidate.title, candidate.description, candidate.institution]
    if (scalarFields.some((field) => containsPII(field))) return false
    if (candidate.tags.some((tag) => containsPII(tag))) return false
  }
  return true
}