All checks were successful
Build and Push Docker Image / build (push) Successful in 8m19s
- Add aiPreview mutation with full project/juror data (bios, descriptions, documents, categories, ocean issues, countries, team sizes) - Increase AI description limit from 300 to 2000 chars for richer context - Update GPT system prompt to use all available data fields - Add mode toggle (AI default / Algorithm fallback) in assignment preview - Lift AI mutation to parent page for background generation persistence - Show visual indicator on page while AI generates (spinner + progress card) - Toast notification with "Review" action when AI completes - Staggered reveal animation for assignment results (streaming feel) - Fix assignment balance with dynamic penalty (25pts per existing assignment) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
591 lines
18 KiB
TypeScript
591 lines
18 KiB
TypeScript
/**
 * Data Anonymization Service
 *
 * Strips PII (names, emails, etc.) from data before sending to AI services.
 * Returns ID mappings for de-anonymization of results.
 *
 * GDPR Compliance:
 * - All personal identifiers are stripped before AI processing
 * - Project/user IDs are replaced with sequential anonymous IDs
 * - Text content is sanitized to remove emails, phones, URLs
 * - Validation ensures no PII leakage before each AI call
 */
|
||
|
||
import type {
|
||
CompetitionCategory,
|
||
OceanIssue,
|
||
FileType,
|
||
SubmissionSource,
|
||
} from '@prisma/client'
|
||
|
||
// ─── Description Limits ──────────────────────────────────────────────────────
|
||
|
||
/**
 * Maximum number of characters of a project description that is kept when
 * the description is truncated for an AI call, keyed by calling context.
 */
export const DESCRIPTION_LIMITS = {
  /** Limit applied by `anonymizeForAI` (assignment service). */
  ASSIGNMENT: 2000,
  /** Default context of `anonymizeProjectForAI`. */
  FILTERING: 500,
  ELIGIBILITY: 400,
  MENTOR: 350,
} as const

/** Name of a context that has an entry in `DESCRIPTION_LIMITS`. */
export type DescriptionContext = keyof typeof DESCRIPTION_LIMITS
|
||
|
||
// ─── PII Patterns ────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Regular expressions that recognise common PII shapes in free text.
 *
 * Every pattern carries the global flag, so `lastIndex` persists between
 * `test()` calls on the same regex object; code that calls `test()` resets
 * `lastIndex` to 0 first.
 */
const PII_PATTERNS = {
  // Email addresses, e.g. name@example.org
  email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
  // 10-digit phone numbers with optional country code, parentheses and separators
  phone: /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,
  // http:// and https:// URLs up to the next whitespace character
  url: /https?:\/\/[^\s]+/g,
  // Digit groups in the 3-2-4 dashed layout (SSN-like)
  ssn: /\d{3}-\d{2}-\d{4}/g,
  // Dotted-quad IPv4 addresses
  ipv4: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
} as const
|
||
|
||
// ─── Basic Anonymization Types (Assignment Service) ──────────────────────────
|
||
|
||
/** Juror record as it is handed to the AI by the assignment service. */
export interface AnonymizedJuror {
  /** Sequential stand-in for the real juror ID (e.g. `juror_001`). */
  anonymousId: string
  /** Expertise tags of the juror. */
  expertiseTags: string[]
  /** Number of assignments the juror already has. */
  currentAssignmentCount: number
  /** Assignment cap, or `null` when none is set. */
  maxAssignments: number | null
  /** Sanitized, truncated biography, if any. */
  bio?: string | null
  country?: string | null
}
|
||
|
||
/** Project record as it is handed to the AI by the assignment service. */
export interface AnonymizedProject {
  /** Sequential stand-in for the real project ID (e.g. `project_001`). */
  anonymousId: string
  /** Sanitized title. */
  title: string
  /** Sanitized, truncated description, or `null` when the project has none. */
  description: string | null
  /** Tags with their confidence. */
  tags: { name: string; confidence: number }[]
  /** Placeholder team label, or `null` when the project has no team name. */
  teamName: string | null
  category?: string | null
  oceanIssue?: string | null
  country?: string | null
  institution?: string | null
  teamSize?: number
  fileTypes?: string[]
}
|
||
|
||
/** Links the anonymous juror ID sent to the AI back to the real juror ID. */
export interface JurorMapping {
  anonymousId: string
  realId: string
}
|
||
|
||
/** Links the anonymous project ID sent to the AI back to the real project ID. */
export interface ProjectMapping {
  anonymousId: string
  realId: string
}
|
||
|
||
/**
 * Output of `anonymizeForAI`: the anonymized records plus the lookup tables
 * needed to translate anonymous IDs back to real ones.
 */
export interface AnonymizationResult {
  jurors: AnonymizedJuror[]
  projects: AnonymizedProject[]
  jurorMappings: JurorMapping[]
  projectMappings: ProjectMapping[]
}
|
||
|
||
// ─── Enhanced Project Types (Filtering/Awards) ───────────────────────────────
|
||
|
||
/**
 * Anonymized description of a single project file.
 * Carried in the `files` list of the anonymized project payload.
 */
export interface AnonymizedFileInfo {
  /** FileType enum value */
  file_type: string
  /** Number of pages if known */
  page_count: number | null
  /** File size in KB */
  size_kb: number
  /** ISO 639-3 language code (e.g. 'eng', 'fra') */
  detected_lang?: string | null
  /** 0.0–1.0 confidence score */
  lang_confidence?: number | null
  /** Which round the file was submitted for */
  round_name?: string | null
  /** Whether this file belongs to the current filtering/evaluation round */
  is_current_round?: boolean
  /** Extracted text content (when aiParseFiles is enabled) */
  text_content?: string
}
|
||
|
||
/**
 * Comprehensive anonymized project data for AI filtering.
 * Includes all fields needed for flexible filtering criteria.
 */
export interface AnonymizedProjectForAI {
  /** P1, P2, etc. */
  project_id: string
  /** Sanitized */
  title: string
  /** Truncated + PII stripped */
  description: string
  /** STARTUP | BUSINESS_CONCEPT */
  category: CompetitionCategory | null
  /** Enum value */
  ocean_issue: OceanIssue | null
  country: string | null
  /** geographicZone */
  region: string | null
  institution: string | null
  tags: string[]
  /** Just the year */
  founded_year: number | null
  team_size: number
  has_description: boolean
  file_count: number
  /** FileType values */
  file_types: string[]
  /** Per-file details for document analysis */
  files: AnonymizedFileInfo[]
  wants_mentorship: boolean
  submission_source: SubmissionSource
  /** YYYY-MM-DD only */
  submitted_date: string | null
}
|
||
|
||
/**
 * Project input with all relations needed for comprehensive anonymization.
 *
 * The optional per-file fields after `pageCount` are the ones that
 * `anonymizeProjectForAI` reads when they are present; declaring them here
 * lets callers attach them without casting.
 */
export interface ProjectWithRelations {
  id: string
  title: string
  description?: string | null
  teamName?: string | null
  competitionCategory?: CompetitionCategory | null
  oceanIssue?: OceanIssue | null
  country?: string | null
  geographicZone?: string | null
  institution?: string | null
  tags: string[]
  foundedAt?: Date | null
  wantsMentorship?: boolean
  submissionSource: SubmissionSource
  submittedAt?: Date | null
  _count?: {
    teamMembers?: number
    files?: number
  }
  files?: Array<{
    fileType: FileType | null
    /** Size in bytes (converted to KB during anonymization). */
    size?: number
    pageCount?: number | null
    /** Detected language code of the file, emitted as `detected_lang`. */
    detectedLang?: string | null
    /** Confidence of the language detection, emitted as `lang_confidence`. */
    langConfidence?: number | null
    /** Name of the round the file was submitted for, emitted as `round_name`. */
    roundName?: string | null
    /** Whether the file belongs to the current round, emitted as `is_current_round`. */
    isCurrentRound?: boolean
    /** Extracted text of the file, emitted as `text_content`. */
    textContent?: string | null
  }>
}
|
||
|
||
/**
 * Lookup entry for de-anonymization: pairs the anonymous project ID sent to
 * the AI (`P1`, `P2`, …) with the real project ID.
 */
export interface ProjectAIMapping {
  anonymousId: string
  realId: string
}
|
||
|
||
// ─── Project Conversion Helper ──────────────────────────────────────────────
|
||
|
||
/**
 * Normalises a loosely-typed Prisma project result into a
 * `ProjectWithRelations`.
 * Used by ai-tagging, ai-filtering, and ai-award-eligibility services.
 *
 * Defaults applied: `wantsMentorship` → `false`, `submissionSource` →
 * `'MANUAL'`, missing counts → `0` (the file count falls back to the length
 * of `files`). Each file is reduced to `fileType`, `size` and `pageCount`;
 * any other key on the file object is not copied.
 */
export function toProjectWithRelations(project: {
  id: string
  title: string
  description?: string | null
  competitionCategory?: string | null
  oceanIssue?: string | null
  country?: string | null
  geographicZone?: string | null
  institution?: string | null
  tags: string[]
  foundedAt?: Date | null
  wantsMentorship?: boolean | null
  submissionSource?: string
  submittedAt?: Date | null
  _count?: { teamMembers?: number; files?: number }
  files?: Array<{ fileType?: string | null; size?: number; pageCount?: number | null; [key: string]: unknown }>
}): ProjectWithRelations {
  const { _count: counts, files: rawFiles } = project

  const files = (rawFiles ?? []).map((file) => ({
    fileType: (file.fileType as FileType) ?? null,
    size: file.size,
    pageCount: file.pageCount ?? null,
  }))

  const submissionSource =
    (project.submissionSource as ProjectWithRelations['submissionSource']) ?? 'MANUAL'

  return {
    id: project.id,
    title: project.title,
    description: project.description,
    competitionCategory: project.competitionCategory as ProjectWithRelations['competitionCategory'],
    oceanIssue: project.oceanIssue as ProjectWithRelations['oceanIssue'],
    country: project.country,
    geographicZone: project.geographicZone,
    institution: project.institution,
    tags: project.tags,
    foundedAt: project.foundedAt,
    wantsMentorship: project.wantsMentorship ?? false,
    submissionSource,
    submittedAt: project.submittedAt,
    _count: {
      teamMembers: counts?.teamMembers ?? 0,
      files: counts?.files ?? rawFiles?.length ?? 0,
    },
    files,
  }
}
|
||
|
||
// ─── Basic Anonymization (Assignment Service) ────────────────────────────────
|
||
|
||
/**
 * Juror record accepted by `anonymizeForAI`.
 * `name` and `email` are part of the input shape but are not copied into
 * the anonymized output.
 */
interface JurorInput {
  id: string
  name?: string | null
  email: string
  expertiseTags: string[]
  bio?: string | null
  country?: string | null
  maxAssignments?: number | null
  /** Relation counts; `assignments` becomes `currentAssignmentCount`. */
  _count?: {
    assignments: number
  }
}
|
||
|
||
/** Project record accepted by `anonymizeForAI`. */
interface ProjectInput {
  id: string
  title: string
  description?: string | null
  /** Plain tag names; used with confidence 1.0 when `tagConfidences` is absent or empty. */
  tags: string[]
  /** Tags with explicit confidence; preferred over `tags` when non-empty. */
  tagConfidences?: { name: string; confidence: number }[]
  teamName?: string | null
  competitionCategory?: string | null
  oceanIssue?: string | null
  country?: string | null
  institution?: string | null
  teamSize?: number
  fileTypes?: string[]
}
|
||
|
||
/**
 * Anonymize juror and project data for AI processing (Assignment service).
 *
 * Jurors and projects receive sequential IDs (`juror_001`, `project_001`, …)
 * in input order. Free-text fields (juror bio, project title, description
 * and institution) are passed through the sanitizer, and a team name, when
 * present, is replaced by a `Team N` placeholder.
 *
 * @param jurors - Jurors to anonymize.
 * @param projects - Projects to anonymize.
 * @returns The anonymized records together with the ID mappings needed to
 *   translate AI results back to real IDs.
 */
export function anonymizeForAI(
  jurors: JurorInput[],
  projects: ProjectInput[]
): AnonymizationResult {
  // Builds IDs such as `juror_001` / `project_012`.
  const sequentialId = (prefix: string, position: number): string =>
    `${prefix}_${String(position).padStart(3, '0')}`

  const jurorMappings: JurorMapping[] = []
  const projectMappings: ProjectMapping[] = []

  const toAnonymousJuror = (juror: JurorInput, index: number): AnonymizedJuror => {
    const anonymousId = sequentialId('juror', index + 1)
    jurorMappings.push({ anonymousId, realId: juror.id })

    const bio = juror.bio ? truncateAndSanitize(juror.bio, 500) : null

    return {
      anonymousId,
      expertiseTags: juror.expertiseTags,
      currentAssignmentCount: juror._count?.assignments ?? 0,
      maxAssignments: juror.maxAssignments ?? null,
      bio,
      country: juror.country ?? null,
    }
  }

  const toAnonymousProject = (project: ProjectInput, index: number): AnonymizedProject => {
    const anonymousId = sequentialId('project', index + 1)
    projectMappings.push({ anonymousId, realId: project.id })

    const title = sanitizeText(project.title)
    const description = project.description
      ? truncateAndSanitize(project.description, DESCRIPTION_LIMITS.ASSIGNMENT)
      : null

    // Prefer explicit confidences; otherwise treat every plain tag as certain.
    const confidences = project.tagConfidences
    const tags =
      confidences && confidences.length > 0
        ? confidences
        : project.tags.map((t) => ({ name: t, confidence: 1.0 }))

    return {
      anonymousId,
      title,
      description,
      tags,
      teamName: project.teamName ? `Team ${index + 1}` : null,
      category: project.competitionCategory ?? null,
      oceanIssue: project.oceanIssue ?? null,
      country: project.country ?? null,
      institution: project.institution ? sanitizeText(project.institution) : null,
      teamSize: project.teamSize,
      fileTypes: project.fileTypes,
    }
  }

  const anonymizedJurors = jurors.map((juror, index) => toAnonymousJuror(juror, index))
  const anonymizedProjects = projects.map((project, index) =>
    toAnonymousProject(project, index)
  )

  return {
    jurors: anonymizedJurors,
    projects: anonymizedProjects,
    jurorMappings,
    projectMappings,
  }
}
|
||
|
||
// ─── Enhanced Anonymization (Filtering/Awards) ───────────────────────────────
|
||
|
||
/**
 * Anonymize a single project with comprehensive data for AI filtering.
 *
 * GDPR Compliance:
 * - The team name is not copied to the output
 * - Title, description, institution and extracted file text are passed
 *   through `sanitizeText` (emails, phone numbers, URLs, …)
 * - The real ID is replaced with a sequential anonymous ID (`P1`, `P2`, …)
 * - The description is truncated to the limit of the given context
 * - Only the fields needed for filtering criteria are kept
 *
 * @param project - Project with the relations/counts to anonymize.
 * @param index - Zero-based position of the project; the anonymous ID is
 *   `P${index + 1}`.
 * @param context - Selects the description length limit from
 *   `DESCRIPTION_LIMITS`; defaults to `'FILTERING'`.
 * @returns The anonymized payload for the AI.
 */
export function anonymizeProjectForAI(
  project: ProjectWithRelations,
  index: number,
  context: DescriptionContext = 'FILTERING'
): AnonymizedProjectForAI {
  // Optional per-file metadata that callers may attach on top of the declared
  // file shape; typed locally so the fields can be read without `any`.
  type FileWithAIMetadata = NonNullable<ProjectWithRelations['files']>[number] & {
    detectedLang?: string | null
    langConfidence?: number | null
    roundName?: string | null
    isCurrentRound?: boolean
    textContent?: string | null
  }

  const descriptionLimit = DESCRIPTION_LIMITS[context]
  const sourceFiles: FileWithAIMetadata[] = project.files ?? []

  const toFileInfo = (file: FileWithAIMetadata): AnonymizedFileInfo => {
    const info: AnonymizedFileInfo = {
      file_type: file.fileType ?? 'OTHER',
      page_count: file.pageCount ?? null,
      size_kb: Math.round((file.size ?? 0) / 1024),
    }
    if (file.detectedLang) info.detected_lang = file.detectedLang
    if (file.langConfidence != null) info.lang_confidence = file.langConfidence
    if (file.roundName) info.round_name = file.roundName
    if (file.isCurrentRound !== undefined) info.is_current_round = file.isCurrentRound
    // Extracted document text is free text and may contain contact details,
    // so it goes through the same sanitizer as the description.
    if (file.textContent) info.text_content = sanitizeText(file.textContent)
    return info
  }

  return {
    project_id: `P${index + 1}`,
    title: sanitizeText(project.title),
    description: truncateAndSanitize(project.description, descriptionLimit),
    category: project.competitionCategory ?? null,
    ocean_issue: project.oceanIssue ?? null,
    country: project.country ?? null,
    region: project.geographicZone ?? null,
    // Free text: sanitize it, as the assignment-service anonymizer does.
    institution: project.institution ? sanitizeText(project.institution) : null,
    tags: project.tags,
    // NOTE(review): getFullYear() uses the local time zone while
    // submitted_date below is derived from the UTC ISO string — confirm this
    // asymmetry is acceptable.
    founded_year: project.foundedAt?.getFullYear() ?? null,
    team_size: project._count?.teamMembers ?? 0,
    has_description: !!project.description?.trim(),
    // Fall back to the number of files actually supplied when no count exists.
    file_count: project._count?.files ?? project.files?.length ?? 0,
    file_types: sourceFiles
      .map((file) => file.fileType)
      .filter((ft): ft is FileType => ft !== null),
    files: sourceFiles.map(toFileInfo),
    wants_mentorship: project.wantsMentorship ?? false,
    submission_source: project.submissionSource,
    submitted_date: project.submittedAt?.toISOString().split('T')[0] ?? null,
  }
}
|
||
|
||
/**
 * Anonymize a list of projects and build the matching ID lookup table.
 *
 * @param projects - Projects to anonymize; position `i` receives the
 *   anonymous ID `P${i + 1}`.
 * @param context - Description-limit context forwarded to
 *   `anonymizeProjectForAI`; defaults to `'FILTERING'`.
 * @returns The anonymized projects and the anonymous-to-real ID mappings,
 *   both in input order.
 */
export function anonymizeProjectsForAI(
  projects: ProjectWithRelations[],
  context: DescriptionContext = 'FILTERING'
): {
  anonymized: AnonymizedProjectForAI[]
  mappings: ProjectAIMapping[]
} {
  const mappings: ProjectAIMapping[] = projects.map((project, index) => ({
    anonymousId: `P${index + 1}`,
    realId: project.id,
  }))

  const anonymized = projects.map((project, index) =>
    anonymizeProjectForAI(project, index, context)
  )

  return { anonymized, mappings }
}
|
||
|
||
// ─── De-anonymization ────────────────────────────────────────────────────────
|
||
|
||
/**
 * Translate the anonymous juror/project IDs in AI results back to real IDs.
 *
 * @param results - AI results carrying anonymous `jurorId` / `projectId`.
 * @param jurorMappings - Anonymous-to-real juror ID pairs.
 * @param projectMappings - Anonymous-to-real project ID pairs.
 * @returns A copy of every result extended with `realJurorId` and
 *   `realProjectId`. An ID without a (non-empty) mapping is passed through
 *   unchanged.
 */
export function deanonymizeResults<
  T extends { jurorId: string; projectId: string }
>(
  results: T[],
  jurorMappings: JurorMapping[],
  projectMappings: ProjectMapping[]
): (T & { realJurorId: string; realProjectId: string })[] {
  const realJurorIds = new Map<string, string>()
  for (const { anonymousId, realId } of jurorMappings) {
    realJurorIds.set(anonymousId, realId)
  }

  const realProjectIds = new Map<string, string>()
  for (const { anonymousId, realId } of projectMappings) {
    realProjectIds.set(anonymousId, realId)
  }

  return results.map((entry) => {
    const realJurorId = realJurorIds.get(entry.jurorId) || entry.jurorId
    const realProjectId = realProjectIds.get(entry.projectId) || entry.projectId
    return { ...entry, realJurorId, realProjectId }
  })
}
|
||
|
||
/**
 * Translate the anonymous project IDs in project-only AI results
 * (filtering/awards) back to real IDs.
 *
 * @param results - AI results carrying an anonymous `project_id`.
 * @param mappings - Anonymous-to-real project ID pairs.
 * @returns A copy of every result extended with `realProjectId`. An ID
 *   without a (non-empty) mapping is passed through unchanged.
 */
export function deanonymizeProjectResults<T extends { project_id: string }>(
  results: T[],
  mappings: ProjectAIMapping[]
): (T & { realProjectId: string })[] {
  const realIds = new Map<string, string>()
  for (const { anonymousId, realId } of mappings) {
    realIds.set(anonymousId, realId)
  }

  return results.map((entry) => {
    const realProjectId = realIds.get(entry.project_id) || entry.project_id
    return { ...entry, realProjectId }
  })
}
|
||
|
||
// ─── Text Sanitization ───────────────────────────────────────────────────────
|
||
|
||
/**
 * Sanitize text by masking every match of the patterns in `PII_PATTERNS`.
 *
 * Replacements, in order:
 * 1. URLs          → `[url removed]`
 * 2. Emails        → `[email removed]`
 * 3. Phone numbers → `[phone removed]`
 * 4. SSN-like IDs  → `[id removed]`
 * 5. IPv4 addresses → `[ip removed]`
 *
 * URLs are masked first: a URL that embeds an email address or a digit run
 * would otherwise be partially replaced and then re-matched up to the space
 * inside the placeholder, leaving fragments such as `[url removed] removed]`.
 * IPv4 addresses are masked so that sanitized text is not rejected by the
 * validators, which test against every pattern including `ipv4`.
 *
 * @param text - Free text to sanitize.
 * @returns The text with all matches replaced by placeholders.
 */
export function sanitizeText(text: string): string {
  // String.prototype.replace with a global regex starts from index 0, so the
  // shared pattern objects need no manual lastIndex reset here.
  return text
    .replace(PII_PATTERNS.url, '[url removed]')
    .replace(PII_PATTERNS.email, '[email removed]')
    .replace(PII_PATTERNS.phone, '[phone removed]')
    .replace(PII_PATTERNS.ssn, '[id removed]')
    .replace(PII_PATTERNS.ipv4, '[ip removed]')
}
|
||
|
||
/**
 * Sanitize text and truncate it to a maximum length.
 *
 * Sanitization runs before truncation, so the cut is made on already-masked
 * text. When the text is too long it is shortened and `...` is appended,
 * with the ellipsis counted towards `maxLength`.
 *
 * @param text - Text to process; `null`, `undefined` and `''` yield `''`.
 * @param maxLength - Maximum length of the returned string in UTF-16 code
 *   units. Limits below 3 leave no room for the ellipsis, so the text is cut
 *   hard; a non-positive or NaN limit yields `''`.
 * @returns The sanitized text, never longer than `maxLength`.
 */
export function truncateAndSanitize(
  text: string | null | undefined,
  maxLength: number
): string {
  if (!text) return ''

  const sanitized = sanitizeText(text)
  if (sanitized.length <= maxLength) {
    return sanitized
  }

  // Also catches NaN, for which every comparison is false.
  if (!(maxLength > 0)) return ''

  const ellipsis = '...'
  const limit = Math.floor(maxLength)
  const withEllipsis = limit >= ellipsis.length
  // A negative end index would make slice() count from the end of the string,
  // so the room for the ellipsis is only subtracted when it actually fits.
  let head = sanitized.slice(0, withEllipsis ? limit - ellipsis.length : limit)

  // Do not end on the first half of a surrogate pair: a lone high surrogate
  // is not valid text once the string is encoded.
  const lastUnit = head.charCodeAt(head.length - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1)
  }

  return withEllipsis ? head + ellipsis : head
}
|
||
|
||
// ─── GDPR Compliance Validation ──────────────────────────────────────────────
|
||
|
||
/** Outcome of a PII scan. */
export interface PIIValidationResult {
  /** `true` when no violation was found. */
  valid: boolean
  /** Human-readable description of each violation; empty when `valid`. */
  violations: string[]
}
|
||
|
||
/**
 * Validate that data contains no personal information.
 * Used for GDPR compliance before sending data to AI.
 *
 * Two checks are made:
 * 1. The JSON serialization of `data` (keys and nested values included) is
 *    tested against every pattern in `PII_PATTERNS`.
 * 2. The top-level keys of `data` are compared against a list of sensitive
 *    field names. The comparison ignores case and `_` / `-` separators, so
 *    `creditCard`, `CreditCard` and `credit_card` are all caught.
 *
 * @param data - Object about to be sent to the AI.
 * @returns `valid: true` when nothing was found, otherwise the list of
 *   violations.
 */
export function validateNoPersonalData(
  data: Record<string, unknown>
): PIIValidationResult {
  const violations: string[] = []
  const serialized = JSON.stringify(data)

  // Check each PII pattern
  for (const [type, pattern] of Object.entries(PII_PATTERNS)) {
    // The patterns are global, so test() resumes from lastIndex; rewind first.
    pattern.lastIndex = 0
    if (pattern.test(serialized)) {
      violations.push(`Potential ${type} detected in data`)
    }
  }

  // Additional checks for common PII fields
  const sensitiveFields = [
    'email',
    'phone',
    'password',
    'ssn',
    'socialSecurity',
    'creditCard',
    'bankAccount',
    'drivingLicense',
  ]

  // Both sides go through the same normalization; lowercasing only the keys
  // would make the camelCase entries above unmatchable.
  const normalize = (name: string): string => name.toLowerCase().replace(/[_-]/g, '')
  const presentKeys = new Set(Object.keys(data).map(normalize))

  for (const field of sensitiveFields) {
    if (presentKeys.has(normalize(field))) {
      violations.push(`Sensitive field "${field}" present in data`)
    }
  }

  return {
    valid: violations.length === 0,
    violations,
  }
}
|
||
|
||
/**
 * Enforce GDPR compliance before EVERY AI call.
 *
 * Runs `validateNoPersonalData` on every non-null object in `data`; items
 * that are not objects are skipped. On the first failing item the violations
 * are logged via `console.error` and an error is thrown.
 *
 * @param data - Items about to be sent to the AI.
 * @throws Error listing the violations of the first item that fails.
 */
export function enforceGDPRCompliance(data: unknown[]): void {
  data.forEach((item, position) => {
    if (item === null || typeof item !== 'object') return

    const check = validateNoPersonalData(item as Record<string, unknown>)
    if (check.valid) return

    console.error(
      `[GDPR] PII validation failed for item ${position}:`,
      check.violations
    )
    throw new Error(
      `GDPR compliance check failed: ${check.violations.join(', ')}`
    )
  })
}
|
||
|
||
/**
 * Validate that assignment-service data has been properly anonymized.
 *
 * Every free-text field of the result is tested against all patterns in
 * `PII_PATTERNS`: juror expertise tags and bio; project title, description,
 * institution and tag names.
 *
 * @param data - Result of the assignment-service anonymization.
 * @returns `true` if no PII pattern is detected, `false` otherwise.
 */
export function validateAnonymization(data: AnonymizationResult): boolean {
  const patterns = Object.values(PII_PATTERNS)

  // Empty/absent text is trivially clean.
  const isClean = (text: string | null | undefined): boolean => {
    if (!text) return true
    return patterns.every((pattern) => {
      // The patterns are global, so test() resumes from lastIndex; rewind first.
      pattern.lastIndex = 0
      return !pattern.test(text)
    })
  }

  // Check jurors
  for (const juror of data.jurors) {
    // The bio is free text, so it is checked just like a project description.
    if (!isClean(juror.bio)) return false
    if (!juror.expertiseTags.every((tag) => isClean(tag))) return false
  }

  // Check projects
  for (const project of data.projects) {
    if (!isClean(project.title)) return false
    if (!isClean(project.description)) return false
    if (!isClean(project.institution)) return false
    for (const tag of project.tags) {
      // Tolerates plain-string tags as well as `{ name, confidence }` objects.
      if (!isClean(typeof tag === 'string' ? tag : tag.name)) return false
    }
  }

  return true
}
|
||
|
||
/**
 * Validate anonymized projects for AI (enhanced version).
 *
 * Tests title, description, institution and every tag of each project
 * against all patterns in `PII_PATTERNS`.
 *
 * @param projects - Anonymized project payloads.
 * @returns `true` if no PII pattern is detected, `false` otherwise.
 */
export function validateAnonymizedProjects(
  projects: AnonymizedProjectForAI[]
): boolean {
  const patterns = Object.values(PII_PATTERNS)

  const containsPII = (text: string | null | undefined): boolean => {
    if (!text) return false
    return patterns.some((pattern) => {
      // Global regexes keep lastIndex between test() calls; rewind first.
      pattern.lastIndex = 0
      return pattern.test(text)
    })
  }

  return projects.every(
    (project) =>
      !containsPII(project.title) &&
      !containsPII(project.description) &&
      !containsPII(project.institution) &&
      project.tags.every((tag) => !containsPII(tag))
  )
}
|