Initial commit: MOPC platform with Docker deployment setup
Full Next.js 15 platform with tRPC, Prisma, PostgreSQL, NextAuth. Includes production Dockerfile (multi-stage, port 7600), docker-compose with registry-based image pull, Gitea Actions CI workflow, nginx config for portal.monaco-opc.com, deployment scripts, and DEPLOYMENT.md guide. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
211
src/server/services/anonymization.ts
Normal file
211
src/server/services/anonymization.ts
Normal file
@@ -0,0 +1,211 @@
|
||||
/**
 * Data Anonymization Service
 *
 * Strips PII (names, emails, etc.) from data before sending to AI services.
 * Returns ID mappings for de-anonymization of results.
 */
|
||||
|
||||
/**
 * Juror record in the shape that is handed to AI services.
 *
 * It has no name or email field; the juror is identified only by
 * `anonymousId`.
 */
export interface AnonymizedJuror {
  /** Placeholder ID (e.g. `juror_001`) that stands in for the real juror ID. */
  anonymousId: string
  /** Expertise tags taken from the source juror. */
  expertiseTags: string[]
  /** Number of assignments the juror currently holds. */
  currentAssignmentCount: number
  /** Assignment cap for the juror, or `null` when no cap is set. */
  maxAssignments: number | null
}
|
||||
|
||||
/**
 * Project record in the shape that is handed to AI services.
 *
 * The project content is kept, but the real ID and the real team name are
 * replaced by placeholders.
 */
export interface AnonymizedProject {
  /** Placeholder ID (e.g. `project_001`) that stands in for the real project ID. */
  anonymousId: string
  /** Project title. */
  title: string
  /** Project description, or `null` when there is none. */
  description: string | null
  /** Project tags. */
  tags: string[]
  /** Generic team label (e.g. `Team 1`), or `null` when the project has no team name. */
  teamName: string | null
}
|
||||
|
||||
/**
 * Links one anonymous juror ID to the real juror ID it replaced, so that
 * results keyed by the anonymous ID can be traced back.
 */
export interface JurorMapping {
  /** Placeholder ID that was exposed in the anonymized data. */
  anonymousId: string
  /** Original juror ID. */
  realId: string
}
|
||||
|
||||
/**
 * Links one anonymous project ID to the real project ID it replaced, so that
 * results keyed by the anonymous ID can be traced back.
 */
export interface ProjectMapping {
  /** Placeholder ID that was exposed in the anonymized data. */
  anonymousId: string
  /** Original project ID. */
  realId: string
}
|
||||
|
||||
/**
 * Output of an anonymization pass: the anonymized records together with the
 * ID mappings needed to de-anonymize results afterwards.
 */
export interface AnonymizationResult {
  /** Anonymized juror records. */
  jurors: AnonymizedJuror[]
  /** Anonymized project records. */
  projects: AnonymizedProject[]
  /** Anonymous-to-real ID pairs for the jurors. */
  jurorMappings: JurorMapping[]
  /** Anonymous-to-real ID pairs for the projects. */
  projectMappings: ProjectMapping[]
}
|
||||
|
||||
/**
 * Juror data from database
 *
 * Input shape accepted by the anonymizer. `name` and `email` are part of the
 * input shape only; the anonymized juror type has no field for them.
 */
interface JurorInput {
  /** Real juror ID. */
  id: string
  /** Juror's name (PII). */
  name?: string | null
  /** Juror's email address (PII). */
  email: string
  /** Expertise tags for the juror. */
  expertiseTags: string[]
  /** Assignment cap, if one is set. */
  maxAssignments?: number | null
  /** Relation counts; presumably Prisma's `_count` select — verify against caller. */
  _count?: {
    /** Number of assignments the juror currently holds. */
    assignments: number
  }
}
|
||||
|
||||
/**
 * Project data from database
 *
 * Input shape accepted by the anonymizer.
 */
interface ProjectInput {
  /** Real project ID. */
  id: string
  /** Project title. */
  title: string
  /** Project description, if any. */
  description?: string | null
  /** Project tags. */
  tags: string[]
  /** Real team name, if any. */
  teamName?: string | null
}
|
||||
|
||||
/**
 * Anonymize juror and project data for AI processing
 *
 * This function:
 * 1. Strips all PII (names, emails) from juror data
 * 2. Replaces real IDs with sequential anonymous IDs (`juror_001`,
 *    `project_001`, ...), numbered by position in the input arrays
 * 3. Keeps only expertise tags and assignment counts for jurors
 * 4. Runs project titles, descriptions and every tag (juror and project)
 *    through `sanitizeText`
 * 5. Replaces team names with a generic `Team N` label
 * 6. Returns mappings for de-anonymization
 *
 * @param jurors - Juror rows to anonymize.
 * @param projects - Project rows to anonymize.
 * @returns The anonymized records plus the anonymous-to-real ID mappings,
 *   each in the same order as the corresponding input array.
 */
export function anonymizeForAI(
  jurors: JurorInput[],
  projects: ProjectInput[]
): AnonymizationResult {
  const jurorMappings: JurorMapping[] = []
  const projectMappings: ProjectMapping[] = []

  // Anonymize jurors
  const anonymizedJurors: AnonymizedJuror[] = jurors.map((juror, index) => {
    const anonymousId = `juror_${(index + 1).toString().padStart(3, '0')}`

    jurorMappings.push({
      anonymousId,
      realId: juror.id,
    })

    return {
      anonymousId,
      // Tags are free text, so they get the same PII scrub as project text.
      // Mapping also yields a fresh array, so the output does not alias the input.
      expertiseTags: juror.expertiseTags.map((tag) => sanitizeText(tag)),
      currentAssignmentCount: juror._count?.assignments ?? 0,
      maxAssignments: juror.maxAssignments ?? null,
    }
  })

  // Anonymize projects (keep content but replace IDs)
  const anonymizedProjects: AnonymizedProject[] = projects.map(
    (project, index) => {
      const anonymousId = `project_${(index + 1).toString().padStart(3, '0')}`

      projectMappings.push({
        anonymousId,
        realId: project.id,
      })

      return {
        anonymousId,
        title: sanitizeText(project.title),
        // A missing or empty description is reported as null.
        description: project.description
          ? sanitizeText(project.description)
          : null,
        tags: project.tags.map((tag) => sanitizeText(tag)),
        // Replace specific team names with generic identifier
        teamName: project.teamName ? `Team ${index + 1}` : null,
      }
    }
  )

  return {
    jurors: anonymizedJurors,
    projects: anonymizedProjects,
    jurorMappings,
    projectMappings,
  }
}
|
||||
|
||||
/**
 * De-anonymize AI results back to real IDs
 *
 * Each result is returned unchanged except for two added fields,
 * `realJurorId` and `realProjectId`, looked up from the mappings.
 *
 * An ID that has no entry in the corresponding mapping is passed through
 * as-is, so callers that need strictness should compare the `real*` fields
 * against the original IDs.
 *
 * @param results - AI results keyed by anonymous juror/project IDs.
 * @param jurorMappings - Anonymous-to-real juror ID pairs.
 * @param projectMappings - Anonymous-to-real project ID pairs.
 * @returns A new array, in the same order as `results`.
 */
export function deanonymizeResults<T extends { jurorId: string; projectId: string }>(
  results: T[],
  jurorMappings: JurorMapping[],
  projectMappings: ProjectMapping[]
): (T & { realJurorId: string; realProjectId: string })[] {
  const jurorMap = new Map<string, string>(
    jurorMappings.map((m) => [m.anonymousId, m.realId])
  )
  const projectMap = new Map<string, string>(
    projectMappings.map((m) => [m.anonymousId, m.realId])
  )

  return results.map((result) => ({
    ...result,
    // `??` rather than `||`: fall back only when the mapping is missing,
    // not when a mapped value happens to be falsy.
    realJurorId: jurorMap.get(result.jurorId) ?? result.jurorId,
    realProjectId: projectMap.get(result.projectId) ?? result.projectId,
  }))
}
|
||||
|
||||
/**
 * Sanitize text to remove potential PII patterns
 *
 * Replaces URLs, email addresses and phone numbers with the placeholders
 * `[url removed]`, `[email removed]` and `[phone removed]`.
 *
 * URLs are handled first on purpose. A URL can contain an email address or a
 * phone-like digit run; if those were replaced first, the placeholder's space
 * would cut the URL match short and leave a stray ` removed]` in the output.
 *
 * @param text - Free text to scrub.
 * @returns The text with every matched pattern replaced.
 */
function sanitizeText(text: string): string {
  // Remove URLs (before emails/phones, see above)
  let sanitized = text.replace(
    /https?:\/\/[^\s]+/g,
    '[url removed]'
  )

  // Remove email addresses
  sanitized = sanitized.replace(
    /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
    '[email removed]'
  )

  // Remove phone numbers (various formats)
  sanitized = sanitized.replace(
    /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,
    '[phone removed]'
  )

  return sanitized
}
|
||||
|
||||
/**
 * Validate that data has been properly anonymized
 *
 * Scans every free-text field of the anonymized records (juror expertise
 * tags; project title, description, team name and tags) for email-address
 * and phone-number patterns.
 *
 * NOTE(review): URLs are not checked here.
 *
 * @param data - Anonymized records to inspect.
 * @returns `true` if no PII pattern is detected, `false` on the first hit.
 */
export function validateAnonymization(data: AnonymizationResult): boolean {
  const piiPatterns = [
    /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/, // Email
    /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/, // Phone
  ]

  // Empty, null and undefined values have nothing to leak and count as clean.
  const isClean = (text: string | null | undefined): boolean =>
    !text || !piiPatterns.some((pattern) => pattern.test(text))

  // Jurors carry no free text other than their expertise tags.
  const jurorsClean = data.jurors.every((juror) =>
    juror.expertiseTags.every((tag) => isClean(tag))
  )
  if (!jurorsClean) return false

  // Projects: check every text field, including the team label.
  return data.projects.every(
    (project) =>
      isClean(project.title) &&
      isClean(project.description) &&
      isClean(project.teamName) &&
      project.tags.every((tag) => isClean(tag))
  )
}
|
||||
Reference in New Issue
Block a user