MOPC-Portal/src/server/services/anonymization.ts

/**
 * Data Anonymization Service
 *
 * Strips PII (names, emails, etc.) from data before sending to AI services.
 * Returns ID mappings for de-anonymization of results.
 *
 * GDPR Compliance:
 * - All personal identifiers are stripped before AI processing
 * - Project/user IDs are replaced with sequential anonymous IDs
 * - Text content is sanitized to remove emails, phones, URLs
 * - Validation ensures no PII leakage before each AI call
 */

import type {
  CompetitionCategory,
  OceanIssue,
  FileType,
  SubmissionSource,
} from '@prisma/client'

// ─── Description Limits ──────────────────────────────────────────────────────

/**
 * Maximum description length per AI use-case.
 *
 * The selected value is handed to `truncateAndSanitize` as its `maxLength`,
 * so it is measured in string length units (`String.prototype.length`) and
 * includes the trailing ellipsis added on truncation.
 */
export const DESCRIPTION_LIMITS = {
  // Used by `anonymizeForAI` for project descriptions.
  ASSIGNMENT: 2000,
  // Default context of `anonymizeProjectForAI` / `anonymizeProjectsForAI`.
  FILTERING: 500,
  ELIGIBILITY: 400,
  MENTOR: 350,
} as const

/** Name of a use-case that has an entry in `DESCRIPTION_LIMITS`. */
export type DescriptionContext = keyof typeof DESCRIPTION_LIMITS

// ─── PII Patterns ────────────────────────────────────────────────────────────

/**
 * Regular expressions used to detect (and, in part, strip) personal data.
 *
 * Every pattern carries the global flag, so each RegExp object keeps a
 * `lastIndex` between `test()` calls; code that calls `test()` on these
 * shared instances must reset `lastIndex` first.
 */
const PII_PATTERNS = {
  // local-part@domain.tld with a TLD of at least two letters
  email: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
  // Optional 1–3 digit prefix (optionally with "+"), then 3-3-4 digit groups
  // separated by "-", "." or whitespace; the first group may be parenthesised
  phone: /(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}/g,
  // http:// or https:// followed by everything up to the next whitespace
  url: /https?:\/\/[^\s]+/g,
  // ddd-dd-dddd (no word boundaries, so it also matches inside longer runs)
  ssn: /\d{3}-\d{2}-\d{4}/g,
  // Four dot-separated groups of 1–3 digits; octet range is not validated
  ipv4: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
} as const

// ─── Basic Anonymization Types (Assignment Service) ──────────────────────────

/**
 * Juror record as sent to the AI assignment service.
 * Contains no name or email; the juror is identified only by `anonymousId`.
 */
export interface AnonymizedJuror {
  /** Sequential placeholder such as `juror_001` (see `anonymizeForAI`). */
  anonymousId: string
  /** Expertise tags, copied from the input without modification. */
  expertiseTags: string[]
  /** Number of assignments the juror already has (0 when unknown). */
  currentAssignmentCount: number
  /** Assignment cap, or null when the input did not specify one. */
  maxAssignments: number | null
  /** Sanitized bio, truncated to 500 characters; null when absent. */
  bio?: string | null
  country?: string | null
}

/**
 * Project record as sent to the AI assignment service.
 * The real project ID and team name are replaced by placeholders.
 */
export interface AnonymizedProject {
  /** Sequential placeholder such as `project_001` (see `anonymizeForAI`). */
  anonymousId: string
  /** Title after `sanitizeText`. */
  title: string
  /** Sanitized description truncated to `DESCRIPTION_LIMITS.ASSIGNMENT`; null when absent. */
  description: string | null
  /** Tags with confidence; plain tags are given a confidence of 1.0. */
  tags: Array<{ name: string; confidence: number }>
  /** Generic placeholder (`Team N`) when the project had a team name, else null. */
  teamName: string | null
  category?: string | null
  oceanIssue?: string | null
  country?: string | null
  /** Institution name after `sanitizeText`; null when absent. */
  institution?: string | null
  teamSize?: number
  fileTypes?: string[]
}

/** Links an anonymous juror ID back to the real juror ID for de-anonymization. */
export interface JurorMapping {
  anonymousId: string
  realId: string
}

/** Links an anonymous project ID back to the real project ID for de-anonymization. */
export interface ProjectMapping {
  anonymousId: string
  realId: string
}

/**
 * Output of `anonymizeForAI`: the anonymized payload plus the mappings needed
 * to translate anonymous IDs in the AI response back to real IDs.
 */
export interface AnonymizationResult {
  jurors: AnonymizedJuror[]
  projects: AnonymizedProject[]
  jurorMappings: JurorMapping[]
  projectMappings: ProjectMapping[]
}

// ─── Enhanced Project Types (Filtering/Awards) ───────────────────────────────

/**
 * Per-file details attached to an anonymized project for document analysis.
 * Optional fields are only present when the source file carried them.
 */
export interface AnonymizedFileInfo {
  file_type: string // FileType enum value
  page_count: number | null // Number of pages if known
  size_kb: number // File size in KB
  detected_lang?: string | null // ISO 639-3 language code (e.g. 'eng', 'fra')
  lang_confidence?: number | null // 0.0–1.0 confidence score
  round_name?: string | null // Which round the file was submitted for
  is_current_round?: boolean // Whether this file belongs to the current filtering/evaluation round
  text_content?: string // Extracted text content (when aiParseFiles is enabled)
}

/**
 * Comprehensive anonymized project data for AI filtering
 * Includes all fields needed for flexible filtering criteria
 */
export interface AnonymizedProjectForAI {
  project_id: string // P1, P2, etc.
  title: string // Sanitized
  description: string // Truncated + PII stripped
  category: CompetitionCategory | null // STARTUP | BUSINESS_CONCEPT
  ocean_issue: OceanIssue | null // Enum value
  country: string | null
  region: string | null // geographicZone
  institution: string | null
  tags: string[]
  founded_year: number | null // Just the year
  team_size: number
  has_description: boolean
  file_count: number
  file_types: string[] // FileType values
  files: AnonymizedFileInfo[] // Per-file details for document analysis
  wants_mentorship: boolean
  submission_source: SubmissionSource
  submitted_date: string | null // YYYY-MM-DD only
}

/**
 * Project input with all relations needed for comprehensive anonymization.
 *
 * This is the input shape of `anonymizeProjectForAI`; `toProjectWithRelations`
 * builds it from a loosely-typed query result.
 */
export interface ProjectWithRelations {
  /** Real project ID; only ever emitted through the de-anonymization mapping. */
  id: string
  title: string
  description?: string | null
  teamName?: string | null
  competitionCategory?: CompetitionCategory | null
  oceanIssue?: OceanIssue | null
  country?: string | null
  /** Emitted to the AI as `region`. */
  geographicZone?: string | null
  institution?: string | null
  tags: string[]
  /** Only the year is forwarded to the AI. */
  foundedAt?: Date | null
  wantsMentorship?: boolean
  submissionSource: SubmissionSource
  /** Only the date part (YYYY-MM-DD) is forwarded to the AI. */
  submittedAt?: Date | null
  /** Relation counts; missing counts are treated as 0 by the anonymizer. */
  _count?: {
    teamMembers?: number
    files?: number
  }
  /** Uploaded files; `size` is divided by 1024 to produce `size_kb`. */
  files?: Array<{ fileType: FileType | null; size?: number; pageCount?: number | null }>
}

/**
 * Mapping for de-anonymization of filtering/award results:
 * `anonymousId` is the `P1`, `P2`, … identifier sent to the AI and `realId`
 * is the project's real ID.
 */
export interface ProjectAIMapping {
  anonymousId: string
  realId: string
}

// ─── Project Conversion Helper ──────────────────────────────────────────────

/**
 * Normalise a loosely-typed project record (e.g. a Prisma query result) into
 * the `ProjectWithRelations` shape expected by the anonymizers.
 * Used by the ai-tagging, ai-filtering and ai-award-eligibility services.
 *
 * Enum-like string columns are cast to their enum types without runtime
 * validation. Missing optional values receive defaults: `wantsMentorship`
 * becomes false, `submissionSource` becomes 'MANUAL', counts become 0 (the
 * file count falls back to the length of `files`) and `files` becomes [].
 * Only `fileType`, `size` and `pageCount` are kept on each file.
 *
 * @param project - Raw project record.
 * @returns The same data typed as `ProjectWithRelations`.
 */
export function toProjectWithRelations(project: {
  id: string
  title: string
  description?: string | null
  competitionCategory?: string | null
  oceanIssue?: string | null
  country?: string | null
  geographicZone?: string | null
  institution?: string | null
  tags: string[]
  foundedAt?: Date | null
  wantsMentorship?: boolean | null
  submissionSource?: string
  submittedAt?: Date | null
  _count?: { teamMembers?: number; files?: number }
  files?: Array<{ fileType?: string | null; size?: number; pageCount?: number | null; [key: string]: unknown }>
}): ProjectWithRelations {
  const rawFiles = project.files
  const rawCounts = project._count

  // Keep only the three file attributes the anonymizer type declares.
  const files =
    rawFiles == null
      ? []
      : rawFiles.map(({ fileType, size, pageCount }) => ({
          fileType: (fileType as FileType) ?? null,
          size,
          pageCount: pageCount ?? null,
        }))

  const counts = {
    teamMembers: rawCounts?.teamMembers ?? 0,
    // Prefer the explicit relation count, then the loaded files, then zero.
    files: rawCounts?.files ?? rawFiles?.length ?? 0,
  }

  const competitionCategory =
    project.competitionCategory as ProjectWithRelations['competitionCategory']
  const oceanIssue = project.oceanIssue as ProjectWithRelations['oceanIssue']
  const submissionSource =
    (project.submissionSource as ProjectWithRelations['submissionSource']) ?? 'MANUAL'

  return {
    id: project.id,
    title: project.title,
    description: project.description,
    competitionCategory,
    oceanIssue,
    country: project.country,
    geographicZone: project.geographicZone,
    institution: project.institution,
    tags: project.tags,
    foundedAt: project.foundedAt,
    wantsMentorship: project.wantsMentorship ?? false,
    submissionSource,
    submittedAt: project.submittedAt,
    _count: counts,
    files,
  }
}

// ─── Basic Anonymization (Assignment Service) ────────────────────────────────

/**
 * Juror record accepted by `anonymizeForAI`.
 * `name` and `email` are part of the input shape but are never copied to the
 * anonymized output; `id` only appears in the returned mapping.
 */
interface JurorInput {
  id: string
  name?: string | null
  email: string
  expertiseTags: string[]
  bio?: string | null
  country?: string | null
  maxAssignments?: number | null
  /** Relation counts; `assignments` defaults to 0 in the output when absent. */
  _count?: {
    assignments: number
  }
}

/**
 * Project record accepted by `anonymizeForAI`.
 * `id` only appears in the returned mapping, never in the anonymized payload.
 */
interface ProjectInput {
  id: string
  title: string
  description?: string | null
  /** Plain tag names; used (with confidence 1.0) when `tagConfidences` is empty or absent. */
  tags: string[]
  /** Tags with confidence scores; take precedence over `tags` when non-empty. */
  tagConfidences?: Array<{ name: string; confidence: number }>
  /** Real team name; replaced by a `Team N` placeholder in the output. */
  teamName?: string | null
  competitionCategory?: string | null
  oceanIssue?: string | null
  country?: string | null
  institution?: string | null
  teamSize?: number
  fileTypes?: string[]
}

/**
 * Build the anonymized juror/project payload for the AI assignment service.
 *
 * Real IDs are replaced by position-based placeholders (`juror_001`,
 * `project_001`, …) and recorded in the returned mappings. Juror names and
 * emails are dropped, team names become `Team N`, and free-text fields
 * (title, description, institution, bio) are sanitized.
 *
 * @param jurors - Jurors to anonymize, in the order that determines their IDs.
 * @param projects - Projects to anonymize, in the order that determines their IDs.
 * @returns Anonymized records together with the ID mappings.
 */
export function anonymizeForAI(
  jurors: JurorInput[],
  projects: ProjectInput[]
): AnonymizationResult {
  // Zero-padded, 1-based sequence number appended to a fixed prefix.
  const placeholderId = (prefix: string, position: number): string =>
    `${prefix}_${String(position + 1).padStart(3, '0')}`

  const result: AnonymizationResult = {
    jurors: [],
    projects: [],
    jurorMappings: [],
    projectMappings: [],
  }

  jurors.forEach((juror, position) => {
    const anonymousId = placeholderId('juror', position)
    result.jurorMappings.push({ anonymousId, realId: juror.id })

    const bio = juror.bio ? truncateAndSanitize(juror.bio, 500) : null
    result.jurors.push({
      anonymousId,
      expertiseTags: juror.expertiseTags,
      currentAssignmentCount: juror._count?.assignments ?? 0,
      maxAssignments: juror.maxAssignments ?? null,
      bio,
      country: juror.country ?? null,
    })
  })

  projects.forEach((project, position) => {
    const anonymousId = placeholderId('project', position)
    result.projectMappings.push({ anonymousId, realId: project.id })

    const title = sanitizeText(project.title)

    let description: string | null = null
    if (project.description) {
      description = truncateAndSanitize(project.description, DESCRIPTION_LIMITS.ASSIGNMENT)
    }

    // Prefer scored tags; otherwise treat every plain tag as fully confident.
    const hasScoredTags = !!project.tagConfidences && project.tagConfidences.length > 0
    const tags = hasScoredTags
      ? project.tagConfidences!
      : project.tags.map((name) => ({ name, confidence: 1.0 }))

    result.projects.push({
      anonymousId,
      title,
      description,
      tags,
      teamName: project.teamName ? `Team ${position + 1}` : null,
      category: project.competitionCategory ?? null,
      oceanIssue: project.oceanIssue ?? null,
      country: project.country ?? null,
      institution: project.institution ? sanitizeText(project.institution) : null,
      teamSize: project.teamSize,
      fileTypes: project.fileTypes,
    })
  })

  return result
}

// ─── Enhanced Anonymization (Filtering/Awards) ───────────────────────────────

/**
 * Anonymize a single project with comprehensive data for AI filtering.
 *
 * GDPR compliance:
 * - The real ID is replaced by a position-based ID (`P1`, `P2`, …); the team
 *   name is not forwarded at all.
 * - Every free-text field that reaches the AI (title, description,
 *   institution and extracted file text) is passed through `sanitizeText`.
 * - The description is truncated to the limit of the chosen context.
 * - Dates are reduced to a year (`founded_year`) or a calendar date
 *   (`submitted_date`).
 *
 * @param project - Project with the relations needed for filtering.
 * @param index - Zero-based position of the project; determines `project_id`.
 * @param context - Use-case whose description limit applies (default 'FILTERING').
 * @returns The anonymized project payload.
 */
export function anonymizeProjectForAI(
  project: ProjectWithRelations,
  index: number,
  context: DescriptionContext = 'FILTERING'
): AnonymizedProjectForAI {
  // Files may carry analysis extras that `ProjectWithRelations` does not
  // declare; describe them here instead of falling back to `any`.
  interface AnonymizableFile {
    fileType: FileType | null
    size?: number
    pageCount?: number | null
    detectedLang?: string | null
    langConfidence?: number | null
    roundName?: string | null
    isCurrentRound?: boolean
    textContent?: string | null
  }

  const descriptionLimit = DESCRIPTION_LIMITS[context]
  const files: readonly AnonymizableFile[] = project.files ?? []

  return {
    project_id: `P${index + 1}`,
    title: sanitizeText(project.title),
    description: truncateAndSanitize(project.description, descriptionLimit),
    category: project.competitionCategory ?? null,
    ocean_issue: project.oceanIssue ?? null,
    country: project.country ?? null,
    region: project.geographicZone ?? null,
    // Institution is free text and may embed contact details.
    institution: project.institution ? sanitizeText(project.institution) : null,
    tags: project.tags,
    // NOTE(review): the year is read in local time while `submitted_date`
    // below is UTC — confirm this asymmetry is intended.
    founded_year: project.foundedAt?.getFullYear() ?? null,
    team_size: project._count?.teamMembers ?? 0,
    has_description: !!project.description?.trim(),
    file_count: project._count?.files ?? 0,
    file_types: files
      .map((f) => f.fileType)
      // `!= null` also drops an undefined type coming from untyped callers.
      .filter((ft): ft is FileType => ft != null),
    files: files.map((f) => ({
      file_type: f.fileType ?? 'OTHER',
      page_count: f.pageCount ?? null,
      size_kb: Math.round((f.size ?? 0) / 1024),
      ...(f.detectedLang ? { detected_lang: f.detectedLang } : {}),
      ...(f.langConfidence != null ? { lang_confidence: f.langConfidence } : {}),
      ...(f.roundName ? { round_name: f.roundName } : {}),
      ...(f.isCurrentRound !== undefined ? { is_current_round: f.isCurrentRound } : {}),
      // Extracted document text is the most likely place for names, emails
      // and phone numbers, so it must never leave unsanitized.
      ...(f.textContent ? { text_content: sanitizeText(f.textContent) } : {}),
    })),
    wants_mentorship: project.wantsMentorship ?? false,
    submission_source: project.submissionSource,
    submitted_date: project.submittedAt?.toISOString().split('T')[0] ?? null,
  }
}

/**
 * Anonymize a list of projects and record how to map the results back.
 *
 * Each project is anonymized with `anonymizeProjectForAI` using its array
 * position, so the n-th project receives the ID `P{n}` in both the payload
 * and the mapping.
 *
 * @param projects - Projects to anonymize.
 * @param context - Use-case whose description limit applies (default 'FILTERING').
 * @returns The anonymized payloads and the anonymous-to-real ID mappings.
 */
export function anonymizeProjectsForAI(
  projects: ProjectWithRelations[],
  context: DescriptionContext = 'FILTERING'
): {
  anonymized: AnonymizedProjectForAI[]
  mappings: ProjectAIMapping[]
} {
  const anonymized: AnonymizedProjectForAI[] = []
  const mappings: ProjectAIMapping[] = []

  projects.forEach((project, position) => {
    mappings.push({ anonymousId: `P${position + 1}`, realId: project.id })
    anonymized.push(anonymizeProjectForAI(project, position, context))
  })

  return { anonymized, mappings }
}

// ─── De-anonymization ────────────────────────────────────────────────────────

/**
 * Attach real juror and project IDs to AI results that reference anonymous IDs.
 *
 * An ID without a (non-empty) mapping entry is passed through unchanged, so
 * `realJurorId` / `realProjectId` are always populated.
 *
 * @param results - AI output rows keyed by anonymous `jurorId` / `projectId`.
 * @param jurorMappings - Anonymous-to-real juror ID pairs.
 * @param projectMappings - Anonymous-to-real project ID pairs.
 * @returns Copies of the rows extended with `realJurorId` and `realProjectId`.
 */
export function deanonymizeResults<
  T extends { jurorId: string; projectId: string }
>(
  results: T[],
  jurorMappings: JurorMapping[],
  projectMappings: ProjectMapping[]
): (T & { realJurorId: string; realProjectId: string })[] {
  const realJurorIds = new Map<string, string>()
  for (const { anonymousId, realId } of jurorMappings) {
    realJurorIds.set(anonymousId, realId)
  }

  const realProjectIds = new Map<string, string>()
  for (const { anonymousId, realId } of projectMappings) {
    realProjectIds.set(anonymousId, realId)
  }

  const enriched: (T & { realJurorId: string; realProjectId: string })[] = []
  for (const row of results) {
    const realJurorId = realJurorIds.get(row.jurorId) || row.jurorId
    const realProjectId = realProjectIds.get(row.projectId) || row.projectId
    enriched.push({ ...row, realJurorId, realProjectId })
  }
  return enriched
}

/**
 * Attach the real project ID to project-only AI results (filtering/awards).
 *
 * A `project_id` without a (non-empty) mapping entry is passed through
 * unchanged as `realProjectId`.
 *
 * @param results - AI output rows keyed by anonymous `project_id`.
 * @param mappings - Anonymous-to-real project ID pairs.
 * @returns Copies of the rows extended with `realProjectId`.
 */
export function deanonymizeProjectResults<T extends { project_id: string }>(
  results: T[],
  mappings: ProjectAIMapping[]
): (T & { realProjectId: string })[] {
  const realIdByAnonymousId = new Map<string, string>()
  for (const mapping of mappings) {
    realIdByAnonymousId.set(mapping.anonymousId, mapping.realId)
  }

  return results.map((row) => {
    const realProjectId = realIdByAnonymousId.get(row.project_id) || row.project_id
    return { ...row, realProjectId }
  })
}

// ─── Text Sanitization ───────────────────────────────────────────────────────

/**
 * Replace PII-looking substrings with neutral placeholders.
 *
 * Every pattern in `PII_PATTERNS` is handled, so text returned from here
 * passes the validators that test against the same pattern set:
 * URLs → `[url removed]`, emails → `[email removed]`, phone numbers →
 * `[phone removed]`, SSN-like IDs → `[id removed]`, IPv4 addresses →
 * `[ip removed]`.
 *
 * @param text - Free text that may contain personal data.
 * @returns The text with all matches replaced.
 */
export function sanitizeText(text: string): string {
  return (
    text
      // URLs go first: the URL pattern runs to the next whitespace, and the
      // other placeholders contain a space. Replacing an email or phone
      // number inside a URL beforehand would leave the tail of the URL (and
      // half a placeholder) behind.
      .replace(PII_PATTERNS.url, '[url removed]')
      // Emails before phones so digits inside an address are not split out.
      .replace(PII_PATTERNS.email, '[email removed]')
      .replace(PII_PATTERNS.phone, '[phone removed]')
      .replace(PII_PATTERNS.ssn, '[id removed]')
      .replace(PII_PATTERNS.ipv4, '[ip removed]')
  )
}

/**
 * Sanitize text and cap it at `maxLength` characters.
 *
 * Text is sanitized first so that truncation can never cut a PII match in
 * half and leave an unrecognisable fragment behind. When the sanitized text
 * is too long it is cut and terminated with `...`, the ellipsis counting
 * towards `maxLength`.
 *
 * @param text - Text to process; null, undefined and '' yield ''.
 * @param maxLength - Maximum length of the returned string.
 * @returns The sanitized text, never longer than `maxLength` (for `maxLength >= 0`).
 */
export function truncateAndSanitize(
  text: string | null | undefined,
  maxLength: number
): string {
  if (!text) return ''

  const sanitized = sanitizeText(text)

  if (sanitized.length <= maxLength) {
    return sanitized
  }

  const ellipsis = '...'

  // No room for an ellipsis: hard-cut instead. (A negative slice end would
  // otherwise count from the end of the string and exceed the limit.)
  if (maxLength < ellipsis.length) {
    return sanitized.slice(0, Math.max(0, maxLength))
  }

  let cut = maxLength - ellipsis.length
  // Do not leave a lone high surrogate (half of an emoji or other
  // astral-plane character) at the end of the kept text.
  const lastKept = sanitized.charCodeAt(cut - 1)
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    cut -= 1
  }

  return sanitized.slice(0, cut) + ellipsis
}

// ─── GDPR Compliance Validation ──────────────────────────────────────────────

/** Outcome of a PII scan performed by `validateNoPersonalData`. */
export interface PIIValidationResult {
  /** True when no violations were found. */
  valid: boolean
  /** Human-readable description of each detected problem. */
  violations: string[]
}

/**
 * Check a record for personal data before it is sent to an AI service.
 *
 * Two independent checks are performed:
 * 1. The JSON serialization of the whole record (nested keys and values
 *    included) is tested against every pattern in `PII_PATTERNS`.
 * 2. The record's top-level keys are compared, case-insensitively, with a
 *    list of field names that should never be forwarded.
 *
 * @param data - Record about to be sent to an AI service.
 * @returns `valid: true` with no violations, or `valid: false` with one
 *   message per detected problem.
 */
export function validateNoPersonalData(
  data: Record<string, unknown>
): PIIValidationResult {
  const violations: string[] = []
  const textContent = JSON.stringify(data)

  for (const [type, pattern] of Object.entries(PII_PATTERNS)) {
    // The patterns are global, so `test` starts at `lastIndex`; reset it
    // before use and afterwards so no state leaks to the next user.
    pattern.lastIndex = 0
    if (pattern.test(textContent)) {
      violations.push(`Potential ${type} detected in data`)
    }
    pattern.lastIndex = 0
  }

  // Field names that indicate personal data regardless of their value.
  const sensitiveFields = [
    'email',
    'phone',
    'password',
    'ssn',
    'socialSecurity',
    'creditCard',
    'bankAccount',
    'drivingLicense',
  ]

  // Both sides are lower-cased: comparing lower-cased keys with the
  // camelCase entries above would never match those entries.
  const presentKeys = new Set(Object.keys(data).map((k) => k.toLowerCase()))
  for (const field of sensitiveFields) {
    if (presentKeys.has(field.toLowerCase())) {
      violations.push(`Sensitive field "${field}" present in data`)
    }
  }

  return {
    valid: violations.length === 0,
    violations,
  }
}

/**
 * Enforce GDPR compliance before EVERY AI call.
 *
 * Each item of the payload is run through `validateNoPersonalData`. Objects
 * (including arrays) are checked as they are; bare strings are wrapped in a
 * record so that free text placed directly in the payload is scanned too.
 * Other primitives (numbers, booleans, null, undefined) are skipped.
 *
 * @param data - Payload items about to be sent to an AI service.
 * @throws Error listing the violations of the first item that fails validation.
 */
export function enforceGDPRCompliance(data: unknown[]): void {
  for (let i = 0; i < data.length; i++) {
    const item = data[i]

    let candidate: Record<string, unknown> | null = null
    if (typeof item === 'object' && item !== null) {
      candidate = item as Record<string, unknown>
    } else if (typeof item === 'string') {
      // A plain string can carry an email or phone number just as well as
      // an object field can, so it must not bypass the check.
      candidate = { value: item }
    }
    if (candidate === null) continue

    const { valid, violations } = validateNoPersonalData(candidate)
    if (!valid) {
      console.error(
        `[GDPR] PII validation failed for item ${i}:`,
        violations
      )
      throw new Error(
        `GDPR compliance check failed: ${violations.join(', ')}`
      )
    }
  }
}

/**
 * Validate that an assignment payload has been properly anonymized.
 *
 * Every free-text field that is sent to the AI is tested against all
 * `PII_PATTERNS`: juror bio and expertise tags, and project title,
 * description, team name, institution and tag names.
 *
 * @param data - Result of `anonymizeForAI`.
 * @returns true when no PII pattern matches any checked field.
 */
export function validateAnonymization(data: AnonymizationResult): boolean {
  const patterns = Object.values(PII_PATTERNS)

  const isClean = (text: string | null | undefined): boolean => {
    if (!text) return true
    for (const pattern of patterns) {
      // Global patterns remember `lastIndex`; reset around every test.
      pattern.lastIndex = 0
      const matched = pattern.test(text)
      pattern.lastIndex = 0
      if (matched) return false
    }
    return true
  }

  for (const juror of data.jurors) {
    // The bio is free text written by the juror and is forwarded to the AI.
    if (!isClean(juror.bio)) return false
    for (const tag of juror.expertiseTags) {
      if (!isClean(tag)) return false
    }
  }

  for (const project of data.projects) {
    const freeText = [
      project.title,
      project.description,
      project.teamName,
      project.institution,
    ]
    for (const text of freeText) {
      if (!isClean(text)) return false
    }
    for (const tag of project.tags) {
      // Tolerate plain-string tags from callers that bypass the typed shape.
      if (!isClean(typeof tag === 'string' ? tag : tag.name)) return false
    }
  }

  return true
}

/**
 * Validate anonymized filtering/award payloads.
 *
 * Tests each project's title, description, institution and tags against all
 * `PII_PATTERNS` and stops at the first match.
 *
 * @param projects - Output of `anonymizeProjectForAI` / `anonymizeProjectsForAI`.
 * @returns true when none of the checked fields matches a PII pattern.
 */
export function validateAnonymizedProjects(
  projects: AnonymizedProjectForAI[]
): boolean {
  const patterns = Object.values(PII_PATTERNS)

  // Empty or missing text is trivially clean; otherwise no pattern may match.
  const isClean = (text: string | null | undefined): boolean => {
    if (!text) return true
    return !patterns.some((pattern) => {
      // Global patterns keep `lastIndex`; start every test from the beginning.
      pattern.lastIndex = 0
      return pattern.test(text)
    })
  }

  return projects.every(
    (project) =>
      isClean(project.title) &&
      isClean(project.description) &&
      isClean(project.institution) &&
      project.tags.every((tag) => isClean(tag))
  )
}