Re-apply: seed all CSV entries, fix category mapping, add duplicate detection
Some checks failed
Build and Push Docker Image / build (push) Failing after 3m40s
Some checks failed
Build and Push Docker Image / build (push) Failing after 3m40s
The rebase had inverted ours/theirs, which reverted our changes. Re-applying:
- normalizeSpaces() to fix non-breaking spaces in category mapping
- Remove the isValidEntry filter and include all CSV rows for AI screening
- Duplicate submission detection in stage-filtering (always flags, never auto-rejects)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -50,9 +50,14 @@ const issueMap: Record<string, OceanIssue> = {
|
|||||||
'Other': OceanIssue.OTHER,
|
'Other': OceanIssue.OTHER,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Replaces Unicode space-separator variants with a regular ASCII space so
 * that prefix comparisons against keys written with plain spaces succeed.
 *
 * Covered code points: U+00A0 (no-break space), U+1680, U+2000–U+200A
 * (en/em/thin/hair spaces, etc.), U+202F (narrow no-break space),
 * U+205F and U+3000 (ideographic space). Tabs and line breaks are left
 * untouched, and the string length is preserved (one-for-one replacement).
 *
 * @param s - Raw text, e.g. a cell value read from a CSV export.
 * @returns The same text with every space variant replaced by ' '.
 */
function normalizeSpaces(s: string): string {
  // Previously only U+00A0 was handled, although spreadsheet exports also
  // emit other space variants such as U+202F; normalize all of them.
  return s.replace(/[\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]/g, ' ')
}
|
||||||
|
|
||||||
function mapCategory(raw: string | undefined): CompetitionCategory | null {
|
function mapCategory(raw: string | undefined): CompetitionCategory | null {
|
||||||
if (!raw) return null
|
if (!raw) return null
|
||||||
const trimmed = raw.trim()
|
const trimmed = normalizeSpaces(raw.trim())
|
||||||
for (const [prefix, value] of Object.entries(categoryMap)) {
|
for (const [prefix, value] of Object.entries(categoryMap)) {
|
||||||
if (trimmed.startsWith(prefix)) return value
|
if (trimmed.startsWith(prefix)) return value
|
||||||
}
|
}
|
||||||
@@ -61,7 +66,7 @@ function mapCategory(raw: string | undefined): CompetitionCategory | null {
|
|||||||
|
|
||||||
function mapIssue(raw: string | undefined): OceanIssue | null {
|
function mapIssue(raw: string | undefined): OceanIssue | null {
|
||||||
if (!raw) return null
|
if (!raw) return null
|
||||||
const trimmed = raw.trim()
|
const trimmed = normalizeSpaces(raw.trim())
|
||||||
for (const [prefix, value] of Object.entries(issueMap)) {
|
for (const [prefix, value] of Object.entries(issueMap)) {
|
||||||
if (trimmed.startsWith(prefix)) return value
|
if (trimmed.startsWith(prefix)) return value
|
||||||
}
|
}
|
||||||
@@ -76,17 +81,11 @@ function parseFoundedDate(raw: string | undefined): Date | null {
|
|||||||
return isNaN(d.getTime()) ? null : d
|
return isNaN(d.getTime()) ? null : d
|
||||||
}
|
}
|
||||||
|
|
||||||
function isValidEntry(row: Record<string, string>): boolean {
|
function isEmptyRow(row: Record<string, string>): boolean {
|
||||||
const status = (row['Application status'] || '').trim().toLowerCase()
|
|
||||||
if (status === 'ignore' || status === 'doublon') return false
|
|
||||||
|
|
||||||
const name = (row['Full name'] || '').trim()
|
const name = (row['Full name'] || '').trim()
|
||||||
if (name.length <= 2) return false // skip test entries
|
|
||||||
|
|
||||||
const email = (row['E-mail'] || '').trim()
|
const email = (row['E-mail'] || '').trim()
|
||||||
if (!email || !email.includes('@')) return false
|
const project = (row["Project's name"] || '').trim()
|
||||||
|
return !name && !email && !project
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
@@ -814,21 +813,9 @@ async function main() {
|
|||||||
|
|
||||||
console.log(` Raw CSV rows: ${records.length}`)
|
console.log(` Raw CSV rows: ${records.length}`)
|
||||||
|
|
||||||
// Filter and deduplicate
|
// Skip only completely empty rows (no name, no email, no project)
|
||||||
const seenEmails = new Set<string>()
|
const validRecords = records.filter((row: Record<string, string>) => !isEmptyRow(row))
|
||||||
const validRecords: Record<string, string>[] = []
|
console.log(` Entries to seed: ${validRecords.length}`)
|
||||||
|
|
||||||
for (const row of records) {
|
|
||||||
if (!isValidEntry(row)) continue
|
|
||||||
|
|
||||||
const email = (row['E-mail'] || '').trim().toLowerCase()
|
|
||||||
if (seenEmails.has(email)) continue
|
|
||||||
|
|
||||||
seenEmails.add(email)
|
|
||||||
validRecords.push(row)
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(` Valid entries after filtering: ${validRecords.length}`)
|
|
||||||
|
|
||||||
// Create applicant users and projects
|
// Create applicant users and projects
|
||||||
console.log('\n🚀 Creating applicant users and projects...')
|
console.log('\n🚀 Creating applicant users and projects...')
|
||||||
@@ -836,7 +823,9 @@ async function main() {
|
|||||||
const intakeStage = mainStages[0] // INTAKE - CLOSED
|
const intakeStage = mainStages[0] // INTAKE - CLOSED
|
||||||
const filterStage = mainStages[1] // FILTER - ACTIVE
|
const filterStage = mainStages[1] // FILTER - ACTIVE
|
||||||
|
|
||||||
for (const row of validRecords) {
|
let skippedNoEmail = 0
|
||||||
|
for (let rowIdx = 0; rowIdx < validRecords.length; rowIdx++) {
|
||||||
|
const row = validRecords[rowIdx]
|
||||||
const email = (row['E-mail'] || '').trim().toLowerCase()
|
const email = (row['E-mail'] || '').trim().toLowerCase()
|
||||||
const name = (row['Full name'] || '').trim()
|
const name = (row['Full name'] || '').trim()
|
||||||
const phone = (row['Téléphone'] || '').trim() || null
|
const phone = (row['Téléphone'] || '').trim() || null
|
||||||
@@ -855,7 +844,14 @@ async function main() {
|
|||||||
const phase2Url = (row['PHASE 2 - Submission'] || '').trim() || null
|
const phase2Url = (row['PHASE 2 - Submission'] || '').trim() || null
|
||||||
const foundedAt = parseFoundedDate(row['Date of creation'])
|
const foundedAt = parseFoundedDate(row['Date of creation'])
|
||||||
|
|
||||||
// Create or get applicant user
|
// Skip rows with no usable email (can't create user without one)
|
||||||
|
if (!email || !email.includes('@')) {
|
||||||
|
skippedNoEmail++
|
||||||
|
console.log(` ⚠ Row ${rowIdx + 2}: skipped (no valid email)`)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create or get applicant user (upsert handles duplicate emails)
|
||||||
const user = await prisma.user.upsert({
|
const user = await prisma.user.upsert({
|
||||||
where: { email },
|
where: { email },
|
||||||
update: {
|
update: {
|
||||||
@@ -864,7 +860,7 @@ async function main() {
|
|||||||
},
|
},
|
||||||
create: {
|
create: {
|
||||||
email,
|
email,
|
||||||
name,
|
name: name || `Applicant ${rowIdx + 1}`,
|
||||||
role: UserRole.APPLICANT,
|
role: UserRole.APPLICANT,
|
||||||
status: UserStatus.NONE,
|
status: UserStatus.NONE,
|
||||||
phoneNumber: phone,
|
phoneNumber: phone,
|
||||||
@@ -930,6 +926,9 @@ async function main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
console.log(` ✓ Created ${projectCount} projects with stage states`)
|
console.log(` ✓ Created ${projectCount} projects with stage states`)
|
||||||
|
if (skippedNoEmail > 0) {
|
||||||
|
console.log(` ⚠ Skipped ${skippedNoEmail} rows with no valid email`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ==========================================================================
|
// ==========================================================================
|
||||||
|
|||||||
@@ -261,6 +261,34 @@ export async function runStageFiltering(
|
|||||||
)
|
)
|
||||||
const aiRules = rules.filter((r: any) => r.ruleType === 'AI_SCREENING')
|
const aiRules = rules.filter((r: any) => r.ruleType === 'AI_SCREENING')
|
||||||
|
|
||||||
|
// ── Built-in: Duplicate submission detection ──────────────────────────────
|
||||||
|
// Group projects by submitter email to detect duplicate submissions.
|
||||||
|
// Duplicates are ALWAYS flagged for admin review (never auto-rejected).
|
||||||
|
const duplicateProjectIds = new Set<string>()
|
||||||
|
const emailToProjects = new Map<string, Array<{ id: string; title: string }>>()
|
||||||
|
|
||||||
|
for (const project of projects) {
|
||||||
|
const email = (project.submittedByEmail ?? '').toLowerCase().trim()
|
||||||
|
if (!email) continue
|
||||||
|
if (!emailToProjects.has(email)) emailToProjects.set(email, [])
|
||||||
|
emailToProjects.get(email)!.push({ id: project.id, title: project.title })
|
||||||
|
}
|
||||||
|
|
||||||
|
const duplicateGroups: Map<string, string[]> = new Map() // projectId → sibling ids
|
||||||
|
emailToProjects.forEach((group, _email) => {
|
||||||
|
if (group.length <= 1) return
|
||||||
|
const ids = group.map((p) => p.id)
|
||||||
|
for (const p of group) {
|
||||||
|
duplicateProjectIds.add(p.id)
|
||||||
|
duplicateGroups.set(p.id, ids.filter((id) => id !== p.id))
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
if (duplicateProjectIds.size > 0) {
|
||||||
|
console.log(`[Stage Filtering] Detected ${duplicateProjectIds.size} projects in duplicate groups`)
|
||||||
|
}
|
||||||
|
// ── End duplicate detection ───────────────────────────────────────────────
|
||||||
|
|
||||||
let passed = 0
|
let passed = 0
|
||||||
let rejected = 0
|
let rejected = 0
|
||||||
let manualQueue = 0
|
let manualQueue = 0
|
||||||
@@ -271,6 +299,20 @@ export async function runStageFiltering(
|
|||||||
let deterministicPassed = true
|
let deterministicPassed = true
|
||||||
let deterministicOutcome: 'PASSED' | 'FILTERED_OUT' | 'FLAGGED' = 'PASSED'
|
let deterministicOutcome: 'PASSED' | 'FILTERED_OUT' | 'FLAGGED' = 'PASSED'
|
||||||
|
|
||||||
|
// 0. Check for duplicate submissions (always FLAG, never auto-reject)
|
||||||
|
if (duplicateProjectIds.has(project.id)) {
|
||||||
|
const siblingIds = duplicateGroups.get(project.id) ?? []
|
||||||
|
ruleResults.push({
|
||||||
|
ruleId: '__duplicate_check',
|
||||||
|
ruleName: 'Duplicate Submission Check',
|
||||||
|
ruleType: 'DUPLICATE_CHECK',
|
||||||
|
passed: false,
|
||||||
|
action: 'FLAG',
|
||||||
|
reasoning: `Duplicate submission detected: same applicant email submitted ${siblingIds.length + 1} project(s). Sibling project IDs: ${siblingIds.join(', ')}. Admin must review and decide which to keep.`,
|
||||||
|
})
|
||||||
|
deterministicOutcome = 'FLAGGED'
|
||||||
|
}
|
||||||
|
|
||||||
// 1. Run deterministic rules
|
// 1. Run deterministic rules
|
||||||
for (const rule of deterministicRules) {
|
for (const rule of deterministicRules) {
|
||||||
const config = rule.configJson as unknown as RuleConfig
|
const config = rule.configJson as unknown as RuleConfig
|
||||||
@@ -312,11 +354,12 @@ export async function runStageFiltering(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. AI screening (only if deterministic passed)
|
// 2. AI screening (run if deterministic passed, OR if duplicate—so AI can recommend which to keep)
|
||||||
|
const isDuplicate = duplicateProjectIds.has(project.id)
|
||||||
let aiScreeningJson: Record<string, unknown> | null = null
|
let aiScreeningJson: Record<string, unknown> | null = null
|
||||||
let finalOutcome: 'PASSED' | 'FILTERED_OUT' | 'FLAGGED' = deterministicOutcome
|
let finalOutcome: 'PASSED' | 'FILTERED_OUT' | 'FLAGGED' = deterministicOutcome
|
||||||
|
|
||||||
if (deterministicPassed && aiRules.length > 0) {
|
if ((deterministicPassed || isDuplicate) && aiRules.length > 0) {
|
||||||
// Build a simplified AI screening result using the existing AI criteria
|
// Build a simplified AI screening result using the existing AI criteria
|
||||||
// In production this would call OpenAI via the ai-filtering service
|
// In production this would call OpenAI via the ai-filtering service
|
||||||
const aiRule = aiRules[0]
|
const aiRule = aiRules[0]
|
||||||
@@ -337,12 +380,25 @@ export async function runStageFiltering(
|
|||||||
: 'Insufficient project data for AI screening',
|
: 'Insufficient project data for AI screening',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Attach duplicate metadata so admin can see sibling projects
|
||||||
|
if (isDuplicate) {
|
||||||
|
const siblingIds = duplicateGroups.get(project.id) ?? []
|
||||||
|
aiScreeningJson.isDuplicate = true
|
||||||
|
aiScreeningJson.siblingProjectIds = siblingIds
|
||||||
|
aiScreeningJson.duplicateNote =
|
||||||
|
`This project shares a submitter email with ${siblingIds.length} other project(s). ` +
|
||||||
|
'AI screening should compare these and recommend which to keep.'
|
||||||
|
}
|
||||||
|
|
||||||
const banded = bandByConfidence({
|
const banded = bandByConfidence({
|
||||||
confidence,
|
confidence,
|
||||||
meetsAllCriteria: hasMinimalData,
|
meetsAllCriteria: hasMinimalData,
|
||||||
})
|
})
|
||||||
|
|
||||||
finalOutcome = banded.outcome
|
// For non-duplicate projects, use AI banding; for duplicates, keep FLAGGED
|
||||||
|
if (!isDuplicate) {
|
||||||
|
finalOutcome = banded.outcome
|
||||||
|
}
|
||||||
|
|
||||||
ruleResults.push({
|
ruleResults.push({
|
||||||
ruleId: aiRule.id,
|
ruleId: aiRule.id,
|
||||||
@@ -354,6 +410,12 @@ export async function runStageFiltering(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Duplicate submissions must ALWAYS be flagged for admin review,
|
||||||
|
// even if other rules would auto-reject them.
|
||||||
|
if (duplicateProjectIds.has(project.id) && finalOutcome === 'FILTERED_OUT') {
|
||||||
|
finalOutcome = 'FLAGGED'
|
||||||
|
}
|
||||||
|
|
||||||
await prisma.filteringResult.upsert({
|
await prisma.filteringResult.upsert({
|
||||||
where: {
|
where: {
|
||||||
stageId_projectId: {
|
stageId_projectId: {
|
||||||
|
|||||||
Reference in New Issue
Block a user