Re-apply: seed all CSV entries, fix category mapping, add duplicate detection
Some checks failed
Build and Push Docker Image / build (push) Failing after 3m40s
Some checks failed
Build and Push Docker Image / build (push) Failing after 3m40s
The rebase had inverted ours/theirs, which reverted our changes. Re-applying:
- normalizeSpaces() to fix non-breaking spaces in category mapping
- Remove the isValidEntry filter and include all CSV rows for AI screening
- Duplicate submission detection in stage-filtering (always flags, never auto-rejects)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -50,9 +50,14 @@ const issueMap: Record<string, OceanIssue> = {
|
||||
'Other': OceanIssue.OTHER,
|
||||
}
|
||||
|
||||
/**
 * Replaces non-breaking spaces and other Unicode space-separator characters
 * with a regular ASCII space (U+0020).
 *
 * Callers match the result against plain-ASCII prefixes with `startsWith`,
 * so any space look-alike left in the string makes an otherwise correct
 * value fail to match. Handling only U+00A0 is not enough: U+202F (narrow
 * no-break space) and the U+2000–U+200A family are visually identical in
 * most fonts and are not matched by a literal ' '.
 *
 * Each matched character is replaced one-for-one, so the string length is
 * preserved. Tabs and line breaks are left untouched.
 *
 * @param s - Text to normalize.
 * @returns `s` with every Unicode space separator replaced by ' '.
 */
function normalizeSpaces(s: string): string {
  // U+00A0 no-break space, U+1680 ogham space mark, U+2000–U+200A (en/em/thin/
  // hair spaces, etc.), U+202F narrow no-break space, U+205F medium
  // mathematical space, U+3000 ideographic space.
  return s.replace(/[\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]/g, ' ')
}
|
||||
|
||||
function mapCategory(raw: string | undefined): CompetitionCategory | null {
|
||||
if (!raw) return null
|
||||
const trimmed = raw.trim()
|
||||
const trimmed = normalizeSpaces(raw.trim())
|
||||
for (const [prefix, value] of Object.entries(categoryMap)) {
|
||||
if (trimmed.startsWith(prefix)) return value
|
||||
}
|
||||
@@ -61,7 +66,7 @@ function mapCategory(raw: string | undefined): CompetitionCategory | null {
|
||||
|
||||
function mapIssue(raw: string | undefined): OceanIssue | null {
|
||||
if (!raw) return null
|
||||
const trimmed = raw.trim()
|
||||
const trimmed = normalizeSpaces(raw.trim())
|
||||
for (const [prefix, value] of Object.entries(issueMap)) {
|
||||
if (trimmed.startsWith(prefix)) return value
|
||||
}
|
||||
@@ -76,17 +81,11 @@ function parseFoundedDate(raw: string | undefined): Date | null {
|
||||
return isNaN(d.getTime()) ? null : d
|
||||
}
|
||||
|
||||
function isValidEntry(row: Record<string, string>): boolean {
|
||||
const status = (row['Application status'] || '').trim().toLowerCase()
|
||||
if (status === 'ignore' || status === 'doublon') return false
|
||||
|
||||
function isEmptyRow(row: Record<string, string>): boolean {
|
||||
const name = (row['Full name'] || '').trim()
|
||||
if (name.length <= 2) return false // skip test entries
|
||||
|
||||
const email = (row['E-mail'] || '').trim()
|
||||
if (!email || !email.includes('@')) return false
|
||||
|
||||
return true
|
||||
const project = (row["Project's name"] || '').trim()
|
||||
return !name && !email && !project
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
@@ -814,21 +813,9 @@ async function main() {
|
||||
|
||||
console.log(` Raw CSV rows: ${records.length}`)
|
||||
|
||||
// Filter and deduplicate
|
||||
const seenEmails = new Set<string>()
|
||||
const validRecords: Record<string, string>[] = []
|
||||
|
||||
for (const row of records) {
|
||||
if (!isValidEntry(row)) continue
|
||||
|
||||
const email = (row['E-mail'] || '').trim().toLowerCase()
|
||||
if (seenEmails.has(email)) continue
|
||||
|
||||
seenEmails.add(email)
|
||||
validRecords.push(row)
|
||||
}
|
||||
|
||||
console.log(` Valid entries after filtering: ${validRecords.length}`)
|
||||
// Skip only completely empty rows (no name, no email, no project)
|
||||
const validRecords = records.filter((row: Record<string, string>) => !isEmptyRow(row))
|
||||
console.log(` Entries to seed: ${validRecords.length}`)
|
||||
|
||||
// Create applicant users and projects
|
||||
console.log('\n🚀 Creating applicant users and projects...')
|
||||
@@ -836,7 +823,9 @@ async function main() {
|
||||
const intakeStage = mainStages[0] // INTAKE - CLOSED
|
||||
const filterStage = mainStages[1] // FILTER - ACTIVE
|
||||
|
||||
for (const row of validRecords) {
|
||||
let skippedNoEmail = 0
|
||||
for (let rowIdx = 0; rowIdx < validRecords.length; rowIdx++) {
|
||||
const row = validRecords[rowIdx]
|
||||
const email = (row['E-mail'] || '').trim().toLowerCase()
|
||||
const name = (row['Full name'] || '').trim()
|
||||
const phone = (row['Téléphone'] || '').trim() || null
|
||||
@@ -855,7 +844,14 @@ async function main() {
|
||||
const phase2Url = (row['PHASE 2 - Submission'] || '').trim() || null
|
||||
const foundedAt = parseFoundedDate(row['Date of creation'])
|
||||
|
||||
// Create or get applicant user
|
||||
// Skip rows with no usable email (can't create user without one)
|
||||
if (!email || !email.includes('@')) {
|
||||
skippedNoEmail++
|
||||
console.log(` ⚠ Row ${rowIdx + 2}: skipped (no valid email)`)
|
||||
continue
|
||||
}
|
||||
|
||||
// Create or get applicant user (upsert handles duplicate emails)
|
||||
const user = await prisma.user.upsert({
|
||||
where: { email },
|
||||
update: {
|
||||
@@ -864,7 +860,7 @@ async function main() {
|
||||
},
|
||||
create: {
|
||||
email,
|
||||
name,
|
||||
name: name || `Applicant ${rowIdx + 1}`,
|
||||
role: UserRole.APPLICANT,
|
||||
status: UserStatus.NONE,
|
||||
phoneNumber: phone,
|
||||
@@ -930,6 +926,9 @@ async function main() {
|
||||
}
|
||||
|
||||
console.log(` ✓ Created ${projectCount} projects with stage states`)
|
||||
if (skippedNoEmail > 0) {
|
||||
console.log(` ⚠ Skipped ${skippedNoEmail} rows with no valid email`)
|
||||
}
|
||||
}
|
||||
|
||||
// ==========================================================================
|
||||
|
||||
Reference in New Issue
Block a user