Optimize AI system with batching, token tracking, and GDPR compliance
- Add AIUsageLog model for persistent token/cost tracking
- Implement batched processing for all AI services:
  - Assignment: 15 projects/batch
  - Filtering: 20 projects/batch
  - Award eligibility: 20 projects/batch
  - Mentor matching: 15 projects/batch
- Create unified error classification (ai-errors.ts)
- Enhance anonymization with comprehensive project data
- Add AI usage dashboard to Settings page
- Add usage stats endpoints to settings router
- Create AI system documentation (5 files)
- Create GDPR compliance documentation (2 files)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -4,9 +4,33 @@
|
||||
* Determines project eligibility for special awards using:
|
||||
* - Deterministic field matching (tags, country, category)
|
||||
* - AI interpretation of plain-language criteria
|
||||
*
|
||||
* GDPR Compliance:
|
||||
* - All project data is anonymized before AI processing
|
||||
* - IDs replaced with sequential identifiers
|
||||
* - No personal information sent to OpenAI
|
||||
*/
|
||||
|
||||
import { getOpenAI, getConfiguredModel } from '@/lib/openai'
|
||||
import { getOpenAI, getConfiguredModel, buildCompletionParams } from '@/lib/openai'
|
||||
import { logAIUsage, extractTokenUsage } from '@/server/utils/ai-usage'
|
||||
import { classifyAIError, createParseError, logAIError } from './ai-errors'
|
||||
import {
|
||||
anonymizeProjectsForAI,
|
||||
validateAnonymizedProjects,
|
||||
type ProjectWithRelations,
|
||||
type AnonymizedProjectForAI,
|
||||
type ProjectAIMapping,
|
||||
} from './anonymization'
|
||||
import type { SubmissionSource } from '@prisma/client'
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
const BATCH_SIZE = 20
|
||||
|
||||
// Optimized system prompt
|
||||
const AI_ELIGIBILITY_SYSTEM_PROMPT = `Award eligibility evaluator. Evaluate projects against criteria, return JSON.
|
||||
Format: {"evaluations": [{project_id, eligible: bool, confidence: 0-1, reasoning: str}]}
|
||||
Be objective. Base evaluation only on provided data. No personal identifiers in reasoning.`
|
||||
|
||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -33,6 +57,16 @@ interface ProjectForEligibility {
|
||||
geographicZone?: string | null
|
||||
tags: string[]
|
||||
oceanIssue?: string | null
|
||||
institution?: string | null
|
||||
foundedAt?: Date | null
|
||||
wantsMentorship?: boolean
|
||||
submissionSource?: SubmissionSource
|
||||
submittedAt?: Date | null
|
||||
_count?: {
|
||||
teamMembers?: number
|
||||
files?: number
|
||||
}
|
||||
files?: Array<{ fileType: string | null }>
|
||||
}
|
||||
|
||||
// ─── Auto Tag Rules ─────────────────────────────────────────────────────────
|
||||
@@ -97,32 +131,162 @@ function getFieldValue(
|
||||
|
||||
// ─── AI Criteria Interpretation ─────────────────────────────────────────────
|
||||
|
||||
const AI_ELIGIBILITY_SYSTEM_PROMPT = `You are a special award eligibility evaluator. Given a list of projects and award criteria, determine which projects are eligible.
|
||||
|
||||
Return a JSON object with this structure:
|
||||
{
|
||||
"evaluations": [
|
||||
{
|
||||
"project_id": "string",
|
||||
"eligible": boolean,
|
||||
"confidence": number (0-1),
|
||||
"reasoning": "string"
|
||||
}
|
||||
]
|
||||
/**
|
||||
* Convert project to enhanced format for anonymization
|
||||
*/
|
||||
function toProjectWithRelations(project: ProjectForEligibility): ProjectWithRelations {
|
||||
return {
|
||||
id: project.id,
|
||||
title: project.title,
|
||||
description: project.description,
|
||||
competitionCategory: project.competitionCategory as any,
|
||||
oceanIssue: project.oceanIssue as any,
|
||||
country: project.country,
|
||||
geographicZone: project.geographicZone,
|
||||
institution: project.institution,
|
||||
tags: project.tags,
|
||||
foundedAt: project.foundedAt,
|
||||
wantsMentorship: project.wantsMentorship ?? false,
|
||||
submissionSource: project.submissionSource ?? 'MANUAL',
|
||||
submittedAt: project.submittedAt,
|
||||
_count: {
|
||||
teamMembers: project._count?.teamMembers ?? 0,
|
||||
files: project._count?.files ?? 0,
|
||||
},
|
||||
files: project.files?.map(f => ({ fileType: f.fileType as any })) ?? [],
|
||||
}
|
||||
}
|
||||
|
||||
Be fair, objective, and base your evaluation only on the provided information. Do not include personal identifiers in reasoning.`
|
||||
/**
|
||||
* Process a batch for AI eligibility evaluation
|
||||
*/
|
||||
async function processEligibilityBatch(
|
||||
openai: NonNullable<Awaited<ReturnType<typeof getOpenAI>>>,
|
||||
model: string,
|
||||
criteriaText: string,
|
||||
anonymized: AnonymizedProjectForAI[],
|
||||
mappings: ProjectAIMapping[],
|
||||
userId?: string,
|
||||
entityId?: string
|
||||
): Promise<{
|
||||
results: EligibilityResult[]
|
||||
tokensUsed: number
|
||||
}> {
|
||||
const results: EligibilityResult[] = []
|
||||
let tokensUsed = 0
|
||||
|
||||
const userPrompt = `CRITERIA: ${criteriaText}
|
||||
PROJECTS: ${JSON.stringify(anonymized)}
|
||||
Evaluate eligibility for each project.`
|
||||
|
||||
try {
|
||||
const params = buildCompletionParams(model, {
|
||||
messages: [
|
||||
{ role: 'system', content: AI_ELIGIBILITY_SYSTEM_PROMPT },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
jsonMode: true,
|
||||
temperature: 0.3,
|
||||
maxTokens: 4000,
|
||||
})
|
||||
|
||||
const response = await openai.chat.completions.create(params)
|
||||
const usage = extractTokenUsage(response)
|
||||
tokensUsed = usage.totalTokens
|
||||
|
||||
// Log usage
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'AWARD_ELIGIBILITY',
|
||||
entityType: 'Award',
|
||||
entityId,
|
||||
model,
|
||||
promptTokens: usage.promptTokens,
|
||||
completionTokens: usage.completionTokens,
|
||||
totalTokens: usage.totalTokens,
|
||||
batchSize: anonymized.length,
|
||||
itemsProcessed: anonymized.length,
|
||||
status: 'SUCCESS',
|
||||
})
|
||||
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (!content) {
|
||||
throw new Error('Empty response from AI')
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(content) as {
|
||||
evaluations: Array<{
|
||||
project_id: string
|
||||
eligible: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
}>
|
||||
}
|
||||
|
||||
// Map results back to real IDs
|
||||
for (const eval_ of parsed.evaluations || []) {
|
||||
const mapping = mappings.find((m) => m.anonymousId === eval_.project_id)
|
||||
if (mapping) {
|
||||
results.push({
|
||||
projectId: mapping.realId,
|
||||
eligible: eval_.eligible,
|
||||
confidence: eval_.confidence,
|
||||
reasoning: eval_.reasoning,
|
||||
method: 'AI',
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
if (error instanceof SyntaxError) {
|
||||
const parseError = createParseError(error.message)
|
||||
logAIError('AwardEligibility', 'batch processing', parseError)
|
||||
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'AWARD_ELIGIBILITY',
|
||||
entityType: 'Award',
|
||||
entityId,
|
||||
model,
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: tokensUsed,
|
||||
batchSize: anonymized.length,
|
||||
itemsProcessed: 0,
|
||||
status: 'ERROR',
|
||||
errorMessage: parseError.message,
|
||||
})
|
||||
|
||||
// Flag all for manual review
|
||||
for (const mapping of mappings) {
|
||||
results.push({
|
||||
projectId: mapping.realId,
|
||||
eligible: false,
|
||||
confidence: 0,
|
||||
reasoning: 'AI response parse error — requires manual review',
|
||||
method: 'AI',
|
||||
})
|
||||
}
|
||||
} else {
|
||||
throw error
|
||||
}
|
||||
}
|
||||
|
||||
return { results, tokensUsed }
|
||||
}
|
||||
|
||||
export async function aiInterpretCriteria(
|
||||
criteriaText: string,
|
||||
projects: ProjectForEligibility[]
|
||||
projects: ProjectForEligibility[],
|
||||
userId?: string,
|
||||
awardId?: string
|
||||
): Promise<EligibilityResult[]> {
|
||||
const results: EligibilityResult[] = []
|
||||
|
||||
try {
|
||||
const openai = await getOpenAI()
|
||||
if (!openai) {
|
||||
// No OpenAI — mark all as needing manual review
|
||||
console.warn('[AI Eligibility] OpenAI not configured')
|
||||
return projects.map((p) => ({
|
||||
projectId: p.id,
|
||||
eligible: false,
|
||||
@@ -133,91 +297,69 @@ export async function aiInterpretCriteria(
|
||||
}
|
||||
|
||||
const model = await getConfiguredModel()
|
||||
console.log(`[AI Eligibility] Using model: ${model} for ${projects.length} projects`)
|
||||
|
||||
// Anonymize and batch
|
||||
const anonymized = projects.map((p, i) => ({
|
||||
project_id: `P${i + 1}`,
|
||||
real_id: p.id,
|
||||
title: p.title,
|
||||
description: p.description?.slice(0, 500) || '',
|
||||
category: p.competitionCategory || 'Unknown',
|
||||
ocean_issue: p.oceanIssue || 'Unknown',
|
||||
country: p.country || 'Unknown',
|
||||
region: p.geographicZone || 'Unknown',
|
||||
tags: p.tags.join(', '),
|
||||
}))
|
||||
// Convert and anonymize projects
|
||||
const projectsWithRelations = projects.map(toProjectWithRelations)
|
||||
const { anonymized, mappings } = anonymizeProjectsForAI(projectsWithRelations, 'ELIGIBILITY')
|
||||
|
||||
const batchSize = 20
|
||||
for (let i = 0; i < anonymized.length; i += batchSize) {
|
||||
const batch = anonymized.slice(i, i + batchSize)
|
||||
|
||||
const userPrompt = `Award criteria: ${criteriaText}
|
||||
|
||||
Projects to evaluate:
|
||||
${JSON.stringify(
|
||||
batch.map(({ real_id, ...rest }) => rest),
|
||||
null,
|
||||
2
|
||||
)}
|
||||
|
||||
Evaluate each project against the award criteria.`
|
||||
|
||||
const response = await openai.chat.completions.create({
|
||||
model,
|
||||
messages: [
|
||||
{ role: 'system', content: AI_ELIGIBILITY_SYSTEM_PROMPT },
|
||||
{ role: 'user', content: userPrompt },
|
||||
],
|
||||
response_format: { type: 'json_object' },
|
||||
temperature: 0.3,
|
||||
max_tokens: 4000,
|
||||
})
|
||||
|
||||
const content = response.choices[0]?.message?.content
|
||||
if (content) {
|
||||
try {
|
||||
const parsed = JSON.parse(content) as {
|
||||
evaluations: Array<{
|
||||
project_id: string
|
||||
eligible: boolean
|
||||
confidence: number
|
||||
reasoning: string
|
||||
}>
|
||||
}
|
||||
|
||||
for (const eval_ of parsed.evaluations) {
|
||||
const anon = batch.find((b) => b.project_id === eval_.project_id)
|
||||
if (anon) {
|
||||
results.push({
|
||||
projectId: anon.real_id,
|
||||
eligible: eval_.eligible,
|
||||
confidence: eval_.confidence,
|
||||
reasoning: eval_.reasoning,
|
||||
method: 'AI',
|
||||
})
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Parse error — mark batch for manual review
|
||||
for (const item of batch) {
|
||||
results.push({
|
||||
projectId: item.real_id,
|
||||
eligible: false,
|
||||
confidence: 0,
|
||||
reasoning: 'AI response parse error — requires manual review',
|
||||
method: 'AI',
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
// Validate anonymization
|
||||
if (!validateAnonymizedProjects(anonymized)) {
|
||||
console.error('[AI Eligibility] Anonymization validation failed')
|
||||
throw new Error('GDPR compliance check failed: PII detected in anonymized data')
|
||||
}
|
||||
} catch {
|
||||
// OpenAI error — mark all for manual review
|
||||
|
||||
let totalTokens = 0
|
||||
|
||||
// Process in batches
|
||||
for (let i = 0; i < anonymized.length; i += BATCH_SIZE) {
|
||||
const batchAnon = anonymized.slice(i, i + BATCH_SIZE)
|
||||
const batchMappings = mappings.slice(i, i + BATCH_SIZE)
|
||||
|
||||
console.log(`[AI Eligibility] Processing batch ${Math.floor(i / BATCH_SIZE) + 1}/${Math.ceil(anonymized.length / BATCH_SIZE)}`)
|
||||
|
||||
const { results: batchResults, tokensUsed } = await processEligibilityBatch(
|
||||
openai,
|
||||
model,
|
||||
criteriaText,
|
||||
batchAnon,
|
||||
batchMappings,
|
||||
userId,
|
||||
awardId
|
||||
)
|
||||
|
||||
results.push(...batchResults)
|
||||
totalTokens += tokensUsed
|
||||
}
|
||||
|
||||
console.log(`[AI Eligibility] Completed. Total tokens: ${totalTokens}`)
|
||||
|
||||
} catch (error) {
|
||||
const classified = classifyAIError(error)
|
||||
logAIError('AwardEligibility', 'aiInterpretCriteria', classified)
|
||||
|
||||
// Log failed attempt
|
||||
await logAIUsage({
|
||||
userId,
|
||||
action: 'AWARD_ELIGIBILITY',
|
||||
entityType: 'Award',
|
||||
entityId: awardId,
|
||||
model: 'unknown',
|
||||
promptTokens: 0,
|
||||
completionTokens: 0,
|
||||
totalTokens: 0,
|
||||
batchSize: projects.length,
|
||||
itemsProcessed: 0,
|
||||
status: 'ERROR',
|
||||
errorMessage: classified.message,
|
||||
})
|
||||
|
||||
// Return all as needing manual review
|
||||
return projects.map((p) => ({
|
||||
projectId: p.id,
|
||||
eligible: false,
|
||||
confidence: 0,
|
||||
reasoning: 'AI error — requires manual eligibility review',
|
||||
reasoning: `AI error: ${classified.message}`,
|
||||
method: 'AI' as const,
|
||||
}))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user