image processing lambda

This commit is contained in:
jackiettran
2026-01-14 12:11:50 -05:00
parent f5fdcbfb82
commit da82872297
15 changed files with 8090 additions and 17 deletions

View File

@@ -0,0 +1,77 @@
# Image Processor Lambda
Processes uploaded images by extracting metadata (EXIF, GPS, camera info) and stripping it before storing publicly. Triggered by S3 uploads to the `staging/` prefix.
## How It Works
1. User uploads image to `staging/{folder}/{uuid}.jpg` via presigned URL
2. S3 triggers this Lambda
3. Lambda extracts metadata and saves to `ImageMetadata` table
4. Lambda strips EXIF data from image
5. Lambda moves clean image to final location `{folder}/{uuid}.jpg`
6. Lambda deletes staging file
## Local Development
### Install Dependencies
```bash
cd lambdas/shared && npm install
cd ../imageProcessor && npm install
```
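### Set Up Environment
Create a `.env.dev` file in `lambdas/imageProcessor` (it is loaded by `npm run local`) that defines the variables below, and make sure AWS credentials are configured.
#### Environment Variables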
| Variable | Description | Example |
| -------------- | ---------------------------- | ----------------------------------------------- |
| `DATABASE_URL` | PostgreSQL connection string | `postgresql://user:pass@localhost:5432/db-name` |
| `S3_BUCKET` | S3 bucket name | `bucket-name` |
| `AWS_REGION` | AWS region | `us-east-1` |
| `LOG_LEVEL` | Logging level | `debug`, `info`, `warn`, `error` |
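### Run Locally
First, upload a test image to the `staging/` prefix of your bucket:
```bash
aws s3 cp source s3://bucket-name/staging/image-type/key --profile your-profile-name
```
Then invoke the handler against that key. The first argument is the staging key; the second is the bucket name, which is optional and defaults to `S3_BUCKET`:
```bash
npm run local -- staging/items/test-image.jpg my-bucket
```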
### Run Migration
```bash
cd backend
npx sequelize-cli db:migrate
```
## Supported Image Types
- JPEG (`.jpg`, `.jpeg`)
- PNG (`.png`)
- GIF (`.gif`) - preserves animation
- WebP (`.webp`)
## Metadata Extracted
| Field | Description |
| --------------------------- | -------------------------------------------- |
| `latitude`, `longitude` | GPS coordinates (stripped from public image) |
| `cameraMake`, `cameraModel` | Camera manufacturer and model |
| `cameraSoftware` | Software used to create image |
| `dateTaken` | Original capture date |
| `width`, `height` | Image dimensions |
| `orientation` | EXIF orientation flag |
| `fileSize` | File size in bytes |
## Deployment
See [infrastructure/cdk/README.md](../../infrastructure/cdk/README.md) for deployment instructions.
```bash
cd infrastructure/cdk
npm run deploy:staging
```

View File

@@ -0,0 +1,170 @@
const sharp = require("sharp");
const exifReader = require("exif-reader");
const { logger } = require("../shared");
/**
 * Collect descriptive metadata from an image before it is stripped.
 *
 * Dimensions and orientation come from sharp; GPS position, camera details
 * and capture date come from the embedded EXIF block when one is present.
 * An EXIF block that cannot be parsed is logged and treated as absent.
 *
 * @param {Buffer} buffer - Raw image bytes
 * @returns {Promise<Object>} Object with latitude, longitude, cameraMake,
 *   cameraModel, cameraSoftware, dateTaken, width, height, orientation and
 *   fileSize
 */
async function extractMetadata(buffer) {
  const info = await sharp(buffer).metadata();

  let exif = {};
  if (info.exif) {
    try {
      exif = exifReader(info.exif);
    } catch (e) {
      // A broken EXIF block must not fail the whole image
      logger.warn("Failed to parse EXIF data", { error: e.message });
    }
  }

  const gpsTags = exif?.gps;
  const imageTags = exif?.image;
  // Prefer the original capture time, fall back to the generic timestamp
  const rawDate = exif?.exif?.DateTimeOriginal || imageTags?.DateTime;

  return {
    // GPS data
    latitude: parseGpsCoordinate(gpsTags?.GPSLatitude, gpsTags?.GPSLatitudeRef),
    longitude: parseGpsCoordinate(gpsTags?.GPSLongitude, gpsTags?.GPSLongitudeRef),
    // Camera info
    cameraMake: imageTags?.Make || null,
    cameraModel: imageTags?.Model || null,
    cameraSoftware: imageTags?.Software || null,
    // Date/time
    dateTaken: parseExifDate(rawDate),
    // Dimensions
    width: info.width,
    height: info.height,
    orientation: info.orientation || 1,
    // File info
    fileSize: buffer.length,
  };
}
/**
 * Re-encode an image so that its embedded metadata is not carried over.
 *
 * sharp does not copy input metadata (EXIF, XMP, ...) to its output unless
 * explicitly asked to, so re-encoding is what removes it. For still images
 * rotate() is applied first so the orientation stored in EXIF is baked into
 * the pixels before that tag disappears.
 *
 * The output format follows the format detected in `buffer`: GIF, PNG and
 * WebP are kept as-is, anything else is written as JPEG.
 *
 * @param {Buffer} buffer - Image buffer
 * @param {string} format - Currently unused; the output format is derived
 *   from the input. Kept so existing callers keep working.
 * @returns {Promise<Buffer>} Processed image buffer
 */
async function stripMetadata(buffer, format = "jpeg") {
  const { format: detectedFormat } = await sharp(buffer).metadata();

  if (detectedFormat === "gif") {
    // sharp decodes only the first frame unless `animated` is set, which
    // would silently turn an animated GIF into a still image.
    return sharp(buffer, { animated: true }).gif().toBuffer();
  }

  // Auto-orient using the EXIF orientation tag before it is dropped
  const oriented = sharp(buffer).rotate();

  // NOTE(review): animated WebP input is still decoded as a single frame
  // here — confirm whether animated WebP uploads need to be supported.
  switch (detectedFormat) {
    case "png":
      return oriented.png().toBuffer();
    case "webp":
      return oriented.webp().toBuffer();
    default:
      // Default to JPEG for best compatibility
      return oriented.jpeg({ quality: 90 }).toBuffer();
  }
}
/**
 * Convert GPS DMS (degrees, minutes, seconds) to a signed decimal coordinate.
 *
 * @param {Array<number>} dms - [degrees, minutes, seconds]
 * @param {string} ref - N/S/E/W reference; "S" and "W" yield negative values
 * @returns {number|null} Decimal coordinate rounded to 8 decimal places, or
 *   null when `dms` is not an array of exactly three finite numbers
 */
function parseGpsCoordinate(dms, ref) {
  if (!Array.isArray(dms) || dms.length !== 3) {
    return null;
  }
  // Malformed EXIF can yield NaN/Infinity (or non-numeric values); treat the
  // coordinate as missing instead of returning an unusable number.
  if (!dms.every((part) => Number.isFinite(part))) {
    return null;
  }

  const [degrees, minutes, seconds] = dms;
  const magnitude = degrees + minutes / 60 + seconds / 3600;
  // South and West are negative
  const signed = ref === "S" || ref === "W" ? -magnitude : magnitude;

  // Round to 8 decimal places (about 1mm precision)
  return Math.round(signed * 100000000) / 100000000;
}
/**
 * Parse an EXIF date value into a Date.
 *
 * EXIF format: "YYYY:MM:DD HH:MM:SS". The value carries no time zone, so it
 * is interpreted in the runtime's local time zone. Anything that is not in
 * EXIF format is handed to the Date constructor as a fallback.
 *
 * @param {string|Date} dateStr - EXIF date string or Date object
 * @returns {Date|null} Parsed date, or null when the input is empty or does
 *   not describe a valid date (e.g. the zero date "0000:00:00 00:00:00")
 */
function parseExifDate(dateStr) {
  if (!dateStr) {
    return null;
  }

  let parsed;
  if (dateStr instanceof Date) {
    parsed = dateStr;
  } else {
    // EXIF format: "YYYY:MM:DD HH:MM:SS"
    const match = String(dateStr).match(
      /(\d{4}):(\d{2}):(\d{2}) (\d{2}):(\d{2}):(\d{2})/
    );
    if (match) {
      const [, year, month, day, hour, minute, second] = match;
      parsed = new Date(`${year}-${month}-${day}T${hour}:${minute}:${second}`);
    } else {
      // Try parsing as ISO date
      parsed = new Date(dateStr);
    }
  }

  // Never hand an Invalid Date to callers: well-formed but impossible values
  // such as month 00 pass the regex yet do not produce a real date.
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}
/**
 * Get the MIME type for an image format or file extension.
 *
 * @param {string} format - Image format / extension without the dot
 *   (e.g. "png"); matched exactly, so callers should pass lower case
 * @returns {string} MIME type; "image/jpeg" for anything unrecognized
 */
function getMimeType(format) {
  const mimeTypes = {
    jpeg: "image/jpeg",
    jpg: "image/jpeg",
    png: "image/png",
    gif: "image/gif",
    webp: "image/webp",
  };
  // Own-property check so inherited members such as "constructor" or
  // "toString" are not mistaken for known formats.
  return Object.hasOwn(mimeTypes, format) ? mimeTypes[format] : "image/jpeg";
}
// Public API of the image processing helpers. The two parse* helpers are
// exported alongside the main entry points.
module.exports = {
extractMetadata,
stripMetadata,
parseGpsCoordinate,
parseExifDate,
getMimeType,
};

View File

@@ -0,0 +1,161 @@
const {
S3Client,
GetObjectCommand,
PutObjectCommand,
DeleteObjectCommand,
} = require("@aws-sdk/client-s3");
const { extractMetadata, stripMetadata, getMimeType } = require("./imageProcessor");
const { saveImageMetadata, updateProcessingStatus } = require("./queries");
const { logger } = require("../shared");
// Single S3 client created at module load and shared by every S3 call in this
// module; the region is read from the AWS_REGION environment variable.
const s3Client = new S3Client({ region: process.env.AWS_REGION });
/**
* Lambda handler for S3 image processing.
* Triggered by S3 ObjectCreated events on staging/ prefix.
*
* @param {Object} event - S3 event
* @returns {Object} Processing results
*/
exports.handler = async (event) => {
logger.info("Lambda invoked", { recordCount: event.Records?.length });
const results = [];
for (const record of event.Records) {
const bucket = record.s3.bucket.name;
const stagingKey = decodeURIComponent(record.s3.object.key.replace(/\+/g, " "));
logger.info("Processing image", { bucket, stagingKey });
try {
// Only process files in staging/ folder
if (!stagingKey.startsWith("staging/")) {
logger.info("Skipping non-staging key", { stagingKey });
results.push({ key: stagingKey, status: "skipped", reason: "not in staging" });
continue;
}
// Calculate final key: staging/items/uuid.jpg -> items/uuid.jpg
const finalKey = stagingKey.replace(/^staging\//, "");
// Check if this is an image file
if (!isImageFile(stagingKey)) {
logger.info("Skipping non-image file", { stagingKey });
results.push({ key: stagingKey, status: "skipped", reason: "not an image" });
continue;
}
// Process the image
await processImage(bucket, stagingKey, finalKey);
results.push({ key: finalKey, status: "success" });
logger.info("Successfully processed image", { finalKey });
} catch (error) {
logger.error("Error processing image", { stagingKey, error: error.message, stack: error.stack });
results.push({ key: stagingKey, status: "error", error: error.message });
// Try to update status to failed if we have a finalKey
try {
const finalKey = stagingKey.replace(/^staging\//, "");
await updateProcessingStatus(finalKey, "failed", error.message);
} catch (dbError) {
logger.error("Failed to update error status in DB", { error: dbError.message });
}
}
}
return {
statusCode: 200,
body: JSON.stringify({ processed: results.length, results }),
};
};
/**
 * Run the full pipeline for one staged image: download it, record its
 * metadata, write a metadata-free copy to the final key, remove the staged
 * object and mark the record as completed.
 *
 * Any failing step rejects the returned promise; later steps are not run.
 *
 * @param {string} bucket - S3 bucket name
 * @param {string} stagingKey - Staging key (e.g., staging/items/uuid.jpg)
 * @param {string} finalKey - Final key (e.g., items/uuid.jpg)
 * @returns {Promise<void>}
 */
async function processImage(bucket, stagingKey, finalKey) {
  // Step 1: fetch the staged object into memory
  logger.debug("Downloading from staging", { stagingKey });
  const staged = await s3Client.send(
    new GetObjectCommand({ Bucket: bucket, Key: stagingKey })
  );
  const originalBytes = Buffer.from(await staged.Body.transformToByteArray());

  // Step 2: read metadata while it is still present
  logger.debug("Extracting metadata");
  const metadata = await extractMetadata(originalBytes);
  logger.info("Extracted metadata", { finalKey, metadata });

  // Step 3: persist it, keyed by the final location
  logger.debug("Saving metadata to DB", { finalKey });
  await saveImageMetadata(finalKey, metadata);

  // Step 4: produce the clean copy
  logger.debug("Stripping metadata");
  const cleanBytes = await stripMetadata(originalBytes);

  // Step 5: content type is derived from the staged file's extension
  const extension = stagingKey.split(".").pop().toLowerCase();
  const contentType = getMimeType(extension);

  // Step 6: publish the clean copy
  logger.debug("Uploading to final location", { finalKey });
  await s3Client.send(
    new PutObjectCommand({
      Bucket: bucket,
      Key: finalKey,
      Body: cleanBytes,
      ContentType: contentType,
      CacheControl: getCacheControl(finalKey),
      Metadata: {
        "x-processed": "true",
        "x-processed-at": new Date().toISOString(),
      },
    })
  );

  // Step 7: the staged original is no longer needed
  logger.debug("Deleting staging file", { stagingKey });
  await s3Client.send(
    new DeleteObjectCommand({ Bucket: bucket, Key: stagingKey })
  );

  // Step 8: record success
  logger.debug("Updating processing status to completed");
  await updateProcessingStatus(finalKey, "completed");
}
/**
 * Check if a file is an image based on its extension (case-insensitive).
 *
 * @param {string} key - S3 key
 * @returns {boolean} true when the text from the last "." onward is one of
 *   .jpg, .jpeg, .png, .gif or .webp
 */
function isImageFile(key) {
  const imageExtensions = [".jpg", ".jpeg", ".png", ".gif", ".webp"];
  // Locate the dot in the lower-cased string itself: lower-casing can change
  // the string's length (e.g. "İ"), so an index taken from the original key
  // may point at the wrong position.
  const lowered = key.toLowerCase();
  const dotIndex = lowered.lastIndexOf(".");
  if (dotIndex === -1) {
    return false;
  }
  return imageExtensions.includes(lowered.slice(dotIndex));
}
/**
 * Choose the Cache-Control header for an object from its key prefix.
 *
 * Keys under messages/ or condition-checks/ are treated as private and get
 * a one-hour private cache; every other key gets a one-day public cache.
 *
 * @param {string} key - S3 key
 * @returns {string} Cache-Control header value
 */
function getCacheControl(key) {
  const privatePrefixes = ["messages/", "condition-checks/"];
  const isPrivate = privatePrefixes.some((prefix) => key.startsWith(prefix));
  return isPrivate ? "private, max-age=3600" : "public, max-age=86400";
}

6574
lambdas/imageProcessor/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,23 @@
{
"name": "image-processor-lambda",
"version": "1.0.0",
"description": "Lambda function to extract and strip image metadata from uploads",
"main": "index.js",
"dependencies": {
"@aws-sdk/client-s3": "^3.400.0",
"@rentall/lambda-shared": "file:../shared",
"exif-reader": "^2.0.0",
"sharp": "^0.33.0"
},
"devDependencies": {
"dotenv": "^17.2.3",
"jest": "^30.1.3"
},
"scripts": {
"test": "jest",
"local": "node -r dotenv/config test-local.js dotenv_config_path=.env.dev"
},
"engines": {
"node": ">=18.0.0"
}
}

View File

@@ -0,0 +1,80 @@
const { db } = require("../shared");
/**
 * Insert the metadata row for an image, or overwrite the existing row for
 * the same s3Key (re-upload). The row's processingStatus is written as
 * "processing" in both cases.
 *
 * @param {string} s3Key - Final S3 key (without staging/ prefix)
 * @param {Object} metadata - Extracted metadata
 * @returns {Promise<string>} The inserted/updated record ID
 */
async function saveImageMetadata(s3Key, metadata) {
  const {
    latitude,
    longitude,
    cameraMake,
    cameraModel,
    cameraSoftware,
    dateTaken,
    width,
    height,
    orientation,
    fileSize,
  } = metadata;

  const upsertSql = `
INSERT INTO "ImageMetadata" (
id, "s3Key", latitude, longitude,
"cameraMake", "cameraModel", "cameraSoftware",
"dateTaken", width, height, orientation, "fileSize",
"processingStatus", "createdAt", "updatedAt"
) VALUES (
gen_random_uuid(), $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, NOW(), NOW()
)
ON CONFLICT ("s3Key") DO UPDATE SET
latitude = EXCLUDED.latitude,
longitude = EXCLUDED.longitude,
"cameraMake" = EXCLUDED."cameraMake",
"cameraModel" = EXCLUDED."cameraModel",
"cameraSoftware" = EXCLUDED."cameraSoftware",
"dateTaken" = EXCLUDED."dateTaken",
width = EXCLUDED.width,
height = EXCLUDED.height,
orientation = EXCLUDED.orientation,
"fileSize" = EXCLUDED."fileSize",
"processingStatus" = EXCLUDED."processingStatus",
"updatedAt" = NOW()
RETURNING id
`;

  // Order must match the $1..$12 placeholders above
  const params = [
    s3Key,
    latitude,
    longitude,
    cameraMake,
    cameraModel,
    cameraSoftware,
    dateTaken,
    width,
    height,
    orientation,
    fileSize,
    "processing",
  ];

  const { rows } = await db.query(upsertSql, params);
  return rows[0].id;
}
/**
 * Record the outcome of processing on the row identified by `s3Key`.
 *
 * "processedAt" is set to the current time only when the status is
 * 'completed'; "errorMessage" is always overwritten with the given value.
 *
 * @param {string} s3Key - S3 key
 * @param {string} status - 'completed' or 'failed'
 * @param {string} errorMessage - Error message if failed
 * @returns {Promise<void>}
 */
async function updateProcessingStatus(s3Key, status, errorMessage = null) {
  const updateSql = `
UPDATE "ImageMetadata"
SET
"processingStatus" = $2::"enum_ImageMetadata_processingStatus",
"processedAt" = CASE WHEN $2 = 'completed' THEN NOW() ELSE "processedAt" END,
"errorMessage" = $3,
"updatedAt" = NOW()
WHERE "s3Key" = $1
`;
  const params = [s3Key, status, errorMessage];
  await db.query(updateSql, params);
}
// Database operations on the "ImageMetadata" table exposed by this module.
module.exports = {
saveImageMetadata,
updateProcessingStatus,
};

View File

@@ -0,0 +1,59 @@
/**
* Local testing script for the image processor lambda.
*
* Usage:
* npm run local -- <stagingKey> [bucket]
*
* Example:
* npm run local -- staging/items/test-image.jpg my-bucket
*
* Note: Requires .env.dev file with DATABASE_URL and AWS credentials configured.
*/
const { handler } = require("./index");
/**
 * Build a minimal S3 "ObjectCreated:Put" event from the command-line
 * arguments and run the Lambda handler against it.
 *
 * Exits with code 0 when every record was handled without error and 1
 * otherwise (missing bucket, handler rejection, or a per-record error
 * reported in the handler's result).
 */
async function main() {
  // Filter out dotenv config args from process.argv
  const args = process.argv
    .slice(2)
    .filter((arg) => !arg.startsWith("dotenv_config_path"));

  // Get staging key and bucket from command line args
  const stagingKey = args[0] || "staging/items/test-image.jpg";
  const bucket = args[1] || process.env.S3_BUCKET;

  // Fail fast with a clear message instead of sending `undefined` to S3
  if (!bucket) {
    console.error(
      "Error: no bucket specified. Pass it as the second argument or set S3_BUCKET."
    );
    process.exit(1);
  }

  console.log("Testing image processor lambda locally...");
  console.log(`Bucket: ${bucket}`);
  console.log(`Staging Key: ${stagingKey}`);
  console.log("---");

  // Simulate S3 event
  const event = {
    Records: [
      {
        eventSource: "aws:s3",
        eventName: "ObjectCreated:Put",
        s3: {
          bucket: {
            name: bucket,
          },
          object: {
            key: stagingKey,
          },
        },
      },
    ],
  };

  console.log("Event:", JSON.stringify(event, null, 2));
  console.log("---");

  try {
    const result = await handler(event);
    console.log("Result:", JSON.stringify(result, null, 2));

    // The handler reports per-record failures in its result instead of
    // throwing, so inspect the result to pick the exit code.
    const { results = [] } = JSON.parse(result.body);
    const hasFailure = results.some((entry) => entry.status === "error");
    process.exit(hasFailure ? 1 : 0);
  } catch (error) {
    console.error("Error:", error);
    process.exit(1);
  }
}
main();