# Data Platform Migration Guide
|
||
|
|
|
||
|
|
**Date**: 2025-01-27

**Purpose**: Guide for migrating projects to the centralized data platform

**Status**: Complete
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Overview
|
||
|
|
|
||
|
|
This guide provides instructions for migrating projects to use the centralized data platform (MinIO/S3).
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Prerequisites
|
||
|
|
|
||
|
|
- MinIO deployed and configured
- Buckets created
- Access credentials configured
- Data catalog set up (optional)
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Migration Steps
|
||
|
|
|
||
|
|
### Step 1: Install S3 Client
|
||
|
|
|
||
|
|
```bash
|
||
|
|
pnpm add @aws-sdk/client-s3
|
||
|
|
```
|
||
|
|
|
||
|
|
### Step 2: Configure S3 Client
|
||
|
|
|
||
|
|
```typescript
|
||
|
|
import { S3Client } from '@aws-sdk/client-s3';
|
||
|
|
|
||
|
|
// Shared S3 client used by the snippets in this guide. The endpoint and the
// credentials are read from MINIO_* environment variables; when a variable is
// unset or empty, the hard-coded local default after `||` is used instead.
const s3Client = new S3Client({
  region: 'us-east-1',
  endpoint: process.env.MINIO_ENDPOINT || 'http://minio:9000',
  forcePathStyle: true, // Required for MinIO
  credentials: {
    secretAccessKey: process.env.MINIO_SECRET_KEY || 'minioadmin',
    accessKeyId: process.env.MINIO_ACCESS_KEY || 'minioadmin',
  },
});
|
||
|
|
```
|
||
|
|
|
||
|
|
### Step 3: Upload Data
|
||
|
|
|
||
|
|
```typescript
|
||
|
|
import { PutObjectCommand } from '@aws-sdk/client-s3';
|
||
|
|
|
||
|
|
/**
 * Uploads a buffer to `bucket` under `key` using the shared `s3Client`.
 *
 * @param bucket - Name of the destination bucket.
 * @param key - Object key to write.
 * @param data - Raw bytes stored as the object body.
 * @param contentType - MIME type recorded on the object. Defaults to
 *   `'application/json'`, so existing three-argument calls behave as before;
 *   pass another value (for example `'text/csv'`) for non-JSON payloads.
 * @returns A promise that resolves once the put request has completed.
 */
async function uploadData(
  bucket: string,
  key: string,
  data: Buffer,
  contentType = 'application/json',
): Promise<void> {
  await s3Client.send(
    new PutObjectCommand({
      Bucket: bucket,
      Key: key,
      Body: data,
      ContentType: contentType,
    }),
  );
}
|
||
|
|
```
|
||
|
|
|
||
|
|
### Step 4: Download Data
|
||
|
|
|
||
|
|
```typescript
|
||
|
|
import { GetObjectCommand } from '@aws-sdk/client-s3';
|
||
|
|
|
||
|
|
/**
 * Downloads the object stored at `key` in `bucket` and returns its complete
 * contents as a single buffer.
 *
 * @param bucket - Name of the bucket to read from.
 * @param key - Object key to fetch.
 * @returns The object's bytes; an empty buffer for a zero-length object.
 * @throws Error if the response carries no body, in addition to any error
 *   raised by the underlying `s3Client.send` call.
 */
async function downloadData(bucket: string, key: string): Promise<Buffer> {
  const response = await s3Client.send(
    new GetObjectCommand({ Bucket: bucket, Key: key }),
  );

  // `Body` is optional in the SDK's output type; fail with a clear message
  // instead of an opaque "not async iterable" TypeError.
  if (!response.Body) {
    throw new Error(`Empty response body for s3://${bucket}/${key}`);
  }

  // The SDK's stream mixin collects the whole body, which avoids casting the
  // stream to `any` in order to iterate over it by hand.
  const bytes = await response.Body.transformToByteArray();
  return Buffer.from(bytes);
}
|
||
|
|
```
|
||
|
|
|
||
|
|
### Step 5: List Objects
|
||
|
|
|
||
|
|
```typescript
|
||
|
|
import { ListObjectsV2Command } from '@aws-sdk/client-s3';
|
||
|
|
|
||
|
|
/**
 * Lists every object in `bucket`, optionally restricted to keys that start
 * with `prefix`.
 *
 * A single ListObjectsV2 response holds at most one page of keys (1,000 by
 * default), so this follows continuation tokens until the listing is
 * complete rather than returning only the first page.
 *
 * @param bucket - Name of the bucket to list.
 * @param prefix - Optional key prefix used to filter the listing.
 * @returns All matching object entries; an empty array when none match.
 */
async function listObjects(bucket: string, prefix?: string) {
  const objects = [];
  let continuationToken: string | undefined;

  do {
    const response = await s3Client.send(
      new ListObjectsV2Command({
        Bucket: bucket,
        Prefix: prefix,
        ContinuationToken: continuationToken,
      }),
    );

    // `Contents` is omitted entirely when a page has no keys.
    for (const object of response.Contents ?? []) {
      objects.push(object);
    }

    // Only keep paging while the service reports that more results remain.
    continuationToken = response.IsTruncated
      ? response.NextContinuationToken
      : undefined;
  } while (continuationToken);

  return objects;
}
|
||
|
|
```
|
||
|
|
|
||
|
|
### Step 6: Register in Data Catalog
|
||
|
|
|
||
|
|
```typescript
|
||
|
|
/**
 * Registers a dataset in the data catalog by POSTing its metadata as JSON to
 * `/api/catalog/datasets`.
 *
 * @param metadata - Dataset description to register; sent as the request
 *   body via `JSON.stringify`.
 * @throws Error if the catalog answers with a non-2xx status, so a failed
 *   registration is not silently ignored.
 */
async function registerDataset(metadata: DatasetMetadata): Promise<void> {
  // NOTE(review): the URL is relative, so this presumably runs where a base
  // origin is available (e.g. a browser) — confirm, or prefix the catalog's
  // base URL when calling from Node.
  const response = await fetch('/api/catalog/datasets', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(metadata),
  });

  // `fetch` only rejects on network failure; HTTP error statuses must be
  // checked explicitly.
  if (!response.ok) {
    throw new Error(
      `Failed to register dataset in data catalog: ${response.status} ${response.statusText}`,
    );
  }
}
|
||
|
|
```
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Best Practices
|
||
|
|
|
||
|
|
### Bucket Organization
|
||
|
|
- Use consistent naming: `{project}-{environment}-{type}`
- Examples: `analytics-prod-events`, `user-data-dev-profiles`
|
||
|
|
|
||
|
|
### Data Formats
|
||
|
|
- Use Parquet for analytics data
- Use JSON for configuration data
- Use CSV for simple data exports
|
||
|
|
|
||
|
|
### Access Control
|
||
|
|
- Use bucket policies
- Implement IAM-like permissions
- Encrypt sensitive data
|
||
|
|
|
||
|
|
### Data Catalog
|
||
|
|
- Register all datasets
- Include metadata
- Tag appropriately
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
## Migration Checklist
|
||
|
|
|
||
|
|
- [ ] Install S3 client
- [ ] Configure S3 client
- [ ] Create buckets
- [ ] Set up access credentials
- [ ] Migrate data
- [ ] Update code to use S3
- [ ] Register in data catalog
- [ ] Test data access
- [ ] Update documentation
- [ ] Set up monitoring
|
||
|
|
|
||
|
|
---
|
||
|
|
|
||
|
|
**Last Updated**: 2025-01-27
|
||
|
|
|