chore: sync submodule state (parent ref update)

Made-with: Cursor
This commit is contained in:
defiQUG
2026-03-02 12:14:07 -08:00
parent 6c4555cebd
commit 89b82cdadb
883 changed files with 78752 additions and 18180 deletions

View File

@@ -0,0 +1,7 @@
-- 001_ledger_idempotency.sql
-- Add unique constraint for ledger entry idempotency
-- Prevents duplicate postings with same reference_id per ledger
-- Idempotent re-apply: ALTER TABLE ... ADD CONSTRAINT has no IF NOT EXISTS
-- form, so guard via pg_constraint. This keeps the migration safely
-- re-runnable, matching the IF NOT EXISTS style used by the later migrations.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'ledger_entries_unique_ledger_reference'
          AND conrelid = 'ledger_entries'::regclass
    ) THEN
        ALTER TABLE ledger_entries
        ADD CONSTRAINT ledger_entries_unique_ledger_reference
        UNIQUE (ledger_id, reference_id);
    END IF;
END $$;

View File

@@ -0,0 +1,60 @@
-- 002_dual_ledger_outbox.sql
-- Create outbox table for dual-ledger synchronization
-- Enables transactional outbox pattern for SCB ledger sync
-- Transactional-outbox rows: one per internal ledger entry that must be
-- mirrored to the SCB (Sovereign Central Bank) ledger. Workers drain QUEUED
-- rows and advance status; 003 enforces the legal transitions.
CREATE TABLE IF NOT EXISTS dual_ledger_outbox (
    id                     uuid PRIMARY KEY DEFAULT gen_random_uuid(),
    outbox_id              text NOT NULL UNIQUE,          -- secondary unique identifier for this row
    internal_entry_id      text NOT NULL,                 -- source ledger entry
    internal_settlement_id text NULL,
    sovereign_bank_id      text NOT NULL,                 -- target SCB instance
    ledger_id              text NOT NULL,
    reference_id           text NOT NULL,
    payload                jsonb NOT NULL,                -- entry body to deliver
    payload_hash           text NOT NULL,                 -- digest of payload (integrity / dedupe)
    status                 text NOT NULL DEFAULT 'QUEUED', -- QUEUED|SENT|ACKED|FINALIZED|FAILED
    scb_transaction_id     text NULL,                     -- filled in once the SCB responds
    scb_ledger_hash        text NULL,
    scb_signature          text NULL,
    attempts               int NOT NULL DEFAULT 0,        -- delivery attempts so far
    last_attempt_at        timestamptz NULL,
    last_error             text NULL,
    acked_at               timestamptz NULL,
    finalized_at           timestamptz NULL,
    created_at             timestamptz NOT NULL DEFAULT now(),
    updated_at             timestamptz NOT NULL DEFAULT now()
);

-- At most one sync row per (SCB, reference): duplicate enqueues are rejected.
CREATE UNIQUE INDEX IF NOT EXISTS dual_ledger_outbox_unique_scb_ref
    ON dual_ledger_outbox (sovereign_bank_id, reference_id);

-- Work-queue access paths: claim by status, drain oldest-first, look up by hash.
CREATE INDEX IF NOT EXISTS dual_ledger_outbox_status_idx
    ON dual_ledger_outbox (status);
CREATE INDEX IF NOT EXISTS dual_ledger_outbox_created_idx
    ON dual_ledger_outbox (created_at);
CREATE INDEX IF NOT EXISTS dual_ledger_outbox_payload_hash_idx
    ON dual_ledger_outbox (payload_hash);
-- Auto-update updated_at timestamp
-- Generic row-touch trigger function: stamps updated_at on every UPDATE so
-- application code never has to remember to set it.
CREATE OR REPLACE FUNCTION set_updated_at()
RETURNS trigger AS $$
BEGIN
    NEW.updated_at = now();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Drop-then-create keeps the migration re-runnable.
DROP TRIGGER IF EXISTS dual_ledger_outbox_set_updated_at ON dual_ledger_outbox;
CREATE TRIGGER dual_ledger_outbox_set_updated_at
    BEFORE UPDATE ON dual_ledger_outbox
    FOR EACH ROW
    EXECUTE FUNCTION set_updated_at();

View File

@@ -0,0 +1,45 @@
-- 003_outbox_state_machine.sql
-- Enforce state machine constraints and valid transitions
-- Prevents invalid status transitions (e.g., FINALIZED -> QUEUED)
-- Idempotent re-apply: ADD CONSTRAINT has no IF NOT EXISTS form, so drop any
-- previous copy first (the ADD re-validates existing rows).
ALTER TABLE dual_ledger_outbox
DROP CONSTRAINT IF EXISTS dual_ledger_outbox_status_check;
ALTER TABLE dual_ledger_outbox
ADD CONSTRAINT dual_ledger_outbox_status_check
CHECK (status IN ('QUEUED','SENT','ACKED','FINALIZED','FAILED'));

-- Rejects UPDATEs that would move an outbox row through an illegal status
-- transition. Legal edges:
--   QUEUED -> SENT | FAILED
--   SENT   -> ACKED | FAILED
--   ACKED  -> FINALIZED | FAILED
--   FAILED -> QUEUED   (manual retry)
-- Same-status updates are always allowed (no-op); FINALIZED is terminal.
CREATE OR REPLACE FUNCTION enforce_outbox_status_transition()
RETURNS trigger AS $$
DECLARE
    allowed boolean;
BEGIN
    -- Status untouched: nothing to validate. This also makes a
    -- FAILED -> FAILED "transition" a no-op, so the edge table below
    -- does not need to list it.
    IF OLD.status = NEW.status THEN
        RETURN NEW;
    END IF;
    allowed := CASE OLD.status
        WHEN 'QUEUED' THEN NEW.status IN ('SENT','FAILED')
        WHEN 'SENT'   THEN NEW.status IN ('ACKED','FAILED')
        WHEN 'ACKED'  THEN NEW.status IN ('FINALIZED','FAILED')
        WHEN 'FAILED' THEN NEW.status = 'QUEUED'
        ELSE false -- FINALIZED (or anything unexpected): no outgoing edges
    END;
    IF NOT allowed THEN
        RAISE EXCEPTION 'Invalid outbox transition: % -> %', OLD.status, NEW.status;
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

DROP TRIGGER IF EXISTS dual_ledger_outbox_status_transition ON dual_ledger_outbox;
CREATE TRIGGER dual_ledger_outbox_status_transition
    BEFORE UPDATE ON dual_ledger_outbox
    FOR EACH ROW EXECUTE FUNCTION enforce_outbox_status_transition();

View File

@@ -0,0 +1,18 @@
-- 004_balance_constraints.sql
-- Enforce balance integrity constraints
-- WARNING: Apply after data cleanup if you have existing inconsistent data
-- Idempotent re-apply: ADD CONSTRAINT has no IF NOT EXISTS form, so drop any
-- previous copy first. Each ADD re-validates every existing row, so clean up
-- inconsistent data beforehand (see warning above).
ALTER TABLE bank_accounts
DROP CONSTRAINT IF EXISTS bank_accounts_reserved_nonnegative;
ALTER TABLE bank_accounts
ADD CONSTRAINT bank_accounts_reserved_nonnegative
CHECK (reserved_balance >= 0);

ALTER TABLE bank_accounts
DROP CONSTRAINT IF EXISTS bank_accounts_available_nonnegative;
ALTER TABLE bank_accounts
ADD CONSTRAINT bank_accounts_available_nonnegative
CHECK (available_balance >= 0);

-- NOTE: available <= balance is implied by reserved >= 0 plus the sum check,
-- but it is kept as an independent guard in case a sibling constraint is
-- ever dropped in isolation.
ALTER TABLE bank_accounts
DROP CONSTRAINT IF EXISTS bank_accounts_balance_consistency;
ALTER TABLE bank_accounts
ADD CONSTRAINT bank_accounts_balance_consistency
CHECK (
available_balance <= balance
AND (available_balance + reserved_balance) <= balance
);

View File

@@ -0,0 +1,136 @@
-- 005_post_ledger_entry.sql
-- Atomic ledger posting function with balance updates
-- Enforces idempotency, hash chaining, and balance integrity at DB level
-- digest() comes from pgcrypto; required for the SHA-256 block hash below.
CREATE EXTENSION IF NOT EXISTS pgcrypto;

-- post_ledger_entry: atomically post one double-entry ledger row and move the
-- two account balances.
--
-- Parameters:
--   p_ledger_id          ledger stream the entry belongs to
--   p_debit_account_id   account whose balance decreases
--   p_credit_account_id  account whose balance increases
--   p_amount             positive amount to move
--   p_currency_code      must match both accounts' currency_code
--   p_asset_type, p_transaction_type  classification fields (stored as-is)
--   p_reference_id       idempotency key, unique per ledger (see 001)
--   p_fx_rate, p_metadata  optional, stored as-is
--
-- Guarantees (all inside the caller's transaction):
--   * idempotency per (ledger_id, reference_id)
--   * an append-only SHA-256 hash chain per ledger stream
--   * deadlock-safe account locking (rows taken FOR UPDATE in id order)
--   * the debit account keeps a non-negative available balance
-- Raises on: non-positive amount, duplicate reference, missing account,
-- currency mismatch, insufficient available balance.
CREATE OR REPLACE FUNCTION post_ledger_entry(
    p_ledger_id text,
    p_debit_account_id text,
    p_credit_account_id text,
    p_amount numeric,
    p_currency_code text,
    p_asset_type text,
    p_transaction_type text,
    p_reference_id text,
    p_fx_rate numeric DEFAULT NULL,
    p_metadata jsonb DEFAULT NULL
) RETURNS TABLE(
    entry_id text,
    block_hash text,
    debit_balance numeric,
    credit_balance numeric
) AS $$
DECLARE
    v_entry_id text := gen_random_uuid()::text;
    v_prev_hash text;
    v_now timestamptz := now();
    v_payload text;
    v_block_hash text;
    v_debit record;
    v_credit record;
    a1 text;
    a2 text;
BEGIN
    -- Validate amount
    IF p_amount IS NULL OR p_amount <= 0 THEN
        RAISE EXCEPTION 'Amount must be > 0';
    END IF;
    -- Serialize all postings on this ledger stream FIRST: both the hash-chain
    -- read below and the idempotency pre-check are only race-free under this
    -- lock (previously the check ran before the lock, leaving a window where
    -- a concurrent duplicate surfaced as a raw unique_violation instead).
    PERFORM pg_advisory_xact_lock(hashtext(p_ledger_id));
    -- Idempotency check (001's unique constraint remains the hard backstop)
    IF EXISTS (
        SELECT 1 FROM ledger_entries le
        WHERE le.ledger_id = p_ledger_id
          AND le.reference_id = p_reference_id
    ) THEN
        RAISE EXCEPTION 'Duplicate reference_id for ledger: %', p_reference_id;
    END IF;
    -- Deadlock-safe lock ordering (always lock accounts in ascending id order)
    a1 := LEAST(p_debit_account_id, p_credit_account_id);
    a2 := GREATEST(p_debit_account_id, p_credit_account_id);
    PERFORM 1 FROM bank_accounts WHERE id = a1 FOR UPDATE;
    PERFORM 1 FROM bank_accounts WHERE id = a2 FOR UPDATE;
    -- Fetch accounts (already locked)
    SELECT * INTO v_debit FROM bank_accounts WHERE id = p_debit_account_id;
    SELECT * INTO v_credit FROM bank_accounts WHERE id = p_credit_account_id;
    IF v_debit.id IS NULL OR v_credit.id IS NULL THEN
        RAISE EXCEPTION 'Account not found';
    END IF;
    -- Currency validation: both sides must be denominated in p_currency_code
    IF v_debit.currency_code <> p_currency_code OR v_credit.currency_code <> p_currency_code THEN
        RAISE EXCEPTION 'Currency mismatch';
    END IF;
    -- Sufficient funds check (debit side only; credit side only grows)
    IF v_debit.available_balance < p_amount THEN
        RAISE EXCEPTION 'Insufficient balance: available=%, requested=%',
            v_debit.available_balance, p_amount;
    END IF;
    -- Previous chain head. The column MUST be table-qualified: a bare
    -- "block_hash" is ambiguous with this function's OUT parameter of the
    -- same name and aborts at runtime under PL/pgSQL's default
    -- variable_conflict = error. created_at DESC breaks timestamp ties
    -- (entries posted in the same transaction share now()).
    SELECT le.block_hash INTO v_prev_hash
    FROM ledger_entries le
    WHERE le.ledger_id = p_ledger_id
    ORDER BY le.timestamp_utc DESC, le.created_at DESC
    LIMIT 1;
    -- Canonical pipe-delimited payload; the leading previous hash makes each
    -- entry commit to the full history of its stream.
    v_payload :=
        COALESCE(v_prev_hash,'') || '|' ||
        v_entry_id || '|' ||
        p_ledger_id || '|' ||
        p_debit_account_id || '|' ||
        p_credit_account_id || '|' ||
        p_amount::text || '|' ||
        p_currency_code || '|' ||
        p_asset_type || '|' ||
        p_transaction_type || '|' ||
        p_reference_id || '|' ||
        v_now::text;
    v_block_hash := encode(digest(v_payload, 'sha256'), 'hex');
    -- Insert ledger entry
    INSERT INTO ledger_entries (
        id, ledger_id, debit_account_id, credit_account_id,
        amount, currency_code, fx_rate, asset_type, transaction_type,
        reference_id, timestamp_utc, block_hash, previous_hash,
        status, metadata, created_at, updated_at
    ) VALUES (
        v_entry_id, p_ledger_id, p_debit_account_id, p_credit_account_id,
        p_amount, p_currency_code, p_fx_rate, p_asset_type, p_transaction_type,
        p_reference_id, v_now, v_block_hash, v_prev_hash,
        'POSTED', p_metadata, v_now, v_now
    );
    -- Move balances: debit side shrinks, credit side grows, by p_amount each
    UPDATE bank_accounts
    SET balance = balance - p_amount,
        available_balance = available_balance - p_amount,
        updated_at = v_now
    WHERE id = p_debit_account_id;
    UPDATE bank_accounts
    SET balance = balance + p_amount,
        available_balance = available_balance + p_amount,
        updated_at = v_now
    WHERE id = p_credit_account_id;
    -- Return the new entry id, its block hash, and both post-transfer balances
    RETURN QUERY
    SELECT
        v_entry_id,
        v_block_hash,
        (SELECT balance FROM bank_accounts WHERE id = p_debit_account_id),
        (SELECT balance FROM bank_accounts WHERE id = p_credit_account_id);
END;
$$ LANGUAGE plpgsql;

View File

@@ -0,0 +1,52 @@
-- SAL extension: positions (asset x chain), fees, reconciliation snapshots.
-- Run after 005_post_ledger_entry.sql.
-- Positions: inventory per account per asset per chain.
-- Per-(account, asset, chain) inventory position maintained by the SAL layer.
CREATE TABLE IF NOT EXISTS sal_positions (
id TEXT PRIMARY KEY,
account_id TEXT NOT NULL,
asset TEXT NOT NULL,
chain_id INTEGER NOT NULL,
-- NUMERIC(32,18): 18 fractional digits — presumably to match 18-decimal
-- on-chain token amounts; confirm against the SAL posting code.
balance NUMERIC(32, 18) NOT NULL DEFAULT 0,
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(account_id, asset, chain_id) -- exactly one position row per account/asset/chain
);
-- Lookup paths: by account, by chain, by asset.
CREATE INDEX IF NOT EXISTS idx_sal_positions_account ON sal_positions(account_id);
CREATE INDEX IF NOT EXISTS idx_sal_positions_chain ON sal_positions(chain_id);
CREATE INDEX IF NOT EXISTS idx_sal_positions_asset ON sal_positions(asset);
-- Fees: gas and protocol fees per chain/tx.
-- Gas and protocol fees recorded per chain/transaction; joined back to ledger
-- activity through reference_id.
CREATE TABLE IF NOT EXISTS sal_fees (
id TEXT PRIMARY KEY,
reference_id TEXT NOT NULL,
chain_id INTEGER NOT NULL,
tx_hash TEXT, -- nullable: fee may be recorded before the tx is broadcast/known
fee_type TEXT NOT NULL,
amount NUMERIC(32, 18) NOT NULL,
currency_code TEXT NOT NULL DEFAULT 'native', -- 'native' = chain's base asset
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Lookup paths: by originating reference, by chain, by on-chain tx hash.
CREATE INDEX IF NOT EXISTS idx_sal_fees_reference ON sal_fees(reference_id);
CREATE INDEX IF NOT EXISTS idx_sal_fees_chain ON sal_fees(chain_id);
CREATE INDEX IF NOT EXISTS idx_sal_fees_tx ON sal_fees(tx_hash);
-- Reconciliation snapshots: on-chain balance vs SAL.
-- Point-in-time comparison of the SAL-tracked balance against the balance
-- observed on chain; one row per reconciliation run per position.
CREATE TABLE IF NOT EXISTS sal_reconciliation_snapshots (
id TEXT PRIMARY KEY,
account_id TEXT NOT NULL,
asset TEXT NOT NULL,
chain_id INTEGER NOT NULL,
sal_balance NUMERIC(32, 18) NOT NULL,
on_chain_balance NUMERIC(32, 18), -- nullable: on-chain read may have failed
block_number BIGINT, -- chain height the on-chain balance was read at
discrepancy NUMERIC(32, 18), -- presumably sal_balance - on_chain_balance; confirm in recon job
status TEXT NOT NULL DEFAULT 'ok',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Lookup paths: by account, by chain, and by time (for latest-snapshot queries).
CREATE INDEX IF NOT EXISTS idx_sal_recon_account ON sal_reconciliation_snapshots(account_id);
CREATE INDEX IF NOT EXISTS idx_sal_recon_chain ON sal_reconciliation_snapshots(chain_id);
CREATE INDEX IF NOT EXISTS idx_sal_recon_created ON sal_reconciliation_snapshots(created_at);

View File

@@ -0,0 +1,955 @@
# Ledger Backfill Strategy
**Version**: 1.0.0
**Last Updated**: 2025-01-20
**Status**: Active Documentation
## Overview
This document outlines the strategy for backfilling historical ledger data into the DBIS Core Banking System ledger. The backfill process ensures data integrity, maintains idempotency, and supports resumable operations.
---
## Backfill Scenarios
### Scenario 1: Initial System Setup (Empty Ledger)
**Use Case**: Setting up a new DBIS instance with empty ledger, populating from external source (e.g., legacy system, CSV export, external API).
**Approach**:
1. Validate source data integrity
2. Transform source data to DBIS ledger format
3. Batch insert with idempotency checks
4. Verify balance consistency
5. Apply constraints after backfill
### Scenario 2: Schema Migration (Existing Ledger Data)
**Use Case**: Migrating existing ledger data to new schema (e.g., adding new fields, restructuring).
**Approach**:
1. Audit existing data
2. Transform to new schema format
3. Migrate in batches
4. Verify data integrity
5. Update schema constraints
### Scenario 3: Data Reconciliation (Fix Inconsistencies)
**Use Case**: Fixing inconsistent balances or missing entries discovered during audit.
**Approach**:
1. Identify inconsistencies
2. Generate correction entries
3. Apply corrections via normal posting function
4. Verify balance consistency
5. Document corrections in audit log
### Scenario 4: Dual-Ledger Sync (SCB Ledger Backfill)
**Use Case**: Backfilling historical entries from SCB (Sovereign Central Bank) ledger to DBIS.
**Approach**:
1. Extract entries from SCB ledger
2. Transform to DBIS format
3. Post to DBIS via outbox pattern
4. Track sync status
5. Verify dual-ledger consistency
---
## Backfill Architecture
### Component Overview
```
┌─────────────────────────────────────────────────────────────┐
│ Backfill Architecture │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ ┌──────────────┐ ┌─────────────┐ │
│ │ Source │────▶│ Transform │────▶│ Validate │ │
│ │ Reader │ │ Service │ │ Service │ │
│ └─────────────┘ └──────────────┘ └─────────────┘ │
│ │
│ │ │
│ ▼ │
│ ┌──────────────┐ │
│ │ Batch │ │
│ │ Processor │ │
│ └──────────────┘ │
│ │ │
│ ┌───────────┴───────────┐ │
│ ▼ ▼ │
│ ┌──────────────┐ ┌──────────────┐ │
│ │ Ledger │ │ Checkpoint │ │
│ │ Posting │ │ Service │ │
│ │ Module │ └──────────────┘ │
│ └──────────────┘ │
│ │ │
│ ▼ │
│ ┌──────────────┐ │
│ │ Audit & │ │
│ │ Verification │ │
│ └──────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
```
### Key Components
1. **Source Reader**: Reads data from source (CSV, API, database, etc.)
2. **Transform Service**: Transforms source data to DBIS ledger format
3. **Validate Service**: Validates entries before posting
4. **Batch Processor**: Processes entries in batches with checkpointing
5. **Ledger Posting Module**: Uses atomic posting function for entries
6. **Checkpoint Service**: Tracks progress for resumable backfill
7. **Audit & Verification**: Validates backfill results
---
## Backfill Process
### Step 1: Pre-Backfill Preparation
#### 1.1 Audit Existing Data
Before starting backfill, audit existing data:
```sql
-- Check for existing ledger entries
SELECT COUNT(*), MIN(timestamp_utc), MAX(timestamp_utc)
FROM ledger_entries;
-- Check for inconsistent balances
SELECT id, balance, available_balance, reserved_balance
FROM bank_accounts
WHERE available_balance < 0
OR reserved_balance < 0
OR available_balance > balance
OR (available_balance + reserved_balance) > balance;
-- Check for duplicate reference IDs
SELECT ledger_id, reference_id, COUNT(*)
FROM ledger_entries
GROUP BY ledger_id, reference_id
HAVING COUNT(*) > 1;
```
#### 1.2 Verify Schema
Ensure all required migrations are applied:
```sql
-- Verify idempotency constraint exists
SELECT constraint_name
FROM information_schema.table_constraints
WHERE table_name = 'ledger_entries'
AND constraint_name LIKE '%reference%';
-- Verify outbox table exists
SELECT COUNT(*) FROM dual_ledger_outbox;
-- Verify posting function exists
SELECT routine_name
FROM information_schema.routines
WHERE routine_name = 'post_ledger_entry';
```
#### 1.3 Prepare Source Data
- **CSV Export**: Ensure format matches expected schema
- **API Extraction**: Configure API endpoints and authentication
- **Database Extraction**: Set up connection and query filters
- **Legacy System**: Configure export format and mapping
---
### Step 2: Data Transformation
#### 2.1 Source Data Format
Source data should be transformed to this format:
```typescript
interface LedgerEntrySource {
ledgerId: string; // e.g., "MASTER", "SOVEREIGN"
debitAccountId: string; // Account ID
creditAccountId: string; // Account ID
amount: string; // Decimal as string (e.g., "1000.00")
currencyCode: string; // ISO 4217 (e.g., "USD")
assetType: string; // "fiat", "cbdc", "commodity", "security"
transactionType: string; // Transaction type classification
referenceId: string; // Unique reference ID (required for idempotency)
timestampUtc?: string; // ISO 8601 timestamp
fxRate?: string; // FX rate if applicable
metadata?: Record<string, unknown>; // Additional metadata
}
```
#### 2.2 Transformation Rules
1. **Account ID Mapping**: Map source account identifiers to DBIS account IDs
2. **Amount Normalization**: Convert amounts to standard format (decimal string)
3. **Currency Validation**: Validate currency codes against ISO 4217
4. **Timestamp Normalization**: Convert timestamps to UTC ISO 8601 format
5. **Reference ID Generation**: Generate unique reference IDs if not present
6. **Metadata Extraction**: Extract relevant metadata from source
#### 2.3 Example Transformation Script
```typescript
// Example: Transform CSV data
function transformCSVToLedgerEntry(csvRow: CSVRow): LedgerEntrySource {
return {
ledgerId: csvRow.ledger || 'MASTER',
debitAccountId: mapAccountId(csvRow.debit_account),
creditAccountId: mapAccountId(csvRow.credit_account),
amount: normalizeAmount(csvRow.amount),
currencyCode: csvRow.currency || 'USD',
assetType: csvRow.asset_type || 'fiat',
transactionType: mapTransactionType(csvRow.txn_type),
referenceId: csvRow.reference_id || generateReferenceId(csvRow),
timestampUtc: csvRow.timestamp || new Date().toISOString(),
fxRate: csvRow.fx_rate || undefined,
metadata: extractMetadata(csvRow),
};
}
```
---
### Step 3: Batch Processing
#### 3.1 Batch Configuration
Configure batch processing parameters:
```typescript
interface BackfillConfig {
batchSize: number; // Entries per batch (default: 1000)
checkpointInterval: number; // Checkpoint every N batches (default: 10)
maxRetries: number; // Max retries per batch (default: 3)
retryDelay: number; // Initial retry delay in ms (default: 1000)
parallelWorkers: number; // Number of parallel workers (default: 1)
skipDuplicates: boolean; // Skip entries with duplicate reference IDs (default: true)
validateBalances: boolean; // Validate balances after each batch (default: true)
}
```
#### 3.2 Checkpointing Strategy
Use checkpointing to enable resumable backfill:
```sql
-- Create checkpoint table for ledger backfill
CREATE TABLE IF NOT EXISTS ledger_backfill_checkpoints (
id SERIAL PRIMARY KEY,
source_id VARCHAR(255) NOT NULL,
source_type VARCHAR(50) NOT NULL, -- 'CSV', 'API', 'DATABASE', 'SCB'
last_processed_id VARCHAR(255),
last_processed_timestamp TIMESTAMP,
total_processed BIGINT DEFAULT 0,
total_successful BIGINT DEFAULT 0,
total_failed BIGINT DEFAULT 0,
status VARCHAR(50) DEFAULT 'IN_PROGRESS', -- 'IN_PROGRESS', 'COMPLETED', 'FAILED', 'PAUSED'
started_at TIMESTAMP DEFAULT NOW(),
last_checkpoint_at TIMESTAMP DEFAULT NOW(),
completed_at TIMESTAMP,
error_message TEXT,
metadata JSONB,
UNIQUE(source_id, source_type)
);
CREATE INDEX idx_backfill_checkpoints_status
ON ledger_backfill_checkpoints(status);
CREATE INDEX idx_backfill_checkpoints_source
ON ledger_backfill_checkpoints(source_id, source_type);
```
#### 3.3 Batch Processing Loop
```typescript
async function processBackfill(
source: DataSource,
config: BackfillConfig
): Promise<BackfillResult> {
const checkpoint = await loadCheckpoint(source.id, source.type);
let processed = 0;
let successful = 0;
let failed = 0;
let lastProcessedId: string | null = null;
let lastProcessedTimestamp: Date | null = null;
while (true) {
// Load batch from source (starting from checkpoint)
const batch = await source.readBatch({
startId: checkpoint?.lastProcessedId,
startTimestamp: checkpoint?.lastProcessedTimestamp,
limit: config.batchSize,
});
if (batch.length === 0) {
break; // No more data
}
// Process batch
const results = await processBatch(batch, config);
// Update counters
processed += batch.length;
successful += results.successful;
failed += results.failed;
// Update checkpoint
lastProcessedId = batch[batch.length - 1].id;
lastProcessedTimestamp = batch[batch.length - 1].timestamp;
await saveCheckpoint({
sourceId: source.id,
sourceType: source.type,
lastProcessedId,
lastProcessedTimestamp,
totalProcessed: processed,
totalSuccessful: successful,
totalFailed: failed,
status: 'IN_PROGRESS',
});
// Validate balances if configured
if (config.validateBalances && processed % (config.checkpointInterval * config.batchSize) === 0) {
await validateBalances();
}
}
// Mark as completed
await saveCheckpoint({
sourceId: source.id,
sourceType: source.type,
status: 'COMPLETED',
completedAt: new Date(),
});
return {
totalProcessed: processed,
totalSuccessful: successful,
totalFailed: failed,
};
}
```
---
### Step 4: Entry Posting
#### 4.1 Use Atomic Posting Function
Always use the atomic posting function for backfill entries:
```typescript
async function postBackfillEntry(entry: LedgerEntrySource): Promise<void> {
try {
// Use atomic posting function via SQL. Note: $queryRaw (not $executeRaw) is
// required here — $executeRaw returns only an affected-row count for a SELECT.
const result = await prisma.$queryRaw`
SELECT post_ledger_entry(
${entry.ledgerId}::TEXT,
${entry.debitAccountId}::TEXT,
${entry.creditAccountId}::TEXT,
${entry.amount}::NUMERIC,
${entry.currencyCode}::TEXT,
${entry.assetType}::TEXT,
${entry.transactionType}::TEXT,
${entry.referenceId}::TEXT,
${entry.fxRate || null}::NUMERIC,
${entry.metadata ? JSON.stringify(entry.metadata) : null}::JSONB
)
`;
// Verify result
if (!result) {
throw new Error('Failed to post ledger entry');
}
} catch (error) {
// Handle idempotency violation (duplicate reference ID)
if (error.code === '23505' || error.message?.includes('duplicate')) {
// Skip duplicate entries if configured
if (config.skipDuplicates) {
return; // Entry already exists, skip
}
throw new Error(`Duplicate reference ID: ${entry.referenceId}`);
}
throw error;
}
}
```
#### 4.2 Batch Posting
Post entries in batches for efficiency:
```typescript
async function processBatch(
entries: LedgerEntrySource[],
config: BackfillConfig
): Promise<{ successful: number; failed: number }> {
let successful = 0;
let failed = 0;
// Process in parallel if configured
if (config.parallelWorkers > 1) {
const chunks = chunkArray(entries, config.parallelWorkers);
const results = await Promise.allSettled(
chunks.map((chunk) => processChunk(chunk, config))
);
for (const result of results) {
if (result.status === 'fulfilled') {
successful += result.value.successful;
failed += result.value.failed;
} else {
failed += entries.length;
}
}
} else {
// Sequential processing
for (const entry of entries) {
try {
await postBackfillEntry(entry);
successful++;
} catch (error) {
failed++;
logError(entry, error);
}
}
}
return { successful, failed };
}
```
---
### Step 5: Balance Constraints Application
#### 5.1 Pre-Constraint Validation
Before applying balance constraints, validate all balances:
```sql
-- Validate all balances are consistent
DO $$
DECLARE
inconsistent_count INTEGER;
BEGIN
SELECT COUNT(*) INTO inconsistent_count
FROM bank_accounts
WHERE available_balance < 0
OR reserved_balance < 0
OR available_balance > balance
OR (available_balance + reserved_balance) > balance;
IF inconsistent_count > 0 THEN
RAISE EXCEPTION 'Found % inconsistent balances. Fix before applying constraints.', inconsistent_count;
END IF;
END $$;
```
#### 5.2 Apply Constraints
After backfill completes and balances are validated, apply constraints:
```bash
# Apply balance constraints migration
psql $DATABASE_URL -f db/migrations/004_balance_constraints.sql
```
#### 5.3 Post-Constraint Verification
Verify constraints are applied correctly:
```sql
-- Check constraint exists
SELECT constraint_name, constraint_type
FROM information_schema.table_constraints
WHERE table_name = 'bank_accounts'
AND constraint_name LIKE '%balance%';
-- Verify constraint is enforced
-- This should fail if constraints are working
UPDATE bank_accounts
SET available_balance = -1
WHERE id = (SELECT id FROM bank_accounts LIMIT 1);
```
---
### Step 6: Verification and Reconciliation
#### 6.1 Entry Verification
Verify all entries were posted correctly:
```sql
-- Compare source count vs. posted count
SELECT
COUNT(*) as total_entries,
COUNT(DISTINCT reference_id) as unique_references,
COUNT(DISTINCT ledger_id) as unique_ledgers,
MIN(timestamp_utc) as earliest_entry,
MAX(timestamp_utc) as latest_entry
FROM ledger_entries
WHERE reference_id LIKE 'BACKFILL-%'; -- If using prefix for backfill entries
-- Check for missing entries
SELECT source_id, reference_id
FROM backfill_source_data
WHERE NOT EXISTS (
SELECT 1 FROM ledger_entries
WHERE reference_id = backfill_source_data.reference_id
);
```
#### 6.2 Balance Reconciliation
Reconcile balances after backfill:
```sql
-- Compare expected vs. actual balances
SELECT
  account_id,
  expected_balance,
  actual_balance,
  (expected_balance - actual_balance) as difference
FROM (
  SELECT
    m.account_id,
    SUM(m.delta) as expected_balance,
    (SELECT balance FROM bank_accounts WHERE id = m.account_id) as actual_balance
  FROM (
    -- ledger_entries stores debit_account_id/credit_account_id (there is no
    -- single account_id/side pair), so explode each entry into its two
    -- balance movements: debit decreases the balance, credit increases it
    SELECT debit_account_id AS account_id, -amount AS delta FROM ledger_entries
    UNION ALL
    SELECT credit_account_id AS account_id, amount AS delta FROM ledger_entries
  ) m
  GROUP BY m.account_id
) reconciliation
WHERE ABS(expected_balance - actual_balance) > 0.01; -- Allow small rounding differences
```
#### 6.3 Dual-Ledger Reconciliation
If backfilling from SCB ledger, reconcile dual-ledger consistency:
```sql
-- Check outbox sync status
SELECT
status,
COUNT(*) as count,
MIN(created_at) as oldest,
MAX(created_at) as newest
FROM dual_ledger_outbox
WHERE created_at >= (SELECT MIN(timestamp_utc) FROM ledger_entries WHERE reference_id LIKE 'BACKFILL-%')
GROUP BY status;
-- Verify all entries have corresponding outbox records (for SCB sync)
SELECT le.id, le.reference_id
FROM ledger_entries le
WHERE le.reference_id LIKE 'BACKFILL-%'
AND NOT EXISTS (
SELECT 1 FROM dual_ledger_outbox dlo
WHERE dlo.reference_id = le.reference_id
);
```
---
## Implementation Scripts
### TypeScript Backfill Script
**File**: `dbis_core/scripts/backfill-ledger.ts`
```typescript
#!/usr/bin/env ts-node
import { PrismaClient } from '@prisma/client';
import { readFileSync } from 'fs';
import { parse } from 'csv-parse/sync';
const prisma = new PrismaClient();
interface BackfillConfig {
sourceFile: string;
ledgerId: string;
batchSize: number;
skipDuplicates: boolean;
}
async function backfillFromCSV(config: BackfillConfig) {
// Read and parse CSV
const csvData = readFileSync(config.sourceFile, 'utf-8');
const records = parse(csvData, {
columns: true,
skip_empty_lines: true,
});
let processed = 0;
let successful = 0;
let failed = 0;
// Process in batches
for (let i = 0; i < records.length; i += config.batchSize) {
const batch = records.slice(i, i + config.batchSize);
for (const record of batch) {
try {
// Transform and post entry
await prisma.$executeRaw`
SELECT post_ledger_entry(
${config.ledgerId}::TEXT,
${record.debitAccountId}::TEXT,
${record.creditAccountId}::TEXT,
${record.amount}::NUMERIC,
${record.currencyCode}::TEXT,
${record.assetType || 'fiat'}::TEXT,
${record.transactionType}::TEXT,
${record.referenceId}::TEXT,
${record.fxRate || null}::NUMERIC,
${record.metadata ? JSON.stringify(JSON.parse(record.metadata)) : null}::JSONB
)
`;
successful++;
} catch (error) {
if (config.skipDuplicates && error.code === '23505') {
// Skip duplicates
continue;
}
failed++;
console.error(`Failed to post entry ${record.referenceId}:`, error.message);
}
processed++;
}
console.log(`Processed ${processed}/${records.length} entries (${successful} successful, ${failed} failed)`);
}
return { processed, successful, failed };
}
// CLI entry point
const config: BackfillConfig = {
sourceFile: process.env.BACKFILL_SOURCE_FILE || 'backfill.csv',
ledgerId: process.env.LEDGER_ID || 'MASTER',
batchSize: parseInt(process.env.BATCH_SIZE || '1000', 10),
skipDuplicates: process.env.SKIP_DUPLICATES === 'true',
};
backfillFromCSV(config)
.then((result) => {
console.log('Backfill completed:', result);
process.exit(0);
})
.catch((error) => {
console.error('Backfill failed:', error);
process.exit(1);
})
.finally(() => {
prisma.$disconnect();
});
```
### SQL Backfill Script
**File**: `dbis_core/scripts/backfill-ledger.sql`
```sql
-- Ledger Backfill Script
-- Use this for direct SQL-based backfill from another database
-- Example: Backfill from external ledger_entries_legacy table
DO $$
DECLARE
batch_size INTEGER := 1000;
processed INTEGER := 0;
successful INTEGER := 0;
failed INTEGER := 0;
entry RECORD;
BEGIN
-- Create temporary table for batch processing
CREATE TEMP TABLE backfill_batch AS
SELECT * FROM ledger_entries_legacy
ORDER BY id
LIMIT 0;
-- Process in batches
FOR entry IN
SELECT * FROM ledger_entries_legacy
ORDER BY id
LOOP
BEGIN
-- Post entry using atomic function
PERFORM post_ledger_entry(
entry.ledger_id::TEXT,
entry.debit_account_id::TEXT,
entry.credit_account_id::TEXT,
entry.amount::NUMERIC,
entry.currency_code::TEXT,
entry.asset_type::TEXT,
entry.transaction_type::TEXT,
entry.reference_id::TEXT,
entry.fx_rate::NUMERIC,
entry.metadata::JSONB
);
successful := successful + 1;
EXCEPTION
WHEN unique_violation THEN
-- Duplicate reference ID, skip if configured
failed := failed + 1;
RAISE NOTICE 'Skipping duplicate reference ID: %', entry.reference_id;
WHEN OTHERS THEN
failed := failed + 1;
RAISE NOTICE 'Error processing entry %: %', entry.reference_id, SQLERRM;
END;
processed := processed + 1;
-- Checkpoint every batch_size entries
IF processed % batch_size = 0 THEN
RAISE NOTICE 'Processed % entries (% successful, % failed)', processed, successful, failed;
END IF;
END LOOP;
RAISE NOTICE 'Backfill completed: % total, % successful, % failed', processed, successful, failed;
END $$;
```
---
## Best Practices
### 1. Idempotency
- Always use unique `reference_id` for each entry
- Use atomic posting function that enforces idempotency
- Skip duplicates during backfill if they already exist
### 2. Checkpointing
- Save checkpoint after each batch
- Enable resumable backfill from last checkpoint
- Track progress with metrics (processed, successful, failed)
### 3. Validation
- Validate source data before transformation
- Validate transformed entries before posting
- Verify balances after backfill completion
- Reconcile with source system if possible
### 4. Error Handling
- Log all errors with full context
- Retry transient errors with exponential backoff
- Skip permanent errors (e.g., duplicate reference IDs)
- Generate error report after completion
### 5. Performance
- Process in batches (1000-10000 entries per batch)
- Use parallel processing for large backfills
- Monitor database performance during backfill
- Schedule during low-traffic periods
### 6. Testing
- Test backfill process on staging environment first
- Use small test dataset to verify transformation
- Verify balances match expected values
- Test rollback procedures if needed
---
## Monitoring and Metrics
### Key Metrics to Track
1. **Progress Metrics**:
- Total entries to process
- Entries processed
- Entries successful
- Entries failed
- Processing rate (entries/second)
2. **Performance Metrics**:
- Batch processing time
- Database query time
- Checkpoint save time
- Total elapsed time
3. **Quality Metrics**:
- Duplicate entries skipped
- Validation errors
- Balance inconsistencies
- Reconciliation mismatches
### Monitoring Queries
```sql
-- Check backfill progress
SELECT
source_id,
source_type,
status,
total_processed,
total_successful,
total_failed,
last_checkpoint_at,
NOW() - last_checkpoint_at as time_since_last_checkpoint
FROM ledger_backfill_checkpoints
WHERE status = 'IN_PROGRESS';
-- Check for stalled backfills
SELECT *
FROM ledger_backfill_checkpoints
WHERE status = 'IN_PROGRESS'
AND last_checkpoint_at < NOW() - INTERVAL '1 hour';
-- Verify backfill completion
SELECT
COUNT(*) as total_entries,
MIN(timestamp_utc) as earliest,
MAX(timestamp_utc) as latest
FROM ledger_entries
WHERE reference_id LIKE 'BACKFILL-%';
```
---
## Rollback Procedures
### Scenario 1: Rollback Before Constraints Applied
If constraints have not been applied, rollback is straightforward:
```sql
-- Reverse balance effects FIRST, while the backfilled entries still exist
-- (deleting them first would make the correction sums below come out to zero).
-- The original posting subtracted from the debit account and added to the
-- credit account, so the reversal applies the opposite signs.
UPDATE bank_accounts
SET balance = balance + bf.reversal,
    available_balance = available_balance + bf.reversal
FROM (
  SELECT account_id, SUM(delta) AS reversal
  FROM (
    SELECT debit_account_id AS account_id, amount AS delta
    FROM ledger_entries WHERE reference_id LIKE 'BACKFILL-%'
    UNION ALL
    SELECT credit_account_id AS account_id, -amount AS delta
    FROM ledger_entries WHERE reference_id LIKE 'BACKFILL-%'
  ) movements
  GROUP BY account_id
) bf
WHERE bank_accounts.id = bf.account_id;
-- Then remove backfilled entries
DELETE FROM ledger_entries
WHERE reference_id LIKE 'BACKFILL-%';
-- And their outbox records
DELETE FROM dual_ledger_outbox
WHERE reference_id LIKE 'BACKFILL-%';
```
### Scenario 2: Rollback After Constraints Applied
If constraints have been applied, rollback is more complex:
1. Temporarily disable constraints
2. Remove backfilled entries
3. Recalculate balances
4. Re-enable constraints
5. Verify balance consistency
**Note**: This should only be done during maintenance window.
---
## Troubleshooting
### Common Issues
#### 1. Duplicate Reference ID Errors
**Symptom**: `unique_violation` error on `reference_id`
**Solution**:
- Check if entries were already backfilled
- Use `skipDuplicates: true` to skip existing entries
- Or regenerate reference IDs for duplicates
#### 2. Balance Inconsistencies
**Symptom**: Balance validation fails
**Solution**:
- Identify accounts with inconsistent balances
- Generate correction entries
- Post corrections before applying constraints
#### 3. Slow Performance
**Symptom**: Backfill processing is slow
**Solution**:
- Increase batch size (if memory allows)
- Use parallel processing
- Optimize database indexes
- Run during off-peak hours
#### 4. Out of Memory
**Symptom**: Process runs out of memory
**Solution**:
- Reduce batch size
- Process sequentially instead of parallel
- Use streaming instead of loading all data
---
## Examples
### Example 1: CSV Backfill
```bash
# Configure environment
export DATABASE_URL="postgresql://user:password@host:port/database"
export BACKFILL_SOURCE_FILE="ledger_export.csv"
export LEDGER_ID="MASTER"
export BATCH_SIZE="1000"
export SKIP_DUPLICATES="true"
# Run backfill script
cd dbis_core
ts-node scripts/backfill-ledger.ts
```
### Example 2: SCB Ledger Sync
```typescript
// Backfill from SCB ledger via API
async function backfillFromSCB(sovereignBankId: string, startDate: Date, endDate: Date) {
const scbApi = new SCBLedgerAPI(sovereignBankId);
const entries = await scbApi.getLedgerEntries(startDate, endDate);
for (const entry of entries) {
// Transform SCB entry to DBIS format
const dbisEntry = transformSCBEntry(entry);
// Post to DBIS (will create outbox record for dual-ledger sync)
await ledgerPostingModule.postEntry(dbisEntry);
}
}
```
---
## References
- Migration Files: `dbis_core/db/migrations/`
- Ledger Posting Module: `dbis_core/src/core/ledger/ledger-posting.module.ts`
- Atomic Posting Function: `dbis_core/db/migrations/005_post_ledger_entry.sql`
- Block Indexer Backfill: `explorer-monorepo/backend/indexer/backfill/backfill.go` (reference pattern)
---
# Database Migrations
This directory contains SQL migrations that enforce ledger correctness boundaries.
## Migration Order
Run migrations in this order:
1. `001_ledger_idempotency.sql` - Add unique constraint for idempotency
2. `002_dual_ledger_outbox.sql` - Create outbox table
3. `003_outbox_state_machine.sql` - Enforce state machine constraints
4. `004_balance_constraints.sql` - Enforce balance integrity (apply after data cleanup)
5. `005_post_ledger_entry.sql` - Create atomic posting function
6. `006_sal_positions_fees.sql` - SAL extension: positions (asset x chain), fees, reconciliation snapshots
## Running Migrations
### Option 1: Direct SQL execution
```bash
# Set your database connection
export DATABASE_URL="postgresql://user:password@host:port/database"
# Run migrations in order
psql $DATABASE_URL -f db/migrations/001_ledger_idempotency.sql
psql $DATABASE_URL -f db/migrations/002_dual_ledger_outbox.sql
psql $DATABASE_URL -f db/migrations/003_outbox_state_machine.sql
psql $DATABASE_URL -f db/migrations/004_balance_constraints.sql
psql $DATABASE_URL -f db/migrations/005_post_ledger_entry.sql
psql $DATABASE_URL -f db/migrations/006_sal_positions_fees.sql
```
### Option 2: Prisma migrate (if using Prisma migrations)
These SQL files can be added to a Prisma migration:
```bash
npx prisma migrate dev --name add_ledger_correctness_boundaries
```
Then copy the SQL into the generated migration file.
## Important Notes
### Column Naming
These migrations assume **snake_case** column names in the database (Prisma default).
If your database uses camelCase, adjust the SQL accordingly:
- `ledger_id``ledgerId`
- `debit_account_id``debitAccountId`
- etc.
### Balance Constraints
The balance constraints in `004_balance_constraints.sql` will fail if you have existing inconsistent data.
**Before applying:**
1. Audit existing balances
2. Fix any inconsistencies
3. Then apply the constraints
### Testing
After applying migrations, verify:
```sql
-- Check idempotency constraint exists
SELECT constraint_name
FROM information_schema.table_constraints
WHERE table_name = 'ledger_entries'
AND constraint_name LIKE '%reference%';
-- Check outbox table exists
SELECT COUNT(*) FROM dual_ledger_outbox;
-- Test posting function
SELECT * FROM post_ledger_entry(
'Test'::TEXT,
'account1'::TEXT,
'account2'::TEXT,
100::NUMERIC,
'USD'::TEXT,
'fiat'::TEXT,
'Type_A'::TEXT,
'test-ref-123'::TEXT,
NULL::NUMERIC,
NULL::JSONB
);
```
## Rollback
These migrations are designed to be additive. To rollback:
1. Drop the function: `DROP FUNCTION IF EXISTS post_ledger_entry(...);`
2. Drop the outbox table: `DROP TABLE IF EXISTS dual_ledger_outbox CASCADE;`
3. Remove constraints: `ALTER TABLE ledger_entries DROP CONSTRAINT IF EXISTS ledger_entries_unique_ledger_reference;`
4. Remove balance constraints: `ALTER TABLE bank_accounts DROP CONSTRAINT IF EXISTS ...;`