---
# Prometheus alerting rules for AS4 Settlement.
#
# A single rule group, evaluated every 30 seconds. Every rule sets a
# `severity` label (warning or critical) plus `summary` and `description`
# annotations. In the descriptions, `{{ $value }}` is expanded by Prometheus
# to the value of the expression that triggered the alert.
groups:
  - name: as4_settlement
    interval: 30s
    rules:
      # High latency: as4_message_latency_p99 above 5 for 5 minutes.
      # The description text treats the value as seconds.
      - alert: AS4HighLatency
        expr: as4_message_latency_p99 > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "AS4 message processing latency is high"
          description: "P99 latency is {{ $value }}s (threshold: 5s)"

      # High failure rate: per-second rate of as4_instructions_failed,
      # averaged over 5 minutes, above 0.01 for 5 minutes.
      # NOTE(review): rate() yields failures per second, not a percentage.
      # If a 1% failure ratio was intended, divide by the rate of a
      # total-instructions counter (none is visible in this file).
      - alert: AS4HighFailureRate
        expr: rate(as4_instructions_failed[5m]) > 0.01
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "AS4 instruction failure rate is high"
          description: "Failure rate is {{ $value }} failures/s (threshold: 0.01 failures/s)"

      # Certificate expiry: as4_certificate_days_until_expiry below 30
      # for 1 hour.
      - alert: AS4CertificateExpiring
        expr: as4_certificate_days_until_expiry < 30
        for: 1h
        labels:
          severity: warning
        annotations:
          summary: "AS4 certificate expiring soon"
          description: "Certificate expires in {{ $value }} days"

      # Service down: the scrape target for job "as4-settlement" has
      # reported up == 0 for 1 minute.
      # NOTE(review): this does not fire if the target disappears from
      # service discovery altogether (no `up` series at all) - confirm
      # whether an absent() check is wanted.
      - alert: AS4SystemUnavailable
        expr: up{job="as4-settlement"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "AS4 Settlement system is down"
          description: "AS4 service is not responding"

      # Database connectivity: as4_database_connection_status equal to 0
      # for 1 minute.
      - alert: AS4DatabaseConnectionFailed
        expr: as4_database_connection_status == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "AS4 database connection failed"
          description: "Cannot connect to database"

      # Redis connectivity: as4_redis_connection_status equal to 0
      # for 1 minute.
      - alert: AS4RedisConnectionFailed
        expr: as4_redis_connection_status == 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "AS4 Redis connection failed"
          description: "Cannot connect to Redis (nonce tracking may be affected)"

      # Memory pressure: as4_memory_usage_percent above 80 for 5 minutes.
      - alert: AS4HighMemoryUsage
        expr: as4_memory_usage_percent > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "AS4 system memory usage is high"
          description: "Memory usage is {{ $value }}%"

      # Queue backlog: as4_instruction_queue_length above 1000
      # for 5 minutes.
      - alert: AS4QueueBacklog
        expr: as4_instruction_queue_length > 1000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "AS4 instruction queue backlog"
          description: "Queue length is {{ $value }} instructions"