diff --git a/middleware/redis/README.md b/middleware/redis/README.md index c375374..5b6919e 100644 --- a/middleware/redis/README.md +++ b/middleware/redis/README.md @@ -20,6 +20,11 @@ Creates DataDog monitors with the following checks: - Redis too many expired keys - Redis too many blocked clients - Redis keyspace seems full +- Redis too much RAM memory used +- Redis RAM memory fragmented +- Redis too many rejected connections +- Redis latency is too high +- Redis hitrate is too low ## Inputs @@ -47,14 +52,44 @@ Creates DataDog monitors with the following checks: | expirations_silenced | Groups to mute for Redis keys expirations monitor | map | `` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| hitrate_message | Custom message for Redis hitrate monitor | string | `` | no | +| hitrate_silenced | Groups to mute for Redis hitrate monitor | map | `` | no | +| hitrate_threshold_critical | hitrate limit (critical threshold) | string | `90` | no | +| hitrate_threshold_warning | hitrate limit (warning threshold) | string | `70` | no | +| hitrate_time_aggregator | Monitor aggregator for Redis hitrate [available values: min, max or avg] | string | `min` | no | +| hitrate_timeframe | Monitor timeframe for Redis hitrate [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | keyspace_message | Custom message for Redis keyspace monitor | string | `` | no | | keyspace_silenced | Groups to mute for Redis keyspace monitor | map | `` | no | | keyspace_threshold_critical | Keyspace changement (critical threshold) | string | `90` | no | | keyspace_threshold_warning | Keyspace changement (warning threshold) | string | `70` | no | | keyspace_time_aggregator | Monitor aggregator for Redis keyspace [available values: min, max or avg] | string | `min` | no | | 
keyspace_timeframe | Monitor timeframe for Redis keyspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| latency_message | Custom message for Redis latency monitor | string | `` | no | +| latency_silenced | Groups to mute for Redis latency monitor | map | `` | no | +| latency_threshold_critical | latency limit (critical threshold) | string | `90` | no | +| latency_threshold_warning | latency limit (warning threshold) | string | `70` | no | +| latency_time_aggregator | Monitor aggregator for Redis latency [available values: min, max or avg] | string | `min` | no | +| latency_timeframe | Monitor timeframe for Redis latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| mem_frag_message | Custom message for Redis memory RAM fragmentation monitor | string | `` | no | +| mem_frag_silenced | Groups to mute for Redis memory RAM fragmentation monitor | map | `` | no | +| mem_frag_threshold_critical | memory RAM fragmentation limit (critical threshold) | string | `90` | no | +| mem_frag_threshold_warning | memory RAM fragmentation limit (warning threshold) | string | `70` | no | +| mem_frag_time_aggregator | Monitor aggregator for Redis memory RAM fragmentation [available values: min, max or avg] | string | `min` | no | +| mem_frag_timeframe | Monitor timeframe for Redis memory RAM fragmentation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| mem_used_message | Custom message for Redis RAM memory used monitor | string | `` | no | +| mem_used_silenced | Groups to mute for Redis RAM memory used monitor | map | `` | no | +| mem_used_threshold_critical | RAM memory used limit (critical threshold) | string | `90` | no | +| mem_used_threshold_warning | RAM memory used limit (warning threshold) | string | `70` | no | +| mem_used_time_aggregator | Monitor 
aggregator for Redis RAM memory used [available values: min, max or avg] | string | `min` | no | +| mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | | redis_silenced | Groups to mute for Redis monitors | map | `` | no | +| rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no | +| rejected_con_silenced | Groups to mute for Redis rejected connections errors monitor | map | `` | no | +| rejected_con_threshold_critical | rejected connections errors limit (critical threshold) | string | `90` | no | +| rejected_con_threshold_warning | rejected connections errors limit (warning threshold) | string | `70` | no | +| rejected_con_time_aggregator | Monitor aggregator for Redis rejected connections errors [available values: min, max or avg] | string | `min` | no | +| rejected_con_timeframe | Monitor timeframe for Redis rejected connections errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | ## Outputs @@ -63,7 +98,12 @@ Creates DataDog monitors with the following checks: | redis_blocked_clients_id | id for monitor redis_blocked_clients | | redis_evictedkeys_id | id for monitor redis_evictedkeys | | redis_expirations_id | id for monitor redis_expirations | +| redis_hitrate_id | id for monitor redis_hitrate | | redis_keyspace_id | id for monitor redis_keyspace | +| redis_latency_id | id for monitor redis_latency | +| redis_mem_frag_id | id for monitor redis_mem_frag | +| redis_mem_used_id | id for monitor redis_mem_used | +| redis_rejected_con_id | id for monitor redis_rejected_con | ## Related documentation diff --git a/middleware/redis/inputs.tf b/middleware/redis/inputs.tf index c6fdab0..cabf2ab 100644 --- a/middleware/redis/inputs.tf +++ 
b/middleware/redis/inputs.tf @@ -166,3 +166,173 @@ variable "keyspace_threshold_warning" { description = "Keyspace changement (warning threshold)" default = 70 } + +variable "mem_used_silenced" { + description = "Groups to mute for Redis RAM memory used monitor" + type = "map" + default = {} +} + +variable "mem_used_message" { + description = "Custom message for Redis RAM memory used monitor" + type = "string" + default = "" +} + +variable "mem_used_time_aggregator" { + description = "Monitor aggregator for Redis RAM memory used [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "mem_used_timeframe" { + description = "Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "mem_used_threshold_critical" { + description = "RAM memory used limit (critical threshold)" + default = 90 +} + +variable "mem_used_threshold_warning" { + description = "RAM memory used limit (warning threshold)" + default = 70 +} + +variable "mem_frag_silenced" { + description = "Groups to mute for Redis memory RAM fragmentation monitor" + type = "map" + default = {} +} + +variable "mem_frag_message" { + description = "Custom message for Redis memory RAM fragmentation monitor" + type = "string" + default = "" +} + +variable "mem_frag_time_aggregator" { + description = "Monitor aggregator for Redis memory RAM fragmentation [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "mem_frag_timeframe" { + description = "Monitor timeframe for Redis memory RAM fragmentation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "mem_frag_threshold_critical" { + description = "memory RAM fragmentation limit (critical threshold)" + default = 90 +} + +variable "mem_frag_threshold_warning" { + description = "memory 
RAM fragmentation limit (warning threshold)" + default = 70 +} + +variable "rejected_con_silenced" { + description = "Groups to mute for Redis rejected connections errors monitor" + type = "map" + default = {} +} + +variable "rejected_con_message" { + description = "Custom message for Redis rejected connections errors monitor" + type = "string" + default = "" +} + +variable "rejected_con_time_aggregator" { + description = "Monitor aggregator for Redis rejected connections errors [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "rejected_con_timeframe" { + description = "Monitor timeframe for Redis rejected connections errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "rejected_con_threshold_critical" { + description = "rejected connections errors limit (critical threshold)" + default = 90 +} + +variable "rejected_con_threshold_warning" { + description = "rejected connections errors limit (warning threshold)" + default = 70 +} + +variable "latency_silenced" { + description = "Groups to mute for Redis latency monitor" + type = "map" + default = {} +} + +variable "latency_message" { + description = "Custom message for Redis latency monitor" + type = "string" + default = "" +} + +variable "latency_time_aggregator" { + description = "Monitor aggregator for Redis latency [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "latency_timeframe" { + description = "Monitor timeframe for Redis latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "latency_threshold_critical" { + description = "latency limit (critical threshold)" + default = 90 +} + +variable "latency_threshold_warning" { + description = "latency limit (warning threshold)" + default = 70 +} + +variable "hitrate_silenced" { + description = 
"Groups to mute for Redis hitrate monitor" + type = "map" + default = {} +} + +variable "hitrate_message" { + description = "Custom message for Redis hitrate monitor" + type = "string" + default = "" +} + +variable "hitrate_time_aggregator" { + description = "Monitor aggregator for Redis hitrate [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "hitrate_timeframe" { + description = "Monitor timeframe for Redis hitrate [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "hitrate_threshold_critical" { + description = "hitrate limit (critical threshold)" + default = 90 +} + +variable "hitrate_threshold_warning" { + description = "hitrate limit (warning threshold)" + default = 70 +} diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index 347a255..98eac73 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -133,3 +133,165 @@ EOL tags = ["env:${var.environment}", "resource:redis"] } + +resource "datadog_monitor" "redis_mem_used" { + name = "[${var.environment}] Redis too many ram memory used {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.mem_used_message, var.message)}" + + query = < ${var.mem_used_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.mem_used_threshold_warning}" + critical = "${var.mem_used_threshold_critical}" + } + + silenced = "${var.mem_used_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_mem_frag" { + 
name = "[${var.environment}] Redis memory ram fragmented {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.mem_frag_message, var.message)}" + + query = < ${var.mem_frag_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.mem_frag_threshold_warning}" + critical = "${var.mem_frag_threshold_critical}" + } + + silenced = "${var.mem_frag_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_rejected_con" { + name = "[${var.environment}] Redis too many rejected connections {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.rejected_con_message, var.message)}" + + query = < ${var.rejected_con_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.rejected_con_threshold_warning}" + critical = "${var.rejected_con_threshold_critical}" + } + + silenced = "${var.rejected_con_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_latency" { + name = "[${var.environment}] Redis latency is too high {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.latency_message, var.message)}" + + query = < 
${var.latency_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.latency_threshold_warning}" + critical = "${var.latency_threshold_critical}" + } + + silenced = "${var.latency_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_hitrate" { + name = "[${var.environment}] Redis hitrate is too low {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.hitrate_message, var.message)}" + + query = <