From ac4ec07788c69ebe834b77c4c37ee0afce69152e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Thu, 19 Jul 2018 14:43:25 +0200 Subject: [PATCH 01/21] MON-271: Basic monitors for Redis --- README.md | 1 + middleware/redis/README.md | 72 +++++++++++++ middleware/redis/inputs.tf | 168 +++++++++++++++++++++++++++++ middleware/redis/monitors-redis.tf | 135 +++++++++++++++++++++++ middleware/redis/outputs.tf | 19 ++++ 5 files changed, 395 insertions(+) create mode 100644 middleware/redis/README.md create mode 100644 middleware/redis/inputs.tf create mode 100644 middleware/redis/monitors-redis.tf create mode 100644 middleware/redis/outputs.tf diff --git a/README.md b/README.md index 9c91a51..38a965e 100644 --- a/README.md +++ b/README.md @@ -100,5 +100,6 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [apache](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/apache/) - [nginx](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/nginx/) - [php-fpm](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/php-fpm/) + - [redis](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/redis/) - [system](https://bitbucket.org/morea/terraform.feature.datadog/src/master/system/) - [generic](https://bitbucket.org/morea/terraform.feature.datadog/src/master/system/generic/) diff --git a/middleware/redis/README.md b/middleware/redis/README.md new file mode 100644 index 0000000..c375374 --- /dev/null +++ b/middleware/redis/README.md @@ -0,0 +1,72 @@ +# MIDDLEWARE REDIS DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-middleware-redis" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//middleware/redis?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors 
with the following checks: + +- Redis too many evictedkeys +- Redis too many expired keys +- Redis too many blocked clients +- Redis keyspace seems full + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| blocked_clients_message | Custom message for Redis Blocked clients monitor | string | `` | no | +| blocked_clients_silenced | Groups to mute for Redis Blocked clients monitor | map | `` | no | +| blocked_clients_threshold_critical | Blocked clients rate (critical threshold) | string | `90` | no | +| blocked_clients_threshold_warning | Blocked clients rate (warning threshold) | string | `70` | no | +| blocked_clients_time_aggregator | Monitor aggregator for Redis Blocked clients [available values: min, max or avg] | string | `min` | no | +| blocked_clients_timeframe | Monitor timeframe for Redis Blocked clients [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Architecture environment | string | - | yes | +| evictedkeys_limit_message | Custom message for Redis evicted keys monitor | string | `` | no | +| evictedkeys_limit_silenced | Groups to mute for Redis evicted keys monitor | map | `` | no | +| evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no | +| evictedkeys_limit_threshold_warning | Evicted keys limit (warning threshold) | string | `0` | no | +| evictedkeys_limit_time_aggregator | Monitor aggregator for Redis evicted keys [available values: min, max or avg] | string | `avg` | no | +| evictedkeys_limit_timeframe | Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| expirations_limit_message | Custom message for Redis keys expirations monitor | string | `` | no | +| 
expirations_limit_threshold_critical | Expirations percent (critical threshold) | string | `80` | no | +| expirations_limit_threshold_warning | Expirations percent (warning threshold) | string | `60` | no | +| expirations_limit_time_aggregator | Monitor aggregator for Redis keys expirations [available values: min, max or avg] | string | `min` | no | +| expirations_limit_timeframe | Monitor timeframe for Redis keys expirations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| expirations_silenced | Groups to mute for Redis keys expirations monitor | map | `` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| keyspace_message | Custom message for Redis keyspace monitor | string | `` | no | +| keyspace_silenced | Groups to mute for Redis keyspace monitor | map | `` | no | +| keyspace_threshold_critical | Keyspace changement (critical threshold) | string | `90` | no | +| keyspace_threshold_warning | Keyspace changement (warning threshold) | string | `70` | no | +| keyspace_time_aggregator | Monitor aggregator for Redis keyspace [available values: min, max or avg] | string | `min` | no | +| keyspace_timeframe | Monitor timeframe for Redis keyspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| message | Message sent when a Redis monitor is triggered | string | - | yes | +| redis_silenced | Groups to mute for Redis monitors | map | `` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| redis_blocked_clients_id | id for monitor redis_blocked_clients | +| redis_evictedkeys_id | id for monitor redis_evictedkeys | +| redis_expirations_id | id for monitor redis_expirations | +| redis_keyspace_id | id for monitor redis_keyspace | + +## Related 
documentation + +[Datadog blog: How to monitor Redis](https://www.datadoghq.com/blog/how-to-monitor-redis-performance-metrics/) + +[Datadog Redis integration doc](https://docs.datadoghq.com/integrations/redisdb/) diff --git a/middleware/redis/inputs.tf b/middleware/redis/inputs.tf new file mode 100644 index 0000000..c6fdab0 --- /dev/null +++ b/middleware/redis/inputs.tf @@ -0,0 +1,168 @@ +# Global Terraform +variable "environment" { + description = "Architecture environment" + type = "string" +} + +# Global DataDog +variable "message" { + description = "Message sent when a Redis monitor is triggered" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# Redis specific variables +variable "redis_silenced" { + description = "Groups to mute for Redis monitors" + type = "map" + default = {} +} + +variable "evictedkeys_limit_silenced" { + description = "Groups to mute for Redis evicted keys monitor" + type = "map" + default = {} +} + +variable "evictedkeys_limit_message" { + description = "Custom message for Redis evicted keys monitor" + type = "string" + default = "" +} + +variable "evictedkeys_limit_time_aggregator" { + description = "Monitor aggregator for Redis evicted keys [available values: min, max or avg]" + type = "string" + default = "avg" +} + +variable "evictedkeys_limit_timeframe" { + description = "Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "evictedkeys_limit_threshold_warning" { + description = "Evicted keys limit (warning threshold)" + default = 0 +} + +variable "evictedkeys_limit_threshold_critical" { + 
description = "Evicted keys limit (critical threshold)" + default = 100 +} + +variable "expirations_silenced" { + description = "Groups to mute for Redis keys expirations monitor" + type = "map" + default = {} +} + +variable "expirations_limit_message" { + description = "Custom message for Redis keys expirations monitor" + type = "string" + default = "" +} + +variable "expirations_limit_time_aggregator" { + description = "Monitor aggregator for Redis keys expirations [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "expirations_limit_timeframe" { + description = "Monitor timeframe for Redis keys expirations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "expirations_limit_threshold_critical" { + description = "Expirations percent (critical threshold)" + default = 80 +} + +variable "expirations_limit_threshold_warning" { + description = "Expirations percent (warning threshold)" + default = 60 +} + +variable "blocked_clients_silenced" { + description = "Groups to mute for Redis Blocked clients monitor" + type = "map" + default = {} +} + +variable "blocked_clients_message" { + description = "Custom message for Redis Blocked clients monitor" + type = "string" + default = "" +} + +variable "blocked_clients_time_aggregator" { + description = "Monitor aggregator for Redis Blocked clients [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "blocked_clients_timeframe" { + description = "Monitor timeframe for Redis Blocked clients [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "blocked_clients_threshold_critical" { + description = "Blocked clients rate (critical threshold)" + default = 90 +} + +variable "blocked_clients_threshold_warning" { + description = "Blocked clients rate (warning threshold)" + 
default = 70 +} + +variable "keyspace_silenced" { + description = "Groups to mute for Redis keyspace monitor" + type = "map" + default = {} +} + +variable "keyspace_message" { + description = "Custom message for Redis keyspace monitor" + type = "string" + default = "" +} + +variable "keyspace_time_aggregator" { + description = "Monitor aggregator for Redis keyspace [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "keyspace_timeframe" { + description = "Monitor timeframe for Redis keyspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "keyspace_threshold_critical" { + description = "Keyspace changement (critical threshold)" + default = 90 +} + +variable "keyspace_threshold_warning" { + description = "Keyspace changement (warning threshold)" + default = 70 +} diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf new file mode 100644 index 0000000..347a255 --- /dev/null +++ b/middleware/redis/monitors-redis.tf @@ -0,0 +1,135 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_redis:enabled,db_env:%s", var.environment) : "${var.filter_tags_custom}"}" + } +} + +resource "datadog_monitor" "redis_evictedkeys" { + name = "[${var.environment}] Redis too many evictedkeys {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.evictedkeys_limit_message, var.message)}" + + query = < ${var.evictedkeys_limit_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.evictedkeys_limit_threshold_warning}" + critical = "${var.evictedkeys_limit_threshold_critical}" + } + + silenced = "${var.evictedkeys_limit_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_expirations" { + name = "[${var.environment}] Redis too many expired keys {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.expirations_limit_message, var.message)}" + + query = < ${var.expirations_limit_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.expirations_limit_threshold_warning}" + critical = "${var.expirations_limit_threshold_critical}" + } + + silenced = "${var.expirations_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_blocked_clients" { + name = "[${var.environment}] Redis too many 
blocked clients {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.blocked_clients_message, var.message)}" + + query = < ${var.blocked_clients_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.blocked_clients_threshold_warning}" + critical = "${var.blocked_clients_threshold_critical}" + } + + silenced = "${var.blocked_clients_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_keyspace" { + name = "[${var.environment}] Redis keyspace seems full {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.keyspace_message, var.message)}" + + query = < ${var.keyspace_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.keyspace_threshold_warning}" + critical = "${var.keyspace_threshold_critical}" + } + + silenced = "${var.keyspace_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} diff --git a/middleware/redis/outputs.tf b/middleware/redis/outputs.tf new file mode 100644 index 0000000..f2a4d21 --- /dev/null +++ b/middleware/redis/outputs.tf @@ -0,0 +1,19 @@ +output "redis_evictedkeys_id" { + description = "id for monitor redis_evictedkeys" + value = "${datadog_monitor.redis_evictedkeys.id}" +} + +output "redis_expirations_id" { + description 
= "id for monitor redis_expirations" + value = "${datadog_monitor.redis_expirations.id}" +} + +output "redis_blocked_clients_id" { + description = "id for monitor redis_blocked_clients" + value = "${datadog_monitor.redis_blocked_clients.id}" +} + +output "redis_keyspace_id" { + description = "id for monitor redis_keyspace" + value = "${datadog_monitor.redis_keyspace.id}" +} From 7224c97ac5c7e1e63b51aaf859670e0cb2aca9ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Fri, 20 Jul 2018 18:07:56 +0200 Subject: [PATCH 02/21] MON-271: More Redis monitors --- middleware/redis/README.md | 40 +++++++ middleware/redis/inputs.tf | 170 +++++++++++++++++++++++++++++ middleware/redis/monitors-redis.tf | 162 +++++++++++++++++++++++++++ middleware/redis/outputs.tf | 25 +++++ 4 files changed, 397 insertions(+) diff --git a/middleware/redis/README.md b/middleware/redis/README.md index c375374..5b6919e 100644 --- a/middleware/redis/README.md +++ b/middleware/redis/README.md @@ -20,6 +20,11 @@ Creates DataDog monitors with the following checks: - Redis too many expired keys - Redis too many blocked clients - Redis keyspace seems full +- Redis too many ram memory used +- Redis memory ram fragmented +- Redis too many rejected connections +- Redis latency is too high +- Redis hitrate is too low ## Inputs @@ -47,14 +52,44 @@ Creates DataDog monitors with the following checks: | expirations_silenced | Groups to mute for Redis keys expirations monitor | map | `` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| hitrate_message | Custom message for Redis hitrate monitor | string | `` | no | +| hitrate_silenced | Groups to mute for Redis hitrate monitor | map | `` | no | +| hitrate_threshold_critical | hitrate limit (critical threshold) | string | `90` | no | +| hitrate_threshold_warning | hitrate limit 
(warning threshold) | string | `70` | no | +| hitrate_time_aggregator | Monitor aggregator for Redis hitrate [available values: min, max or avg] | string | `min` | no | +| hitrate_timeframe | Monitor timeframe for Redis hitrate [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | keyspace_message | Custom message for Redis keyspace monitor | string | `` | no | | keyspace_silenced | Groups to mute for Redis keyspace monitor | map | `` | no | | keyspace_threshold_critical | Keyspace changement (critical threshold) | string | `90` | no | | keyspace_threshold_warning | Keyspace changement (warning threshold) | string | `70` | no | | keyspace_time_aggregator | Monitor aggregator for Redis keyspace [available values: min, max or avg] | string | `min` | no | | keyspace_timeframe | Monitor timeframe for Redis keyspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| latency_message | Custom message for Redis latency monitor | string | `` | no | +| latency_silenced | Groups to mute for Redis latency monitor | map | `` | no | +| latency_threshold_critical | latency limit (critical threshold) | string | `90` | no | +| latency_threshold_warning | latency limit (warning threshold) | string | `70` | no | +| latency_time_aggregator | Monitor aggregator for Redis latency [available values: min, max or avg] | string | `min` | no | +| latency_timeframe | Monitor timeframe for Redis latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| mem_frag_message | Custom message for Redis memory RAM fragmentation monitor | string | `` | no | +| mem_frag_silenced | Groups to mute for Redis memory RAM fragmentation monitor | map | `` | no | +| mem_frag_threshold_critical | memory RAM fragmentation limit (critical threshold) | string | `90` | no | +| mem_frag_threshold_warning | memory 
RAM fragmentation limit (warning threshold) | string | `70` | no | +| mem_frag_time_aggregator | Monitor aggregator for Redis memory RAM fragmentation [available values: min, max or avg] | string | `min` | no | +| mem_frag_timeframe | Monitor timeframe for Redis memory RAM fragmentation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| mem_used_message | Custom message for Redis RAM memory used monitor | string | `` | no | +| mem_used_silenced | Groups to mute for Redis RAM memory used monitor | map | `` | no | +| mem_used_threshold_critical | RAM memory used limit (critical threshold) | string | `90` | no | +| mem_used_threshold_warning | RAM memory used limit (warning threshold) | string | `70` | no | +| mem_used_time_aggregator | Monitor aggregator for Redis RAM memory used [available values: min, max or avg] | string | `min` | no | +| mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | | redis_silenced | Groups to mute for Redis monitors | map | `` | no | +| rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no | +| rejected_con_silenced | Groups to mute for Redis rejected connections errors monitor | map | `` | no | +| rejected_con_threshold_critical | rejected connections errors limit (critical threshold) | string | `90` | no | +| rejected_con_threshold_warning | rejected connections errors limit (warning threshold) | string | `70` | no | +| rejected_con_time_aggregator | Monitor aggregator for Redis rejected connections errors [available values: min, max or avg] | string | `min` | no | +| rejected_con_timeframe | Monitor timeframe for Redis rejected connections errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 
2, or 4), or `last_1d`] | string | `last_5m` | no | ## Outputs @@ -63,7 +98,12 @@ Creates DataDog monitors with the following checks: | redis_blocked_clients_id | id for monitor redis_blocked_clients | | redis_evictedkeys_id | id for monitor redis_evictedkeys | | redis_expirations_id | id for monitor redis_expirations | +| redis_hitrate_id | id for monitor redis_hitrate | | redis_keyspace_id | id for monitor redis_keyspace | +| redis_latency_id | id for monitor redis_latency | +| redis_mem_frag_id | id for monitor redis_mem_frag | +| redis_mem_used_id | id for monitor redis_mem_used | +| redis_rejected_con_id | id for monitor redis_rejected_con | ## Related documentation diff --git a/middleware/redis/inputs.tf b/middleware/redis/inputs.tf index c6fdab0..cabf2ab 100644 --- a/middleware/redis/inputs.tf +++ b/middleware/redis/inputs.tf @@ -166,3 +166,173 @@ variable "keyspace_threshold_warning" { description = "Keyspace changement (warning threshold)" default = 70 } + +variable "mem_used_silenced" { + description = "Groups to mute for Redis RAM memory used monitor" + type = "map" + default = {} +} + +variable "mem_used_message" { + description = "Custom message for Redis RAM memory used monitor" + type = "string" + default = "" +} + +variable "mem_used_time_aggregator" { + description = "Monitor aggregator for Redis RAM memory used [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "mem_used_timeframe" { + description = "Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "mem_used_threshold_critical" { + description = "RAM memory used limit (critical threshold)" + default = 90 +} + +variable "mem_used_threshold_warning" { + description = "RAM memory used limit (warning threshold)" + default = 70 +} + +variable "mem_frag_silenced" { + description = "Groups to mute for Redis memory RAM 
fragmentation monitor" + type = "map" + default = {} +} + +variable "mem_frag_message" { + description = "Custom message for Redis memory RAM fragmentation monitor" + type = "string" + default = "" +} + +variable "mem_frag_time_aggregator" { + description = "Monitor aggregator for Redis memory RAM fragmentation [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "mem_frag_timeframe" { + description = "Monitor timeframe for Redis memory RAM fragmentation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "mem_frag_threshold_critical" { + description = "memory RAM fragmentation limit (critical threshold)" + default = 90 +} + +variable "mem_frag_threshold_warning" { + description = "memory RAM fragmentation limit (warning threshold)" + default = 70 +} + +variable "rejected_con_silenced" { + description = "Groups to mute for Redis rejected connections errors monitor" + type = "map" + default = {} +} + +variable "rejected_con_message" { + description = "Custom message for Redis rejected connections errors monitor" + type = "string" + default = "" +} + +variable "rejected_con_time_aggregator" { + description = "Monitor aggregator for Redis rejected connections errors [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "rejected_con_timeframe" { + description = "Monitor timeframe for Redis rejected connections errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "rejected_con_threshold_critical" { + description = "rejected connections errors limit (critical threshold)" + default = 90 +} + +variable "rejected_con_threshold_warning" { + description = "rejected connections errors limit (warning threshold)" + default = 70 +} + +variable "latency_silenced" { + description = "Groups to mute for Redis latency 
monitor" + type = "map" + default = {} +} + +variable "latency_message" { + description = "Custom message for Redis latency monitor" + type = "string" + default = "" +} + +variable "latency_time_aggregator" { + description = "Monitor aggregator for Redis latency [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "latency_timeframe" { + description = "Monitor timeframe for Redis latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "latency_threshold_critical" { + description = "latency limit (critical threshold)" + default = 90 +} + +variable "latency_threshold_warning" { + description = "latency limit (warning threshold)" + default = 70 +} + +variable "hitrate_silenced" { + description = "Groups to mute for Redis hitrate monitor" + type = "map" + default = {} +} + +variable "hitrate_message" { + description = "Custom message for Redis hitrate monitor" + type = "string" + default = "" +} + +variable "hitrate_time_aggregator" { + description = "Monitor aggregator for Redis hitrate [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "hitrate_timeframe" { + description = "Monitor timeframe for Redis hitrate [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "hitrate_threshold_critical" { + description = "hitrate limit (critical threshold)" + default = 90 +} + +variable "hitrate_threshold_warning" { + description = "hitrate limit (warning threshold)" + default = 70 +} diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index 347a255..98eac73 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -133,3 +133,165 @@ EOL tags = ["env:${var.environment}", "resource:redis"] } + +resource "datadog_monitor" "redis_mem_used" { + name = 
"[${var.environment}] Redis too many ram memory used {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.mem_used_message, var.message)}" + + query = < ${var.mem_used_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.mem_used_threshold_warning}" + critical = "${var.mem_used_threshold_critical}" + } + + silenced = "${var.mem_used_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_mem_frag" { + name = "[${var.environment}] Redis memory ram fragmented {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.mem_frag_message, var.message)}" + + query = < ${var.mem_frag_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.mem_frag_threshold_warning}" + critical = "${var.mem_frag_threshold_critical}" + } + + silenced = "${var.mem_frag_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_rejected_con" { + name = "[${var.environment}] Redis too many rejected connections {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.rejected_con_message, var.message)}" + + query = < 
${var.rejected_con_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.rejected_con_threshold_warning}" + critical = "${var.rejected_con_threshold_critical}" + } + + silenced = "${var.rejected_con_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_latency" { + name = "[${var.environment}] Redis latency is too high {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.latency_message, var.message)}" + + query = < ${var.latency_threshold_critical} +EOL + + type = "metric alert" + + thresholds { + warning = "${var.latency_threshold_warning}" + critical = "${var.latency_threshold_critical}" + } + + silenced = "${var.latency_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:redis"] +} + +resource "datadog_monitor" "redis_hitrate" { + name = "[${var.environment}] Redis hitrate is too low {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.hitrate_message, var.message)}" + + query = < Date: Tue, 24 Jul 2018 14:24:40 +0200 Subject: [PATCH 03/21] MON-271: [Redis] Update queries and default thresholds --- middleware/redis/README.md | 52 ++++++++++++------------ middleware/redis/inputs.tf | 65 ++++++++++++++++-------------- middleware/redis/monitors-redis.tf | 50 ++++++++++++----------- 3 
files changed, 86 insertions(+), 81 deletions(-) diff --git a/middleware/redis/README.md b/middleware/redis/README.md index 5b6919e..d3300db 100644 --- a/middleware/redis/README.md +++ b/middleware/redis/README.md @@ -16,7 +16,7 @@ module "datadog-monitors-middleware-redis" { Creates DataDog monitors with the following checks: -- Redis too many evictedkeys +- Redis suddenly too many evicted keys - Redis too many expired keys - Redis too many blocked clients - Redis keyspace seems full @@ -32,62 +32,62 @@ Creates DataDog monitors with the following checks: |------|-------------|:----:|:-----:|:-----:| | blocked_clients_message | Custom message for Redis Blocked clients monitor | string | `` | no | | blocked_clients_silenced | Groups to mute for Redis Blocked clients monitor | map | `` | no | -| blocked_clients_threshold_critical | Blocked clients rate (critical threshold) | string | `90` | no | -| blocked_clients_threshold_warning | Blocked clients rate (warning threshold) | string | `70` | no | +| blocked_clients_threshold_critical | Blocked clients rate (critical threshold) | string | `30` | no | +| blocked_clients_threshold_warning | Blocked clients rate (warning threshold) | string | `10` | no | | blocked_clients_time_aggregator | Monitor aggregator for Redis Blocked clients [available values: min, max or avg] | string | `min` | no | | blocked_clients_timeframe | Monitor timeframe for Redis Blocked clients [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | -| evictedkeys_limit_message | Custom message for Redis evicted keys monitor | string | `` | no | -| evictedkeys_limit_silenced | Groups to mute for Redis evicted keys monitor | map | `` | no | -| evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no | -| 
evictedkeys_limit_threshold_warning | Evicted keys limit (warning threshold) | string | `0` | no | -| evictedkeys_limit_time_aggregator | Monitor aggregator for Redis evicted keys [available values: min, max or avg] | string | `avg` | no | -| evictedkeys_limit_timeframe | Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| expirations_limit_message | Custom message for Redis keys expirations monitor | string | `` | no | -| expirations_limit_threshold_critical | Expirations percent (critical threshold) | string | `80` | no | -| expirations_limit_threshold_warning | Expirations percent (warning threshold) | string | `60` | no | -| expirations_limit_time_aggregator | Monitor aggregator for Redis keys expirations [available values: min, max or avg] | string | `min` | no | -| expirations_limit_timeframe | Monitor timeframe for Redis keys expirations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| evictedkeys_change_message | Custom message for Redis evicted keys monitor | string | `` | no | +| evictedkeys_change_silenced | Groups to mute for Redis evicted keys monitor | map | `` | no | +| evictedkeys_change_threshold_critical | Evicted keys change (critical threshold) | string | `100` | no | +| evictedkeys_change_threshold_warning | Evicted keys change (warning threshold) | string | `20` | no | +| evictedkeys_change_time_aggregator | Monitor aggregator for Redis evicted keys [available values: min, max or avg] | string | `avg` | no | +| evictedkeys_change_timeframe | Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| expirations_rate_message | Custom message for Redis keys expirations monitor | string | `` | no | +| expirations_rate_threshold_critical | Expirations percent 
(critical threshold) | string | `80` | no | +| expirations_rate_threshold_warning | Expirations percent (warning threshold) | string | `60` | no | +| expirations_rate_time_aggregator | Monitor aggregator for Redis keys expirations [available values: min, max or avg] | string | `min` | no | +| expirations_rate_timeframe | Monitor timeframe for Redis keys expirations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | expirations_silenced | Groups to mute for Redis keys expirations monitor | map | `` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | hitrate_message | Custom message for Redis hitrate monitor | string | `` | no | | hitrate_silenced | Groups to mute for Redis hitrate monitor | map | `` | no | -| hitrate_threshold_critical | hitrate limit (critical threshold) | string | `90` | no | -| hitrate_threshold_warning | hitrate limit (warning threshold) | string | `70` | no | +| hitrate_threshold_critical | hitrate limit (critical threshold) | string | `10` | no | +| hitrate_threshold_warning | hitrate limit (warning threshold) | string | `30` | no | | hitrate_time_aggregator | Monitor aggregator for Redis hitrate [available values: min, max or avg] | string | `min` | no | | hitrate_timeframe | Monitor timeframe for Redis hitrate [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | keyspace_message | Custom message for Redis keyspace monitor | string | `` | no | | keyspace_silenced | Groups to mute for Redis keyspace monitor | map | `` | no | -| keyspace_threshold_critical | Keyspace changement (critical threshold) | string | `90` | no | -| keyspace_threshold_warning | Keyspace changement (warning threshold) | string | `70` | no | +| keyspace_threshold_critical | 
Keyspace no changement (critical threshold) | string | `0` | no | +| keyspace_threshold_warning | Keyspace no changement (warning threshold) | string | `1` | no | | keyspace_time_aggregator | Monitor aggregator for Redis keyspace [available values: min, max or avg] | string | `min` | no | | keyspace_timeframe | Monitor timeframe for Redis keyspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | latency_message | Custom message for Redis latency monitor | string | `` | no | | latency_silenced | Groups to mute for Redis latency monitor | map | `` | no | -| latency_threshold_critical | latency limit (critical threshold) | string | `90` | no | -| latency_threshold_warning | latency limit (warning threshold) | string | `70` | no | +| latency_threshold_critical | latency limit (critical threshold) | string | `100` | no | +| latency_threshold_warning | latency limit (warning threshold) | string | `50` | no | | latency_time_aggregator | Monitor aggregator for Redis latency [available values: min, max or avg] | string | `min` | no | | latency_timeframe | Monitor timeframe for Redis latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | mem_frag_message | Custom message for Redis memory RAM fragmentation monitor | string | `` | no | | mem_frag_silenced | Groups to mute for Redis memory RAM fragmentation monitor | map | `` | no | -| mem_frag_threshold_critical | memory RAM fragmentation limit (critical threshold) | string | `90` | no | -| mem_frag_threshold_warning | memory RAM fragmentation limit (warning threshold) | string | `70` | no | +| mem_frag_threshold_critical | memory RAM fragmentation limit (critical threshold) | string | `130` | no | +| mem_frag_threshold_warning | memory RAM fragmentation limit (warning threshold) | string | `100` | no | | mem_frag_time_aggregator | Monitor aggregator for Redis memory RAM fragmentation 
[available values: min, max or avg] | string | `min` | no | | mem_frag_timeframe | Monitor timeframe for Redis memory RAM fragmentation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | mem_used_message | Custom message for Redis RAM memory used monitor | string | `` | no | | mem_used_silenced | Groups to mute for Redis RAM memory used monitor | map | `` | no | -| mem_used_threshold_critical | RAM memory used limit (critical threshold) | string | `90` | no | -| mem_used_threshold_warning | RAM memory used limit (warning threshold) | string | `70` | no | +| mem_used_threshold_critical | RAM memory used limit (critical threshold) | string | `95` | no | +| mem_used_threshold_warning | RAM memory used limit (warning threshold) | string | `85` | no | | mem_used_time_aggregator | Monitor aggregator for Redis RAM memory used [available values: min, max or avg] | string | `min` | no | | mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | | redis_silenced | Groups to mute for Redis monitors | map | `` | no | | rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no | | rejected_con_silenced | Groups to mute for Redis rejected connections errors monitor | map | `` | no | -| rejected_con_threshold_critical | rejected connections errors limit (critical threshold) | string | `90` | no | -| rejected_con_threshold_warning | rejected connections errors limit (warning threshold) | string | `70` | no | +| rejected_con_threshold_critical | rejected connections errors limit (critical threshold) | string | `50` | no | +| rejected_con_threshold_warning | rejected connections errors limit (warning threshold) | string | `10` | no | | rejected_con_time_aggregator | 
Monitor aggregator for Redis rejected connections errors [available values: min, max or avg] | string | `min` | no | | rejected_con_timeframe | Monitor timeframe for Redis rejected connections errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | diff --git a/middleware/redis/inputs.tf b/middleware/redis/inputs.tf index cabf2ab..527b5d4 100644 --- a/middleware/redis/inputs.tf +++ b/middleware/redis/inputs.tf @@ -31,37 +31,37 @@ variable "redis_silenced" { default = {} } -variable "evictedkeys_limit_silenced" { +variable "evictedkeys_change_silenced" { description = "Groups to mute for Redis evicted keys monitor" type = "map" default = {} } -variable "evictedkeys_limit_message" { +variable "evictedkeys_change_message" { description = "Custom message for Redis evicted keys monitor" type = "string" default = "" } -variable "evictedkeys_limit_time_aggregator" { +variable "evictedkeys_change_time_aggregator" { description = "Monitor aggregator for Redis evicted keys [available values: min, max or avg]" type = "string" default = "avg" } -variable "evictedkeys_limit_timeframe" { +variable "evictedkeys_change_timeframe" { description = "Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" default = "last_5m" } -variable "evictedkeys_limit_threshold_warning" { - description = "Evicted keys limit (warning threshold)" - default = 0 +variable "evictedkeys_change_threshold_warning" { + description = "Evicted keys change (warning threshold)" + default = 20 } -variable "evictedkeys_limit_threshold_critical" { - description = "Evicted keys limit (critical threshold)" +variable "evictedkeys_change_threshold_critical" { + description = "Evicted keys change (critical threshold)" default = 100 } @@ -71,30 +71,30 @@ variable "expirations_silenced" { default = {} } -variable "expirations_limit_message" { +variable 
"expirations_rate_message" { description = "Custom message for Redis keys expirations monitor" type = "string" default = "" } -variable "expirations_limit_time_aggregator" { +variable "expirations_rate_time_aggregator" { description = "Monitor aggregator for Redis keys expirations [available values: min, max or avg]" type = "string" default = "min" } -variable "expirations_limit_timeframe" { +variable "expirations_rate_timeframe" { description = "Monitor timeframe for Redis keys expirations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" default = "last_5m" } -variable "expirations_limit_threshold_critical" { +variable "expirations_rate_threshold_critical" { description = "Expirations percent (critical threshold)" default = 80 } -variable "expirations_limit_threshold_warning" { +variable "expirations_rate_threshold_warning" { description = "Expirations percent (warning threshold)" default = 60 } @@ -125,18 +125,21 @@ variable "blocked_clients_timeframe" { variable "blocked_clients_threshold_critical" { description = "Blocked clients rate (critical threshold)" - default = 90 + default = 30 } variable "blocked_clients_threshold_warning" { description = "Blocked clients rate (warning threshold)" - default = 70 + default = 10 } variable "keyspace_silenced" { description = "Groups to mute for Redis keyspace monitor" type = "map" - default = {} + + default = { + "*" = 0 # Mute all for now by default + } } variable "keyspace_message" { @@ -158,13 +161,13 @@ variable "keyspace_timeframe" { } variable "keyspace_threshold_critical" { - description = "Keyspace changement (critical threshold)" - default = 90 + description = "Keyspace no changement (critical threshold)" + default = 0 } variable "keyspace_threshold_warning" { - description = "Keyspace changement (warning threshold)" - default = 70 + description = "Keyspace no changement (warning threshold)" + default = 1 } variable "mem_used_silenced" { @@ -193,12 
+196,12 @@ variable "mem_used_timeframe" { variable "mem_used_threshold_critical" { description = "RAM memory used limit (critical threshold)" - default = 90 + default = 95 } variable "mem_used_threshold_warning" { description = "RAM memory used limit (warning threshold)" - default = 70 + default = 85 } variable "mem_frag_silenced" { @@ -227,12 +230,12 @@ variable "mem_frag_timeframe" { variable "mem_frag_threshold_critical" { description = "memory RAM fragmentation limit (critical threshold)" - default = 90 + default = 130 } variable "mem_frag_threshold_warning" { description = "memory RAM fragmentation limit (warning threshold)" - default = 70 + default = 100 } variable "rejected_con_silenced" { @@ -261,12 +264,12 @@ variable "rejected_con_timeframe" { variable "rejected_con_threshold_critical" { description = "rejected connections errors limit (critical threshold)" - default = 90 + default = 50 } variable "rejected_con_threshold_warning" { description = "rejected connections errors limit (warning threshold)" - default = 70 + default = 10 } variable "latency_silenced" { @@ -295,12 +298,12 @@ variable "latency_timeframe" { variable "latency_threshold_critical" { description = "latency limit (critical threshold)" - default = 90 + default = 100 } variable "latency_threshold_warning" { description = "latency limit (warning threshold)" - default = 70 + default = 50 } variable "hitrate_silenced" { @@ -329,10 +332,10 @@ variable "hitrate_timeframe" { variable "hitrate_threshold_critical" { description = "hitrate limit (critical threshold)" - default = 90 + default = 10 } variable "hitrate_threshold_warning" { description = "hitrate limit (warning threshold)" - default = 70 + default = 30 } diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index 98eac73..0389fea 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -7,23 +7,23 @@ data "template_file" "filter" { } resource "datadog_monitor" 
"redis_evictedkeys" { - name = "[${var.environment}] Redis too many evictedkeys {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" - message = "${coalesce(var.evictedkeys_limit_message, var.message)}" + name = "[${var.environment}] Redis suddenly too many evicted keys {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.evictedkeys_change_message, var.message)}" query = < ${var.evictedkeys_limit_threshold_critical} + ) > ${var.evictedkeys_change_threshold_critical} EOL type = "metric alert" thresholds { - warning = "${var.evictedkeys_limit_threshold_warning}" - critical = "${var.evictedkeys_limit_threshold_critical}" + warning = "${var.evictedkeys_change_threshold_warning}" + critical = "${var.evictedkeys_change_threshold_critical}" } - silenced = "${var.evictedkeys_limit_silenced}" + silenced = "${var.evictedkeys_change_silenced}" notify_no_data = false evaluation_delay = "${var.delay}" @@ -40,19 +40,19 @@ EOL resource "datadog_monitor" "redis_expirations" { name = "[${var.environment}] Redis too many expired keys {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" - message = "${coalesce(var.expirations_limit_message, var.message)}" + message = "${coalesce(var.expirations_rate_message, var.message)}" query = < ${var.expirations_limit_threshold_critical} + ) > ${var.expirations_rate_threshold_critical} EOL type = "metric alert" thresholds { - warning = "${var.expirations_limit_threshold_warning}" - critical = "${var.expirations_limit_threshold_critical}" + warning = "${var.expirations_rate_threshold_warning}" + critical = "${var.expirations_rate_threshold_critical}" } silenced = "${var.expirations_silenced}" @@ -76,8 +76,9 @@ resource "datadog_monitor" 
"redis_blocked_clients" { query = < ${var.blocked_clients_threshold_critical} + sum:redis.clients.blocked{${data.template_file.filter.rendered}} by {name,host} + / sum:redis.net.clients{${data.template_file.filter.rendered}} by {name,host} + ) * 100 > ${var.blocked_clients_threshold_critical} EOL type = "metric alert" @@ -108,8 +109,8 @@ resource "datadog_monitor" "redis_keyspace" { query = < ${var.keyspace_threshold_critical} + abs(diff(avg:redis.keys{${data.template_file.filter.rendered}} by {name,host})) + ) == ${var.keyspace_threshold_critical} EOL type = "metric alert" @@ -141,7 +142,8 @@ resource "datadog_monitor" "redis_mem_used" { query = < ${var.mem_used_threshold_critical} + / max:redis.mem.maxmemory{${data.template_file.filter.rendered}} by {name,host} + ) * 100 > ${var.mem_used_threshold_critical} EOL type = "metric alert" @@ -173,7 +175,7 @@ resource "datadog_monitor" "redis_mem_frag" { query = < ${var.mem_frag_threshold_critical} + ) * 100 > ${var.mem_frag_threshold_critical} EOL type = "metric alert" @@ -203,7 +205,7 @@ resource "datadog_monitor" "redis_rejected_con" { message = "${coalesce(var.rejected_con_message, var.message)}" query = < ${var.rejected_con_threshold_critical} EOL @@ -235,7 +237,7 @@ resource "datadog_monitor" "redis_latency" { message = "${coalesce(var.latency_message, var.message)}" query = < ${var.latency_threshold_critical} EOL @@ -268,10 +270,10 @@ resource "datadog_monitor" "redis_hitrate" { query = < Date: Tue, 24 Jul 2018 14:41:16 +0200 Subject: [PATCH 04/21] =?UTF-8?q?=F0=9F=98=AB=09=20MON-271:=20Ignore=20tab?= =?UTF-8?q?ernacle=20gloubi-boulga=20changes=20over=20`metric=20alert`=20v?= =?UTF-8?q?s=20`query=20alert`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- middleware/redis/monitors-redis.tf | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index 0389fea..383caa0 
100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -18,6 +18,10 @@ EOL type = "metric alert" + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.evictedkeys_change_threshold_warning}" critical = "${var.evictedkeys_change_threshold_critical}" @@ -50,6 +54,10 @@ EOL type = "metric alert" + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.expirations_rate_threshold_warning}" critical = "${var.expirations_rate_threshold_critical}" @@ -83,6 +91,10 @@ EOL type = "metric alert" + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.blocked_clients_threshold_warning}" critical = "${var.blocked_clients_threshold_critical}" @@ -115,6 +127,10 @@ EOL type = "metric alert" + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.keyspace_threshold_warning}" critical = "${var.keyspace_threshold_critical}" @@ -148,6 +164,10 @@ EOL type = "metric alert" + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.mem_used_threshold_warning}" critical = "${var.mem_used_threshold_critical}" @@ -180,6 +200,10 @@ EOL type = "metric alert" + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.mem_frag_threshold_warning}" critical = "${var.mem_frag_threshold_critical}" @@ -212,6 +236,10 @@ EOL type = "metric alert" + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.rejected_con_threshold_warning}" critical = "${var.rejected_con_threshold_critical}" @@ -244,6 +272,10 @@ EOL type = "metric alert" + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.latency_threshold_warning}" critical = "${var.latency_threshold_critical}" @@ -278,6 +310,10 @@ EOL type = "metric alert" + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.hitrate_threshold_warning}" critical = "${var.hitrate_threshold_critical}" From 
a9366c3709eba09432bd65fcc46606d91b8a1aea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Tue, 24 Jul 2018 15:44:06 +0200 Subject: [PATCH 05/21] MON-271: Redis monitors fine tuning --- middleware/redis/README.md | 4 +-- middleware/redis/monitors-redis.tf | 46 +++++++++++++++--------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/middleware/redis/README.md b/middleware/redis/README.md index d3300db..d560174 100644 --- a/middleware/redis/README.md +++ b/middleware/redis/README.md @@ -16,10 +16,10 @@ module "datadog-monitors-middleware-redis" { Creates DataDog monitors with the following checks: -- Redis suddenly too many evicted keys +- Redis too many evicted keys - Redis too many expired keys - Redis too many blocked clients -- Redis keyspace seems full +- Redis keyspace seems full (no changes since ${var.keyspace_timeframe}) - Redis too many ram memory used - Redis memory ram fragmented - Redis too many rejected connections diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index 383caa0..db4a5f2 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -2,17 +2,17 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_redis:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } resource "datadog_monitor" "redis_evictedkeys" { - name = "[${var.environment}] Redis suddenly too many evicted keys {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + name = "[${var.environment}] Redis too many evicted keys {{#is_alert}}{{{comparator}}} {{threshold}}% (+{{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% (+{{value}}%){{/is_warning}}" message = "${coalesce(var.evictedkeys_change_message, var.message)}" query = < ${var.evictedkeys_change_threshold_critical} EOL @@ -36,19 +36,19 @@ EOL timeout_h = 0 include_tags = true locked = false - require_full_window = false + require_full_window = true new_host_delay = "${var.delay}" tags = ["env:${var.environment}", "resource:redis"] } resource "datadog_monitor" "redis_expirations" { - name = "[${var.environment}] Redis too many expired keys {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + name = "[${var.environment}] Redis too many expired keys {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.expirations_rate_message, var.message)}" query = < ${var.expirations_rate_threshold_critical} EOL @@ -79,13 +79,13 @@ EOL } resource "datadog_monitor" "redis_blocked_clients" { - name = "[${var.environment}] Redis too many blocked clients {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + name = "[${var.environment}] Redis too many blocked clients {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% 
({{value}}%){{/is_warning}}" message = "${coalesce(var.blocked_clients_message, var.message)}" query = < ${var.blocked_clients_threshold_critical} EOL @@ -116,12 +116,12 @@ EOL } resource "datadog_monitor" "redis_keyspace" { - name = "[${var.environment}] Redis keyspace seems full {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + name = "[${var.environment}] Redis keyspace seems full (no changes since ${var.keyspace_timeframe})" message = "${coalesce(var.keyspace_message, var.message)}" query = < ${var.mem_used_threshold_critical} EOL @@ -189,12 +189,12 @@ EOL } resource "datadog_monitor" "redis_mem_frag" { - name = "[${var.environment}] Redis memory ram fragmented {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + name = "[${var.environment}] Redis memory ram fragmented {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.mem_frag_message, var.message)}" query = < ${var.mem_frag_threshold_critical} EOL @@ -230,7 +230,7 @@ resource "datadog_monitor" "redis_rejected_con" { query = < ${var.rejected_con_threshold_critical} EOL @@ -261,12 +261,12 @@ EOL } resource "datadog_monitor" "redis_latency" { - name = "[${var.environment}] Redis latency is too high {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + name = "[${var.environment}] Redis latency is too high {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}){{/is_warning}}" message = "${coalesce(var.latency_message, var.message)}" query = < ${var.latency_threshold_critical} EOL @@ -297,14 +297,14 @@ EOL } resource 
"datadog_monitor" "redis_hitrate" { - name = "[${var.environment}] Redis hitrate is too low {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + name = "[${var.environment}] Redis hitrate is too low {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.hitrate_message, var.message)}" query = < Date: Tue, 31 Jul 2018 17:24:37 +0200 Subject: [PATCH 06/21] MON-271 created_by:terraform added --- middleware/redis/monitors-redis.tf | 55 +++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index db4a5f2..d59eed1 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -39,7 +39,12 @@ EOL require_full_window = true new_host_delay = "${var.delay}" - tags = ["env:${var.environment}", "resource:redis"] + tags = [ + "created_by:terraform", + "created_by:terraform", + "env:${var.environment}", + "resource:redis", + ] } resource "datadog_monitor" "redis_expirations" { @@ -75,7 +80,11 @@ EOL require_full_window = false new_host_delay = "${var.delay}" - tags = ["env:${var.environment}", "resource:redis"] + tags = [ + "created_by:terraform", + "env:${var.environment}", + "resource:redis", + ] } resource "datadog_monitor" "redis_blocked_clients" { @@ -112,7 +121,11 @@ EOL require_full_window = false new_host_delay = "${var.delay}" - tags = ["env:${var.environment}", "resource:redis"] + tags = [ + "created_by:terraform", + "env:${var.environment}", + "resource:redis", + ] } resource "datadog_monitor" "redis_keyspace" { @@ -148,7 +161,11 @@ EOL require_full_window = false new_host_delay = "${var.delay}" - tags = ["env:${var.environment}", "resource:redis"] + tags = [ + "created_by:terraform", + "env:${var.environment}", + 
"resource:redis", + ] } resource "datadog_monitor" "redis_mem_used" { @@ -185,7 +202,11 @@ EOL require_full_window = false new_host_delay = "${var.delay}" - tags = ["env:${var.environment}", "resource:redis"] + tags = [ + "created_by:terraform", + "env:${var.environment}", + "resource:redis", + ] } resource "datadog_monitor" "redis_mem_frag" { @@ -221,7 +242,11 @@ EOL require_full_window = false new_host_delay = "${var.delay}" - tags = ["env:${var.environment}", "resource:redis"] + tags = [ + "created_by:terraform", + "env:${var.environment}", + "resource:redis", + ] } resource "datadog_monitor" "redis_rejected_con" { @@ -257,7 +282,11 @@ EOL require_full_window = false new_host_delay = "${var.delay}" - tags = ["env:${var.environment}", "resource:redis"] + tags = [ + "created_by:terraform", + "env:${var.environment}", + "resource:redis", + ] } resource "datadog_monitor" "redis_latency" { @@ -293,7 +322,11 @@ EOL require_full_window = false new_host_delay = "${var.delay}" - tags = ["env:${var.environment}", "resource:redis"] + tags = [ + "created_by:terraform", + "env:${var.environment}", + "resource:redis", + ] } resource "datadog_monitor" "redis_hitrate" { @@ -331,5 +364,9 @@ EOL require_full_window = false new_host_delay = "${var.delay}" - tags = ["env:${var.environment}", "resource:redis"] + tags = [ + "created_by:terraform", + "env:${var.environment}", + "resource:redis", + ] } From cfdfb647396b0eda27859b0fb74dd7ec720029f3 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Tue, 31 Jul 2018 17:27:28 +0200 Subject: [PATCH 07/21] MON-271 Readable name for all monitors --- middleware/redis/README.md | 18 +++++----- middleware/redis/monitors-redis.tf | 19 +++++------ middleware/redis/outputs.tf | 54 +++++++++++++++--------------- 3 files changed, 45 insertions(+), 46 deletions(-) diff --git a/middleware/redis/README.md b/middleware/redis/README.md index d560174..492fd66 100644 --- a/middleware/redis/README.md +++ b/middleware/redis/README.md @@ -95,15 +95,15 
@@ Creates DataDog monitors with the following checks: | Name | Description | |------|-------------| -| redis_blocked_clients_id | id for monitor redis_blocked_clients | -| redis_evictedkeys_id | id for monitor redis_evictedkeys | -| redis_expirations_id | id for monitor redis_expirations | -| redis_hitrate_id | id for monitor redis_hitrate | -| redis_keyspace_id | id for monitor redis_keyspace | -| redis_latency_id | id for monitor redis_latency | -| redis_mem_frag_id | id for monitor redis_mem_frag | -| redis_mem_used_id | id for monitor redis_mem_used | -| redis_rejected_con_id | id for monitor redis_rejected_con | +| blocked_clients_id | id for monitor blocked_clients | +| evicted_keys_id | id for monitor evicted_keys | +| expirations_id | id for monitor expirations | +| hitrate_id | id for monitor hitrate | +| keyspace_full_id | id for monitor keyspace_full | +| latency_id | id for monitor latency | +| memory_frag_id | id for monitor memory_frag | +| memory_used_id | id for monitor memory_used | +| rejected_connections_id | id for monitor rejected_connections | ## Related documentation diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index d59eed1..7556eb7 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -6,7 +6,7 @@ data "template_file" "filter" { } } -resource "datadog_monitor" "redis_evictedkeys" { +resource "datadog_monitor" "evicted_keys" { name = "[${var.environment}] Redis too many evicted keys {{#is_alert}}{{{comparator}}} {{threshold}}% (+{{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% (+{{value}}%){{/is_warning}}" message = "${coalesce(var.evictedkeys_change_message, var.message)}" @@ -40,14 +40,13 @@ EOL new_host_delay = "${var.delay}" tags = [ - "created_by:terraform", "created_by:terraform", "env:${var.environment}", "resource:redis", ] } -resource "datadog_monitor" "redis_expirations" { +resource "datadog_monitor" "expirations" { name = 
"[${var.environment}] Redis too many expired keys {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.expirations_rate_message, var.message)}" @@ -87,7 +86,7 @@ EOL ] } -resource "datadog_monitor" "redis_blocked_clients" { +resource "datadog_monitor" "blocked_clients" { name = "[${var.environment}] Redis too many blocked clients {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.blocked_clients_message, var.message)}" @@ -128,7 +127,7 @@ EOL ] } -resource "datadog_monitor" "redis_keyspace" { +resource "datadog_monitor" "keyspace_full" { name = "[${var.environment}] Redis keyspace seems full (no changes since ${var.keyspace_timeframe})" message = "${coalesce(var.keyspace_message, var.message)}" @@ -168,7 +167,7 @@ EOL ] } -resource "datadog_monitor" "redis_mem_used" { +resource "datadog_monitor" "memory_used" { name = "[${var.environment}] Redis too many ram memory used {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.mem_used_message, var.message)}" @@ -209,7 +208,7 @@ EOL ] } -resource "datadog_monitor" "redis_mem_frag" { +resource "datadog_monitor" "memory_frag" { name = "[${var.environment}] Redis memory ram fragmented {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.mem_frag_message, var.message)}" @@ -249,7 +248,7 @@ EOL ] } -resource "datadog_monitor" "redis_rejected_con" { +resource "datadog_monitor" "rejected_connections" { name = "[${var.environment}] Redis too many rejected connections {{#is_alert}}{{{comparator}}} {{threshold}} 
({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" message = "${coalesce(var.rejected_con_message, var.message)}" @@ -289,7 +288,7 @@ EOL ] } -resource "datadog_monitor" "redis_latency" { +resource "datadog_monitor" "latency" { name = "[${var.environment}] Redis latency is too high {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}){{/is_warning}}" message = "${coalesce(var.latency_message, var.message)}" @@ -329,7 +328,7 @@ EOL ] } -resource "datadog_monitor" "redis_hitrate" { +resource "datadog_monitor" "hitrate" { name = "[${var.environment}] Redis hitrate is too low {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.hitrate_message, var.message)}" diff --git a/middleware/redis/outputs.tf b/middleware/redis/outputs.tf index 94506b3..f7a2c70 100644 --- a/middleware/redis/outputs.tf +++ b/middleware/redis/outputs.tf @@ -1,44 +1,44 @@ -output "redis_evictedkeys_id" { - description = "id for monitor redis_evictedkeys" - value = "${datadog_monitor.redis_evictedkeys.id}" +output "evicted_keys_id" { + description = "id for monitor evicted_keys" + value = "${datadog_monitor.evicted_keys.id}" } -output "redis_expirations_id" { - description = "id for monitor redis_expirations" - value = "${datadog_monitor.redis_expirations.id}" +output "expirations_id" { + description = "id for monitor expirations" + value = "${datadog_monitor.expirations.id}" } -output "redis_blocked_clients_id" { - description = "id for monitor redis_blocked_clients" - value = "${datadog_monitor.redis_blocked_clients.id}" +output "blocked_clients_id" { + description = "id for monitor blocked_clients" + value = "${datadog_monitor.blocked_clients.id}" } -output "redis_keyspace_id" { - description = "id for monitor redis_keyspace" - value = 
"${datadog_monitor.redis_keyspace.id}" +output "keyspace_full_id" { + description = "id for monitor keyspace_full" + value = "${datadog_monitor.keyspace_full.id}" } -output "redis_mem_used_id" { - description = "id for monitor redis_mem_used" - value = "${datadog_monitor.redis_mem_used.id}" +output "memory_used_id" { + description = "id for monitor memory_used" + value = "${datadog_monitor.memory_used.id}" } -output "redis_mem_frag_id" { - description = "id for monitor redis_mem_frag" - value = "${datadog_monitor.redis_mem_frag.id}" +output "memory_frag_id" { + description = "id for monitor memory_frag" + value = "${datadog_monitor.memory_frag.id}" } -output "redis_rejected_con_id" { - description = "id for monitor redis_rejected_con" - value = "${datadog_monitor.redis_rejected_con.id}" +output "rejected_connections_id" { + description = "id for monitor rejected_connections" + value = "${datadog_monitor.rejected_connections.id}" } -output "redis_latency_id" { - description = "id for monitor redis_latency" - value = "${datadog_monitor.redis_latency.id}" +output "latency_id" { + description = "id for monitor latency" + value = "${datadog_monitor.latency.id}" } -output "redis_hitrate_id" { - description = "id for monitor redis_hitrate" - value = "${datadog_monitor.redis_hitrate.id}" +output "hitrate_id" { + description = "id for monitor hitrate" + value = "${datadog_monitor.hitrate.id}" } From fe659be1a9e667a517c0c6b9b52e431b0e888d5c Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 3 Aug 2018 15:51:33 +0200 Subject: [PATCH 08/21] MON-271 Added service check --- middleware/redis/README.md | 9 +++++++ middleware/redis/inputs.tf | 42 ++++++++++++++++++++++++++++++ middleware/redis/monitors-redis.tf | 40 ++++++++++++++++++++++++++++ middleware/redis/outputs.tf | 5 ++++ 4 files changed, 96 insertions(+) diff --git a/middleware/redis/README.md b/middleware/redis/README.md index 492fd66..0dd6acd 100644 --- a/middleware/redis/README.md +++ 
b/middleware/redis/README.md @@ -25,6 +25,7 @@ Creates DataDog monitors with the following checks: - Redis too many rejected connections - Redis latency is too high - Redis hitrate is too low +- Redis does not respond ## Inputs @@ -83,6 +84,13 @@ Creates DataDog monitors with the following checks: | mem_used_time_aggregator | Monitor aggregator for Redis RAM memory used [available values: min, max or avg] | string | `min` | no | | mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | +| not_responding_by | Group by for the service check | string | `"host","redis_host","redis_port"` | no | +| not_responding_last | Parameter 'last' for the service check | string | `6` | no | +| not_responding_message | Custom message for Redis does not respond monitor | string | `` | no | +| not_responding_silenced | Groups to mute for Redis does not respond monitor | map | `` | no | +| not_responding_threshold_critical | Not responding limit (critical threshold) | string | `5` | no | +| not_responding_threshold_ok | Not responding limit (ok threshold) | string | `1` | no | +| not_responding_threshold_warning | Not responding limit (warning threshold) | string | `1` | no | | redis_silenced | Groups to mute for Redis monitors | map | `` | no | | rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no | | rejected_con_silenced | Groups to mute for Redis rejected connections errors monitor | map | `` | no | @@ -103,6 +111,7 @@ Creates DataDog monitors with the following checks: | latency_id | id for monitor latency | | memory_frag_id | id for monitor memory_frag | | memory_used_id | id for monitor memory_used | +| not_responding_id | id for monitor not_responding | | rejected_connections_id | id for monitor rejected_connections | ## 
Related documentation diff --git a/middleware/redis/inputs.tf b/middleware/redis/inputs.tf index 527b5d4..1a4a1e3 100644 --- a/middleware/redis/inputs.tf +++ b/middleware/redis/inputs.tf @@ -339,3 +339,45 @@ variable "hitrate_threshold_warning" { description = "hitrate limit (warning threshold)" default = 30 } + +# +# Connection Down +# +variable "not_responding_silenced" { + description = "Groups to mute for Redis does not respond monitor" + type = "map" + default = {} +} + +variable "not_responding_message" { + description = "Custom message for Redis does not respond monitor" + type = "string" + default = "" +} + +variable "not_responding_by" { + description = "Group by for the service check" + type = "string" + default = "\"host\",\"redis_host\",\"redis_port\"" +} + +variable "not_responding_last" { + description = "Parameter 'last' for the service check" + type = "string" + default = 6 +} + +variable "not_responding_threshold_critical" { + description = "Not responding limit (critical threshold)" + default = 5 +} + +variable "not_responding_threshold_warning" { + description = "Not responding limit (warning threshold)" + default = 1 +} + +variable "not_responding_threshold_ok" { + description = "Not responding limit (ok threshold)" + default = 1 +} diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index 7556eb7..ce16da4 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -369,3 +369,43 @@ EOL "resource:redis", ] } + +# +# Service Check +# +resource "datadog_monitor" "not_responding" { + name = "[${var.environment}] Redis does not respond" + message = "${coalesce(var.not_responding_message, var.message)}" + + query = < Date: Tue, 7 Aug 2018 17:41:37 +0200 Subject: [PATCH 09/21] MON-271 Evaluation delay and no_data_timeframe removed from service check --- middleware/redis/monitors-redis.tf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git 
a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index ce16da4..72115f3 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -395,13 +395,11 @@ EOL locked = false timeout_h = 0 include_tags = true - no_data_timeframe = 2 require_full_window = true notify_no_data = true renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + new_host_delay = "${var.delay}" tags = [ "created_by:terraform", From 9a35aee91cc237586d6fb56af2fa593fea948654 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Mon, 13 Aug 2018 10:05:30 +0200 Subject: [PATCH 10/21] MON-271 threshold adjustment for fragmentation ratio --- middleware/redis/README.md | 4 ++-- middleware/redis/inputs.tf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/middleware/redis/README.md b/middleware/redis/README.md index 0dd6acd..d5283c2 100644 --- a/middleware/redis/README.md +++ b/middleware/redis/README.md @@ -73,8 +73,8 @@ Creates DataDog monitors with the following checks: | latency_timeframe | Monitor timeframe for Redis latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | mem_frag_message | Custom message for Redis memory RAM fragmentation monitor | string | `` | no | | mem_frag_silenced | Groups to mute for Redis memory RAM fragmentation monitor | map | `` | no | -| mem_frag_threshold_critical | memory RAM fragmentation limit (critical threshold) | string | `130` | no | -| mem_frag_threshold_warning | memory RAM fragmentation limit (warning threshold) | string | `100` | no | +| mem_frag_threshold_critical | memory RAM fragmentation limit (critical threshold) | string | `150` | no | +| mem_frag_threshold_warning | memory RAM fragmentation limit (warning threshold) | string | `130` | no | | mem_frag_time_aggregator | Monitor aggregator for Redis memory RAM fragmentation [available values: min, max or avg] | string | 
`min` | no | | mem_frag_timeframe | Monitor timeframe for Redis memory RAM fragmentation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | mem_used_message | Custom message for Redis RAM memory used monitor | string | `` | no | diff --git a/middleware/redis/inputs.tf b/middleware/redis/inputs.tf index 1a4a1e3..290653b 100644 --- a/middleware/redis/inputs.tf +++ b/middleware/redis/inputs.tf @@ -230,12 +230,12 @@ variable "mem_frag_timeframe" { variable "mem_frag_threshold_critical" { description = "memory RAM fragmentation limit (critical threshold)" - default = 130 + default = 150 } variable "mem_frag_threshold_warning" { description = "memory RAM fragmentation limit (warning threshold)" - default = 100 + default = 130 } variable "rejected_con_silenced" { From b63d9b12b4971777cc5307d673a402e659a7053f Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Mon, 13 Aug 2018 10:56:09 +0200 Subject: [PATCH 11/21] MON-271 change hitrate time aggregator to max to make it more tolerant --- middleware/redis/README.md | 2 +- middleware/redis/inputs.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/middleware/redis/README.md b/middleware/redis/README.md index d5283c2..9de3971 100644 --- a/middleware/redis/README.md +++ b/middleware/redis/README.md @@ -57,7 +57,7 @@ Creates DataDog monitors with the following checks: | hitrate_silenced | Groups to mute for Redis hitrate monitor | map | `` | no | | hitrate_threshold_critical | hitrate limit (critical threshold) | string | `10` | no | | hitrate_threshold_warning | hitrate limit (warning threshold) | string | `30` | no | -| hitrate_time_aggregator | Monitor aggregator for Redis hitrate [available values: min, max or avg] | string | `min` | no | +| hitrate_time_aggregator | Monitor aggregator for Redis hitrate [available values: min, max or avg] | string | `max` | no | | hitrate_timeframe | Monitor timeframe for Redis hitrate [available 
values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | keyspace_message | Custom message for Redis keyspace monitor | string | `` | no | | keyspace_silenced | Groups to mute for Redis keyspace monitor | map | `` | no | diff --git a/middleware/redis/inputs.tf b/middleware/redis/inputs.tf index 290653b..cc52f8b 100644 --- a/middleware/redis/inputs.tf +++ b/middleware/redis/inputs.tf @@ -321,7 +321,7 @@ variable "hitrate_message" { variable "hitrate_time_aggregator" { description = "Monitor aggregator for Redis hitrate [available values: min, max or avg]" type = "string" - default = "min" + default = "max" } variable "hitrate_timeframe" { From 6af25e65136b6ea88d2f0f355045db13c1887d97 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 13 Aug 2018 15:32:13 +0200 Subject: [PATCH 12/21] MON-271 Split delay in evaluation and new_host delays --- middleware/redis/README.md | 3 ++- middleware/redis/inputs.tf | 9 +++++-- middleware/redis/monitors-redis.tf | 38 +++++++++++++++--------------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/middleware/redis/README.md b/middleware/redis/README.md index 9de3971..3e5f7c8 100644 --- a/middleware/redis/README.md +++ b/middleware/redis/README.md @@ -37,8 +37,8 @@ Creates DataDog monitors with the following checks: | blocked_clients_threshold_warning | Blocked clients rate (warning threshold) | string | `10` | no | | blocked_clients_time_aggregator | Monitor aggregator for Redis Blocked clients [available values: min, max or avg] | string | `min` | no | | blocked_clients_timeframe | Monitor timeframe for Redis Blocked clients [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | 
`15` | no | | evictedkeys_change_message | Custom message for Redis evicted keys monitor | string | `` | no | | evictedkeys_change_silenced | Groups to mute for Redis evicted keys monitor | map | `` | no | | evictedkeys_change_threshold_critical | Evicted keys change (critical threshold) | string | `100` | no | @@ -84,6 +84,7 @@ Creates DataDog monitors with the following checks: | mem_used_time_aggregator | Monitor aggregator for Redis RAM memory used [available values: min, max or avg] | string | `min` | no | | mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds before a newly detected host is evaluated by the monitors | string | `300` | no | | not_responding_by | Group by for the service check | string | `"host","redis_host","redis_port"` | no | | not_responding_last | Parameter 'last' for the service check | string | `6` | no | | not_responding_message | Custom message for Redis does not respond monitor | string | `` | no | diff --git a/middleware/redis/inputs.tf b/middleware/redis/inputs.tf index cc52f8b..2c781bc 100644 --- a/middleware/redis/inputs.tf +++ b/middleware/redis/inputs.tf @@ -9,9 +9,14 @@ variable "message" { description = "Message sent when a Redis monitor is triggered" } -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" - default = 900 + default = 15 +} + +variable "new_host_delay" { + description = "Delay in seconds before a newly detected host is evaluated by the monitors" + default = 300 } variable "filter_tags_use_defaults" { diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index 72115f3..c1e3e57 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -30,14 +30,14 @@ EOL silenced = "${var.evictedkeys_change_silenced}"
notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = true - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", @@ -70,14 +70,14 @@ EOL silenced = "${var.expirations_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", @@ -111,14 +111,14 @@ EOL silenced = "${var.blocked_clients_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", @@ -151,14 +151,14 @@ EOL silenced = "${var.keyspace_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", @@ -192,14 +192,14 @@ EOL silenced = "${var.mem_used_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", @@ -232,14 +232,14 @@ EOL silenced = 
"${var.mem_frag_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", @@ -272,14 +272,14 @@ EOL silenced = "${var.rejected_con_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", @@ -312,14 +312,14 @@ EOL silenced = "${var.latency_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", @@ -354,14 +354,14 @@ EOL silenced = "${var.hitrate_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", @@ -399,7 +399,7 @@ EOL notify_no_data = true renotify_interval = 0 - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = [ "created_by:terraform", From 642eafd62cfca8583a37c69b3357ffef53a869d2 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Mon, 20 Aug 2018 16:45:45 +0200 Subject: [PATCH 13/21] MON-271 remove parentheses to keep the monitor editable in webui --- 
middleware/redis/monitors-redis.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/middleware/redis/monitors-redis.tf b/middleware/redis/monitors-redis.tf index c1e3e57..3b3acdd 100644 --- a/middleware/redis/monitors-redis.tf +++ b/middleware/redis/monitors-redis.tf @@ -213,9 +213,9 @@ resource "datadog_monitor" "memory_frag" { message = "${coalesce(var.mem_frag_message, var.message)}" query = < ${var.mem_frag_threshold_critical} + * 100 > ${var.mem_frag_threshold_critical} EOL type = "metric alert" From 9fbba39e9f8069d01a264e03349a93758cfe0a46 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Mon, 20 Aug 2018 16:46:20 +0200 Subject: [PATCH 14/21] MON-271 move redis in database directory --- {middleware => databases}/redis/README.md | 0 {middleware => databases}/redis/inputs.tf | 0 {middleware => databases}/redis/monitors-redis.tf | 0 {middleware => databases}/redis/outputs.tf | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename {middleware => databases}/redis/README.md (100%) rename {middleware => databases}/redis/inputs.tf (100%) rename {middleware => databases}/redis/monitors-redis.tf (100%) rename {middleware => databases}/redis/outputs.tf (100%) diff --git a/middleware/redis/README.md b/databases/redis/README.md similarity index 100% rename from middleware/redis/README.md rename to databases/redis/README.md diff --git a/middleware/redis/inputs.tf b/databases/redis/inputs.tf similarity index 100% rename from middleware/redis/inputs.tf rename to databases/redis/inputs.tf diff --git a/middleware/redis/monitors-redis.tf b/databases/redis/monitors-redis.tf similarity index 100% rename from middleware/redis/monitors-redis.tf rename to databases/redis/monitors-redis.tf diff --git a/middleware/redis/outputs.tf b/databases/redis/outputs.tf similarity index 100% rename from middleware/redis/outputs.tf rename to databases/redis/outputs.tf From a707ac937db700382487271feebdb93489598719 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: 
Mon, 20 Aug 2018 17:11:22 +0200 Subject: [PATCH 15/21] MON-271 auto update --- README.md | 2 +- {databases => database}/redis/README.md | 6 +++--- {databases => database}/redis/inputs.tf | 0 {databases => database}/redis/monitors-redis.tf | 0 {databases => database}/redis/outputs.tf | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename {databases => database}/redis/README.md (98%) rename {databases => database}/redis/inputs.tf (100%) rename {databases => database}/redis/monitors-redis.tf (100%) rename {databases => database}/redis/outputs.tf (100%) diff --git a/README.md b/README.md index 38a965e..ca132ec 100644 --- a/README.md +++ b/README.md @@ -96,10 +96,10 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [filter-tags](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/filter-tags/) - [database](https://bitbucket.org/morea/terraform.feature.datadog/src/master/database/) - [mongodb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/database/mongodb/) + - [redis](https://bitbucket.org/morea/terraform.feature.datadog/src/master/database/redis/) - [middleware](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/) - [apache](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/apache/) - [nginx](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/nginx/) - [php-fpm](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/php-fpm/) - - [redis](https://bitbucket.org/morea/terraform.feature.datadog/src/master/middleware/redis/) - [system](https://bitbucket.org/morea/terraform.feature.datadog/src/master/system/) - [generic](https://bitbucket.org/morea/terraform.feature.datadog/src/master/system/generic/) diff --git a/databases/redis/README.md b/database/redis/README.md similarity index 98% rename from databases/redis/README.md rename to database/redis/README.md index 3e5f7c8..1de21cf 100644 --- 
a/databases/redis/README.md +++ b/database/redis/README.md @@ -1,10 +1,10 @@ -# MIDDLEWARE REDIS DataDog monitors +# DATABASES REDIS DataDog monitors ## How to use this module ``` -module "datadog-monitors-middleware-redis" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//middleware/redis?ref={revision}" +module "datadog-monitors-databases-redis" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//databases/redis?ref={revision}" environment = "${var.environment}" message = "${module.datadog-message-alerting.alerting-message}" diff --git a/databases/redis/inputs.tf b/database/redis/inputs.tf similarity index 100% rename from databases/redis/inputs.tf rename to database/redis/inputs.tf diff --git a/databases/redis/monitors-redis.tf b/database/redis/monitors-redis.tf similarity index 100% rename from databases/redis/monitors-redis.tf rename to database/redis/monitors-redis.tf diff --git a/databases/redis/outputs.tf b/database/redis/outputs.tf similarity index 100% rename from databases/redis/outputs.tf rename to database/redis/outputs.tf From 7f82a24e5528cb1f19a9d0932bc845a93eb41655 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Tue, 21 Aug 2018 16:43:47 +0200 Subject: [PATCH 16/21] MON-271 update name to generic as described in documentation --- database/redis/monitors-redis.tf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/database/redis/monitors-redis.tf b/database/redis/monitors-redis.tf index 3b3acdd..d0c8cac 100644 --- a/database/redis/monitors-redis.tf +++ b/database/redis/monitors-redis.tf @@ -7,7 +7,7 @@ data "template_file" "filter" { } resource "datadog_monitor" "evicted_keys" { - name = "[${var.environment}] Redis too many evicted keys {{#is_alert}}{{{comparator}}} {{threshold}}% (+{{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% (+{{value}}%){{/is_warning}}" + name = "[${var.environment}] Redis evicted keys 
{{#is_alert}}{{{comparator}}} {{threshold}}% (+{{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% (+{{value}}%){{/is_warning}}" message = "${coalesce(var.evictedkeys_change_message, var.message)}" query = < Date: Tue, 21 Aug 2018 17:28:54 +0200 Subject: [PATCH 17/21] MON-271 use filter tags module --- database/redis/README.md | 22 +++++++++---------- database/redis/modules.tf | 8 +++++++ database/redis/monitors-redis.tf | 36 +++++++++++++------------------- 3 files changed, 33 insertions(+), 33 deletions(-) create mode 100644 database/redis/modules.tf diff --git a/database/redis/README.md b/database/redis/README.md index 1de21cf..1cad5cc 100644 --- a/database/redis/README.md +++ b/database/redis/README.md @@ -1,10 +1,10 @@ -# DATABASES REDIS DataDog monitors +# DATABASE REDIS DataDog monitors ## How to use this module ``` -module "datadog-monitors-databases-redis" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//databases/redis?ref={revision}" +module "datadog-monitors-database-redis" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//database/redis?ref={revision}" environment = "${var.environment}" message = "${module.datadog-message-alerting.alerting-message}" @@ -16,15 +16,15 @@ module "datadog-monitors-databases-redis" { Creates DataDog monitors with the following checks: -- Redis too many evicted keys -- Redis too many expired keys -- Redis too many blocked clients +- Redis evicted keys +- Redis expired keys +- Redis blocked clients - Redis keyspace seems full (no changes since ${var.keyspace_timeframe}) -- Redis too many ram memory used -- Redis memory ram fragmented -- Redis too many rejected connections -- Redis latency is too high -- Redis hitrate is too low +- Redis memory used +- Redis memory fragmented +- Redis rejected connections +- Redis latency +- Redis hitrate - Redis does not respond ## Inputs diff --git a/database/redis/modules.tf b/database/redis/modules.tf new 
file mode 100644 index 0000000..48011c6 --- /dev/null +++ b/database/redis/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../common/filter-tags" + + environment = "${var.environment}" + resource = "redis" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/database/redis/monitors-redis.tf b/database/redis/monitors-redis.tf index d0c8cac..7fd438c 100644 --- a/database/redis/monitors-redis.tf +++ b/database/redis/monitors-redis.tf @@ -1,18 +1,10 @@ -data "template_file" "filter" { - template = "$${filter}" - - vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_redis:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" - } -} - resource "datadog_monitor" "evicted_keys" { name = "[${var.environment}] Redis evicted keys {{#is_alert}}{{{comparator}}} {{threshold}}% (+{{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% (+{{value}}%){{/is_warning}}" message = "${coalesce(var.evictedkeys_change_message, var.message)}" query = < ${var.evictedkeys_change_threshold_critical} EOL @@ -52,7 +44,7 @@ resource "datadog_monitor" "expirations" { query = < ${var.expirations_rate_threshold_critical} EOL @@ -92,8 +84,8 @@ resource "datadog_monitor" "blocked_clients" { query = < ${var.blocked_clients_threshold_critical} EOL @@ -133,7 +125,7 @@ resource "datadog_monitor" "keyspace_full" { query = < ${var.mem_used_threshold_critical} EOL @@ -214,7 +206,7 @@ resource "datadog_monitor" "memory_frag" { query = < ${var.mem_frag_threshold_critical} EOL @@ -254,7 +246,7 @@ resource "datadog_monitor" "rejected_connections" { query = < ${var.rejected_con_threshold_critical} EOL @@ -294,7 +286,7 @@ resource "datadog_monitor" "latency" { query = < ${var.latency_threshold_critical} EOL @@ -334,9 +326,9 @@ resource "datadog_monitor" "hitrate" { query = < Date: Tue, 21 Aug 2018 18:09:24 +0200 Subject: [PATCH 18/21] MON-271 add 
extra tags --- database/mongodb/monitors-mongo.tf | 8 ++-- database/redis/README.md | 10 +++++ database/redis/inputs.tf | 60 ++++++++++++++++++++++++++++++ database/redis/monitors-redis.tf | 60 +++++------------------------- 4 files changed, 84 insertions(+), 54 deletions(-) diff --git a/database/mongodb/monitors-mongo.tf b/database/mongodb/monitors-mongo.tf index e51394a..8f52da0 100644 --- a/database/mongodb/monitors-mongo.tf +++ b/database/mongodb/monitors-mongo.tf @@ -20,7 +20,7 @@ resource "datadog_monitor" "mongodb_primary" { silenced = "${var.mongodb_primary_silenced}" - tags = ["env:${var.environment}", "type:database", "provider:mongodb", "resource:mongodb", "team:claranet", "created-by:terraform", "${var.mongodb_primary_extra_tags}"] + tags = ["env:${var.environment}", "type:database", "provider:mongo", "resource:mongodb", "team:claranet", "created-by:terraform", "${var.mongodb_primary_extra_tags}"] } resource "datadog_monitor" "mongodb_secondary" { @@ -52,7 +52,7 @@ resource "datadog_monitor" "mongodb_secondary" { silenced = "${var.mongodb_secondary_silenced}" - tags = ["env:${var.environment}", "type:database", "provider:mongodb", "resource:mongodb", "team:claranet", "created-by:terraform", "${var.mongodb_secondary_extra_tags}"] + tags = ["env:${var.environment}", "type:database", "provider:mongo", "resource:mongodb", "team:claranet", "created-by:terraform", "${var.mongodb_secondary_extra_tags}"] } resource "datadog_monitor" "mongodb_server_count" { @@ -83,7 +83,7 @@ resource "datadog_monitor" "mongodb_server_count" { silenced = "${var.mongodb_secondary_silenced}" - tags = ["env:${var.environment}", "type:database", "provider:mongodb", "resource:mongodb", "team:claranet", "created-by:terraform", "${var.mongodb_secondary_extra_tags}"] + tags = ["env:${var.environment}", "type:database", "provider:mongo", "resource:mongodb", "team:claranet", "created-by:terraform", "${var.mongodb_secondary_extra_tags}"] } resource "datadog_monitor" "mongodb_replication" { 
@@ -113,5 +113,5 @@ resource "datadog_monitor" "mongodb_replication" { silenced = "${var.mongodb_replication_silenced}" - tags = ["env:${var.environment}", "type:database", "provider:mongodb", "resource:mongodb", "team:claranet", "created-by:terraform", "${var.mongodb_replication_extra_tags}"] + tags = ["env:${var.environment}", "type:database", "provider:mongo", "resource:mongodb", "team:claranet", "created-by:terraform", "${var.mongodb_replication_extra_tags}"] } diff --git a/database/redis/README.md b/database/redis/README.md index 1cad5cc..dd3fb7e 100644 --- a/database/redis/README.md +++ b/database/redis/README.md @@ -31,6 +31,7 @@ Creates DataDog monitors with the following checks: | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| +| blocked_clients_extra_tags | Extra tags for Redis Blocked clients monitor | string | `` | no | | blocked_clients_message | Custom message for Redis Blocked clients monitor | string | `` | no | | blocked_clients_silenced | Groups to mute for Redis Blocked clients monitor | map | `` | no | | blocked_clients_threshold_critical | Blocked clients rate (critical threshold) | string | `30` | no | @@ -39,12 +40,14 @@ Creates DataDog monitors with the following checks: | blocked_clients_timeframe | Monitor timeframe for Redis Blocked clients [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | | evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no | +| evictedkeys_change_extra_tags | Extra tags for Redis evicted keys monitor | string | `` | no | | evictedkeys_change_message | Custom message for Redis evicted keys monitor | string | `` | no | | evictedkeys_change_silenced | Groups to mute for Redis evicted keys monitor | map | `` | no | | evictedkeys_change_threshold_critical | Evicted keys change (critical threshold) | string | `100` | no 
| | evictedkeys_change_threshold_warning | Evicted keys change (warning threshold) | string | `20` | no | | evictedkeys_change_time_aggregator | Monitor aggregator for Redis evicted keys [available values: min, max or avg] | string | `avg` | no | | evictedkeys_change_timeframe | Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| expirations_rate_extra_tags | Extra tags for Redis keys expirations monitor | string | `` | no | | expirations_rate_message | Custom message for Redis keys expirations monitor | string | `` | no | | expirations_rate_threshold_critical | Expirations percent (critical threshold) | string | `80` | no | | expirations_rate_threshold_warning | Expirations percent (warning threshold) | string | `60` | no | @@ -53,30 +56,35 @@ Creates DataDog monitors with the following checks: | expirations_silenced | Groups to mute for Redis keys expirations monitor | map | `` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| hitrate_extra_tags | Extra tags for Redis hitrate monitor | string | `` | no | | hitrate_message | Custom message for Redis hitrate monitor | string | `` | no | | hitrate_silenced | Groups to mute for Redis hitrate monitor | map | `` | no | | hitrate_threshold_critical | hitrate limit (critical threshold) | string | `10` | no | | hitrate_threshold_warning | hitrate limit (warning threshold) | string | `30` | no | | hitrate_time_aggregator | Monitor aggregator for Redis hitrate [available values: min, max or avg] | string | `max` | no | | hitrate_timeframe | Monitor timeframe for Redis hitrate [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| keyspace_extra_tags | Extra tags for Redis keyspace monitor | 
string | `` | no | | keyspace_message | Custom message for Redis keyspace monitor | string | `` | no | | keyspace_silenced | Groups to mute for Redis keyspace monitor | map | `` | no | | keyspace_threshold_critical | Keyspace no changement (critical threshold) | string | `0` | no | | keyspace_threshold_warning | Keyspace no changement (warning threshold) | string | `1` | no | | keyspace_time_aggregator | Monitor aggregator for Redis keyspace [available values: min, max or avg] | string | `min` | no | | keyspace_timeframe | Monitor timeframe for Redis keyspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| latency_extra_tags | Extra tags for Redis latency monitor | string | `` | no | | latency_message | Custom message for Redis latency monitor | string | `` | no | | latency_silenced | Groups to mute for Redis latency monitor | map | `` | no | | latency_threshold_critical | latency limit (critical threshold) | string | `100` | no | | latency_threshold_warning | latency limit (warning threshold) | string | `50` | no | | latency_time_aggregator | Monitor aggregator for Redis latency [available values: min, max or avg] | string | `min` | no | | latency_timeframe | Monitor timeframe for Redis latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| mem_frag_extra_tags | Extra tags for Redis memory RAM fragmentation monitor | string | `` | no | | mem_frag_message | Custom message for Redis memory RAM fragmentation monitor | string | `` | no | | mem_frag_silenced | Groups to mute for Redis memory RAM fragmentation monitor | map | `` | no | | mem_frag_threshold_critical | memory RAM fragmentation limit (critical threshold) | string | `150` | no | | mem_frag_threshold_warning | memory RAM fragmentation limit (warning threshold) | string | `130` | no | | mem_frag_time_aggregator | Monitor aggregator for Redis memory RAM 
fragmentation [available values: min, max or avg] | string | `min` | no | | mem_frag_timeframe | Monitor timeframe for Redis memory RAM fragmentation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| mem_used_extra_tags | Extra tags for Redis RAM memory used monitor | string | `` | no | | mem_used_message | Custom message for Redis RAM memory used monitor | string | `` | no | | mem_used_silenced | Groups to mute for Redis RAM memory used monitor | map | `` | no | | mem_used_threshold_critical | RAM memory used limit (critical threshold) | string | `95` | no | @@ -86,6 +94,7 @@ Creates DataDog monitors with the following checks: | message | Message sent when a Redis monitor is triggered | string | - | yes | | new_host_delay | Delay in seconds for the metric evaluation | string | `300` | no | | not_responding_by | Group by for the service check | string | `"host","redis_host","redis_port"` | no | +| not_responding_extra_tags | Extra tags for Redis does not respond monitor | string | `` | no | | not_responding_last | Parameter 'last' for the service check | string | `6` | no | | not_responding_message | Custom message for Redis does not respond monitor | string | `` | no | | not_responding_silenced | Groups to mute for Redis does not respond monitor | map | `` | no | @@ -93,6 +102,7 @@ Creates DataDog monitors with the following checks: | not_responding_threshold_ok | Not responding limit (ok threshold) | string | `1` | no | | not_responding_threshold_warning | Not responding limit (warning threshold) | string | `1` | no | | redis_silenced | Groups to mute for Redis monitors | map | `` | no | +| rejected_con_extra_tags | Extra tags for Redis rejected connections errors monitor | string | `` | no | | rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no | | rejected_con_silenced | Groups to mute for Redis rejected connections errors monitor | map | `` 
| no | | rejected_con_threshold_critical | rejected connections errors limit (critical threshold) | string | `50` | no | diff --git a/database/redis/inputs.tf b/database/redis/inputs.tf index 2c781bc..0b6108c 100644 --- a/database/redis/inputs.tf +++ b/database/redis/inputs.tf @@ -48,6 +48,12 @@ variable "evictedkeys_change_message" { default = "" } +variable "evictedkeys_change_extra_tags" { + description = "Extra tags for Redis evicted keys monitor" + type = "string" + default = "" +} + variable "evictedkeys_change_time_aggregator" { description = "Monitor aggregator for Redis evicted keys [available values: min, max or avg]" type = "string" @@ -82,6 +88,12 @@ variable "expirations_rate_message" { default = "" } +variable "expirations_rate_extra_tags" { + description = "Extra tags for Redis keys expirations monitor" + type = "string" + default = "" +} + variable "expirations_rate_time_aggregator" { description = "Monitor aggregator for Redis keys expirations [available values: min, max or avg]" type = "string" @@ -116,6 +128,12 @@ variable "blocked_clients_message" { default = "" } +variable "blocked_clients_extra_tags" { + description = "Extra tags for Redis Blocked clients monitor" + type = "string" + default = "" +} + variable "blocked_clients_time_aggregator" { description = "Monitor aggregator for Redis Blocked clients [available values: min, max or avg]" type = "string" @@ -153,6 +171,12 @@ variable "keyspace_message" { default = "" } +variable "keyspace_extra_tags" { + description = "Extra tags for Redis keyspace monitor" + type = "string" + default = "" +} + variable "keyspace_time_aggregator" { description = "Monitor aggregator for Redis keyspace [available values: min, max or avg]" type = "string" @@ -187,6 +211,12 @@ variable "mem_used_message" { default = "" } +variable "mem_used_extra_tags" { + description = "Extra tags for Redis RAM memory used monitor" + type = "string" + default = "" +} + variable "mem_used_time_aggregator" { description = 
"Monitor aggregator for Redis RAM memory used [available values: min, max or avg]" type = "string" @@ -221,6 +251,12 @@ variable "mem_frag_message" { default = "" } +variable "mem_frag_extra_tags" { + description = "Extra tags for Redis memory RAM fragmentation monitor" + type = "string" + default = "" +} + variable "mem_frag_time_aggregator" { description = "Monitor aggregator for Redis memory RAM fragmentation [available values: min, max or avg]" type = "string" @@ -255,6 +291,12 @@ variable "rejected_con_message" { default = "" } +variable "rejected_con_extra_tags" { + description = "Extra tags for Redis rejected connections errors monitor" + type = "string" + default = "" +} + variable "rejected_con_time_aggregator" { description = "Monitor aggregator for Redis rejected connections errors [available values: min, max or avg]" type = "string" @@ -289,6 +331,12 @@ variable "latency_message" { default = "" } +variable "latency_extra_tags" { + description = "Extra tags for Redis latency monitor" + type = "string" + default = "" +} + variable "latency_time_aggregator" { description = "Monitor aggregator for Redis latency [available values: min, max or avg]" type = "string" @@ -323,6 +371,12 @@ variable "hitrate_message" { default = "" } +variable "hitrate_extra_tags" { + description = "Extra tags for Redis hitrate monitor" + type = "string" + default = "" +} + variable "hitrate_time_aggregator" { description = "Monitor aggregator for Redis hitrate [available values: min, max or avg]" type = "string" @@ -360,6 +414,12 @@ variable "not_responding_message" { default = "" } +variable "not_responding_extra_tags" { + description = "Extra tags for Redis does not respond monitor" + type = "string" + default = "" +} + variable "not_responding_by" { description = "Group by for the service check" type = "string" diff --git a/database/redis/monitors-redis.tf b/database/redis/monitors-redis.tf index 7fd438c..d351ed8 100644 --- a/database/redis/monitors-redis.tf +++ 
b/database/redis/monitors-redis.tf @@ -31,11 +31,7 @@ EOL require_full_window = true new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.evictedkeys_change_extra_tags}"] } resource "datadog_monitor" "expirations" { @@ -71,11 +67,7 @@ EOL require_full_window = false new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.expirations_extra_tags}"] } resource "datadog_monitor" "blocked_clients" { @@ -112,11 +104,7 @@ EOL require_full_window = false new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.blocked_clients_extra_tags}"] } resource "datadog_monitor" "keyspace_full" { @@ -152,11 +140,7 @@ EOL require_full_window = false new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.keyspace_extra_tags}"] } resource "datadog_monitor" "memory_used" { @@ -193,11 +177,7 @@ EOL require_full_window = false new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.mem_used_extra_tags}"] } resource "datadog_monitor" 
"memory_frag" { @@ -233,11 +213,7 @@ EOL require_full_window = false new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.mem_frag_extra_tags}"] } resource "datadog_monitor" "rejected_connections" { @@ -273,11 +249,7 @@ EOL require_full_window = false new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.rejected_con_extra_tags}"] } resource "datadog_monitor" "latency" { @@ -313,11 +285,7 @@ EOL require_full_window = false new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.latency_extra_tags}"] } resource "datadog_monitor" "hitrate" { @@ -355,11 +323,7 @@ EOL require_full_window = false new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.hitrate_extra_tags}"] } # @@ -393,9 +357,5 @@ EOL new_host_delay = "${var.new_host_delay}" - tags = [ - "created_by:terraform", - "env:${var.environment}", - "resource:redis", - ] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.not_responding_extra_tags}"] } From 9e2d429213d50a2045aff2ae0f41d57a8f4897af Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Tue, 21 Aug 2018 18:21:14 +0200 
Subject: [PATCH 19/21] MON-271 hardcode service check thresholds --- database/redis/README.md | 5 ----- database/redis/inputs.tf | 27 --------------------------- database/redis/monitors-redis.tf | 13 ++++++------- 3 files changed, 6 insertions(+), 39 deletions(-) diff --git a/database/redis/README.md b/database/redis/README.md index dd3fb7e..e7e1e23 100644 --- a/database/redis/README.md +++ b/database/redis/README.md @@ -93,14 +93,9 @@ Creates DataDog monitors with the following checks: | mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | | new_host_delay | Delay in seconds for the metric evaluation | string | `300` | no | -| not_responding_by | Group by for the service check | string | `"host","redis_host","redis_port"` | no | | not_responding_extra_tags | Extra tags for Redis does not respond monitor | string | `` | no | -| not_responding_last | Parameter 'last' for the service check | string | `6` | no | | not_responding_message | Custom message for Redis does not respond monitor | string | `` | no | | not_responding_silenced | Groups to mute for Redis does not respond monitor | map | `` | no | -| not_responding_threshold_critical | Not responding limit (critical threshold) | string | `5` | no | -| not_responding_threshold_ok | Not responding limit (ok threshold) | string | `1` | no | -| not_responding_threshold_warning | Not responding limit (warning threshold) | string | `1` | no | | redis_silenced | Groups to mute for Redis monitors | map | `` | no | | rejected_con_extra_tags | Extra tags for Redis rejected connections errors monitor | string | `` | no | | rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no | diff --git a/database/redis/inputs.tf b/database/redis/inputs.tf index 0b6108c..a4faa78
100644 --- a/database/redis/inputs.tf +++ b/database/redis/inputs.tf @@ -419,30 +419,3 @@ variable "not_responding_extra_tags" { type = "string" default = "" } - -variable "not_responding_by" { - description = "Group by for the service check" - type = "string" - default = "\"host\",\"redis_host\",\"redis_port\"" -} - -variable "not_responding_last" { - description = "Parameter 'last' for the service check" - type = "string" - default = 6 -} - -variable "not_responding_threshold_critical" { - description = "Not responding limit (critical threshold)" - default = 5 -} - -variable "not_responding_threshold_warning" { - description = "Not responding limit (warning threshold)" - default = 1 -} - -variable "not_responding_threshold_ok" { - description = "Not responding limit (ok threshold)" - default = 1 -} diff --git a/database/redis/monitors-redis.tf b/database/redis/monitors-redis.tf index d351ed8..7487865 100644 --- a/database/redis/monitors-redis.tf +++ b/database/redis/monitors-redis.tf @@ -333,16 +333,15 @@ resource "datadog_monitor" "not_responding" { name = "[${var.environment}] Redis does not respond" message = "${coalesce(var.not_responding_message, var.message)}" - query = < Date: Tue, 21 Aug 2018 18:23:38 +0200 Subject: [PATCH 20/21] MON-271 fix expiration variables names --- database/redis/inputs.tf | 2 +- database/redis/monitors-redis.tf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/database/redis/inputs.tf b/database/redis/inputs.tf index a4faa78..101e01f 100644 --- a/database/redis/inputs.tf +++ b/database/redis/inputs.tf @@ -76,7 +76,7 @@ variable "evictedkeys_change_threshold_critical" { default = 100 } -variable "expirations_silenced" { +variable "expirations_rate_silenced" { description = "Groups to mute for Redis keys expirations monitor" type = "map" default = {} diff --git a/database/redis/monitors-redis.tf b/database/redis/monitors-redis.tf index 7487865..a5a1339 100644 --- a/database/redis/monitors-redis.tf +++ 
b/database/redis/monitors-redis.tf @@ -55,7 +55,7 @@ EOL critical = "${var.expirations_rate_threshold_critical}" } - silenced = "${var.expirations_silenced}" + silenced = "${var.expirations_rate_silenced}" notify_no_data = false evaluation_delay = "${var.evaluation_delay}" @@ -67,7 +67,7 @@ EOL require_full_window = false new_host_delay = "${var.new_host_delay}" - tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.expirations_extra_tags}"] + tags = ["env:${var.environment}", "type:database", "provider:redisdb", "resource:redis", "team:claranet", "created-by:terraform", "${var.expirations_rate_extra_tags}"] } resource "datadog_monitor" "blocked_clients" { From 93b64756bcb8e2ec75cf4aad9499826101dc34c0 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 22 Aug 2018 10:20:40 +0200 Subject: [PATCH 21/21] MON-271 fix type for extra tags variable --- database/redis/README.md | 22 +++++++++++----------- database/redis/inputs.tf | 40 ++++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/database/redis/README.md b/database/redis/README.md index e7e1e23..e162a51 100644 --- a/database/redis/README.md +++ b/database/redis/README.md @@ -31,7 +31,7 @@ Creates DataDog monitors with the following checks: | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| blocked_clients_extra_tags | Extra tags for Redis Blocked clients monitor | string | `` | no | +| blocked_clients_extra_tags | Extra tags for Redis Blocked clients monitor | list | `` | no | | blocked_clients_message | Custom message for Redis Blocked clients monitor | string | `` | no | | blocked_clients_silenced | Groups to mute for Redis Blocked clients monitor | map | `` | no | | blocked_clients_threshold_critical | Blocked clients rate (critical threshold) | string | `30` | no | @@ -40,51 +40,51 @@ Creates DataDog monitors with the 
following checks: | blocked_clients_timeframe | Monitor timeframe for Redis Blocked clients [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | | evaluation_delay | Delay in seconds for the metric evaluation | string | `15` | no | -| evictedkeys_change_extra_tags | Extra tags for Redis evicted keys monitor | string | `` | no | +| evictedkeys_change_extra_tags | Extra tags for Redis evicted keys monitor | list | `` | no | | evictedkeys_change_message | Custom message for Redis evicted keys monitor | string | `` | no | | evictedkeys_change_silenced | Groups to mute for Redis evicted keys monitor | map | `` | no | | evictedkeys_change_threshold_critical | Evicted keys change (critical threshold) | string | `100` | no | | evictedkeys_change_threshold_warning | Evicted keys change (warning threshold) | string | `20` | no | | evictedkeys_change_time_aggregator | Monitor aggregator for Redis evicted keys [available values: min, max or avg] | string | `avg` | no | | evictedkeys_change_timeframe | Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| expirations_rate_extra_tags | Extra tags for Redis keys expirations monitor | string | `` | no | +| expirations_rate_extra_tags | Extra tags for Redis keys expirations monitor | list | `` | no | | expirations_rate_message | Custom message for Redis keys expirations monitor | string | `` | no | +| expirations_rate_silenced | Groups to mute for Redis keys expirations monitor | map | `` | no | | expirations_rate_threshold_critical | Expirations percent (critical threshold) | string | `80` | no | | expirations_rate_threshold_warning | Expirations percent (warning threshold) | string | `60` | no | | expirations_rate_time_aggregator | Monitor aggregator for Redis keys expirations [available values: 
min, max or avg] | string | `min` | no | | expirations_rate_timeframe | Monitor timeframe for Redis keys expirations [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| expirations_silenced | Groups to mute for Redis keys expirations monitor | map | `` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| hitrate_extra_tags | Extra tags for Redis hitrate monitor | string | `` | no | +| hitrate_extra_tags | Extra tags for Redis hitrate monitor | list | `` | no | | hitrate_message | Custom message for Redis hitrate monitor | string | `` | no | | hitrate_silenced | Groups to mute for Redis hitrate monitor | map | `` | no | | hitrate_threshold_critical | hitrate limit (critical threshold) | string | `10` | no | | hitrate_threshold_warning | hitrate limit (warning threshold) | string | `30` | no | | hitrate_time_aggregator | Monitor aggregator for Redis hitrate [available values: min, max or avg] | string | `max` | no | | hitrate_timeframe | Monitor timeframe for Redis hitrate [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| keyspace_extra_tags | Extra tags for Redis keyspace monitor | string | `` | no | +| keyspace_extra_tags | Extra tags for Redis keyspace monitor | list | `` | no | | keyspace_message | Custom message for Redis keyspace monitor | string | `` | no | | keyspace_silenced | Groups to mute for Redis keyspace monitor | map | `` | no | | keyspace_threshold_critical | Keyspace no changement (critical threshold) | string | `0` | no | | keyspace_threshold_warning | Keyspace no changement (warning threshold) | string | `1` | no | | keyspace_time_aggregator | Monitor aggregator for Redis keyspace [available values: min, max or avg] | string | `min` | no | | 
keyspace_timeframe | Monitor timeframe for Redis keyspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| latency_extra_tags | Extra tags for Redis latency monitor | string | `` | no | +| latency_extra_tags | Extra tags for Redis latency monitor | list | `` | no | | latency_message | Custom message for Redis latency monitor | string | `` | no | | latency_silenced | Groups to mute for Redis latency monitor | map | `` | no | | latency_threshold_critical | latency limit (critical threshold) | string | `100` | no | | latency_threshold_warning | latency limit (warning threshold) | string | `50` | no | | latency_time_aggregator | Monitor aggregator for Redis latency [available values: min, max or avg] | string | `min` | no | | latency_timeframe | Monitor timeframe for Redis latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| mem_frag_extra_tags | Extra tags for Redis memory RAM fragmentation monitor | string | `` | no | +| mem_frag_extra_tags | Extra tags for Redis memory RAM fragmentation monitor | list | `` | no | | mem_frag_message | Custom message for Redis memory RAM fragmentation monitor | string | `` | no | | mem_frag_silenced | Groups to mute for Redis memory RAM fragmentation monitor | map | `` | no | | mem_frag_threshold_critical | memory RAM fragmentation limit (critical threshold) | string | `150` | no | | mem_frag_threshold_warning | memory RAM fragmentation limit (warning threshold) | string | `130` | no | | mem_frag_time_aggregator | Monitor aggregator for Redis memory RAM fragmentation [available values: min, max or avg] | string | `min` | no | | mem_frag_timeframe | Monitor timeframe for Redis memory RAM fragmentation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| mem_used_extra_tags | Extra tags for Redis RAM memory used monitor | 
string | `` | no | +| mem_used_extra_tags | Extra tags for Redis RAM memory used monitor | list | `` | no | | mem_used_message | Custom message for Redis RAM memory used monitor | string | `` | no | | mem_used_silenced | Groups to mute for Redis RAM memory used monitor | map | `` | no | | mem_used_threshold_critical | RAM memory used limit (critical threshold) | string | `95` | no | @@ -93,11 +93,11 @@ Creates DataDog monitors with the following checks: | mem_used_timeframe | Monitor timeframe for Redis RAM memory used [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | | new_host_delay | Delay in seconds for the metric evaluation | string | `300` | no | -| not_responding_extra_tags | Extra tags for Redis does not respond monitor | string | `` | no | +| not_responding_extra_tags | Extra tags for Redis does not respond monitor | list | `` | no | | not_responding_message | Custom message for Redis does not respond monitor | string | `` | no | | not_responding_silenced | Groups to mute for Redis does not respond monitor | map | `` | no | | redis_silenced | Groups to mute for Redis monitors | map | `` | no | -| rejected_con_extra_tags | Extra tags for Redis rejected connections errors monitor | string | `` | no | +| rejected_con_extra_tags | Extra tags for Redis rejected connections errors monitor | list | `` | no | | rejected_con_message | Custom message for Redis rejected connections errors monitor | string | `` | no | | rejected_con_silenced | Groups to mute for Redis rejected connections errors monitor | map | `` | no | | rejected_con_threshold_critical | rejected connections errors limit (critical threshold) | string | `50` | no | diff --git a/database/redis/inputs.tf b/database/redis/inputs.tf index 101e01f..e651acf 100644 --- a/database/redis/inputs.tf +++ b/database/redis/inputs.tf @@ -50,8 +50,8 @@ variable 
"evictedkeys_change_message" { variable "evictedkeys_change_extra_tags" { description = "Extra tags for Redis evicted keys monitor" - type = "string" - default = "" + type = "list" + default = [] } variable "evictedkeys_change_time_aggregator" { @@ -90,8 +90,8 @@ variable "expirations_rate_message" { variable "expirations_rate_extra_tags" { description = "Extra tags for Redis keys expirations monitor" - type = "string" - default = "" + type = "list" + default = [] } variable "expirations_rate_time_aggregator" { @@ -130,8 +130,8 @@ variable "blocked_clients_message" { variable "blocked_clients_extra_tags" { description = "Extra tags for Redis Blocked clients monitor" - type = "string" - default = "" + type = "list" + default = [] } variable "blocked_clients_time_aggregator" { @@ -173,8 +173,8 @@ variable "keyspace_message" { variable "keyspace_extra_tags" { description = "Extra tags for Redis keyspace monitor" - type = "string" - default = "" + type = "list" + default = [] } variable "keyspace_time_aggregator" { @@ -213,8 +213,8 @@ variable "mem_used_message" { variable "mem_used_extra_tags" { description = "Extra tags for Redis RAM memory used monitor" - type = "string" - default = "" + type = "list" + default = [] } variable "mem_used_time_aggregator" { @@ -253,8 +253,8 @@ variable "mem_frag_message" { variable "mem_frag_extra_tags" { description = "Extra tags for Redis memory RAM fragmentation monitor" - type = "string" - default = "" + type = "list" + default = [] } variable "mem_frag_time_aggregator" { @@ -293,8 +293,8 @@ variable "rejected_con_message" { variable "rejected_con_extra_tags" { description = "Extra tags for Redis rejected connections errors monitor" - type = "string" - default = "" + type = "list" + default = [] } variable "rejected_con_time_aggregator" { @@ -333,8 +333,8 @@ variable "latency_message" { variable "latency_extra_tags" { description = "Extra tags for Redis latency monitor" - type = "string" - default = "" + type = "list" + default = 
[] } variable "latency_time_aggregator" { @@ -373,8 +373,8 @@ variable "hitrate_message" { variable "hitrate_extra_tags" { description = "Extra tags for Redis hitrate monitor" - type = "string" - default = "" + type = "list" + default = [] } variable "hitrate_time_aggregator" { @@ -416,6 +416,6 @@ variable "not_responding_message" { variable "not_responding_extra_tags" { description = "Extra tags for Redis does not respond monitor" - type = "string" - default = "" + type = "list" + default = [] }