From bdfb0f39fc792b1b98e6afe83b5396549c641185 Mon Sep 17 00:00:00 2001
From: Alexandre Gaillet
Date: Tue, 12 Jun 2018 10:50:12 +0200
Subject: [PATCH] MON-32 - Evictions growing monitor added

---
NOTE(review): this patch was restored from a copy whose newlines and
indentation had been collapsed and whose "<...>" spans had been stripped.
The three-line heredoc body of the "query" attribute in
datadog_monitor.elasticache_eviction_growing is a reconstruction: its line
count is fixed by the "+143,35" hunk header, but the metric name, the tag
filter expression and the ">" comparator are assumptions. Leading whitespace
inside hunks follows terraform fmt conventions and may differ from the
original. Confirm against commit bdfb0f3 before applying.

 cloud/aws/elasticache/common/README.md        | 12 +++++--
 cloud/aws/elasticache/common/inputs.tf        | 32 +++++++++++++++++
 .../common/monitors-elasticache.tf            | 34 ++++++++++++++++++-
 cloud/aws/elasticache/memcached/README.md     |  6 ++++
 cloud/aws/elasticache/memcached/inputs.tf     | 32 +++++++++++++++++
 .../memcached/monitors-memcached.tf           |  7 ++++
 cloud/aws/elasticache/redis/README.md         |  6 ++++
 cloud/aws/elasticache/redis/inputs.tf         | 32 +++++++++++++++++
 cloud/aws/elasticache/redis/monitors-redis.tf |  7 ++++
 9 files changed, 164 insertions(+), 4 deletions(-)

diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md
index ee27e38..5bf7f7a 100644
--- a/cloud/aws/elasticache/common/README.md
+++ b/cloud/aws/elasticache/common/README.md
@@ -35,6 +35,12 @@ Inputs
 |------|-------------|:----:|:-----:|:-----:|
 | delay | Delay in seconds for the metric evaluation | string | `900` | no |
 | environment | Infrastructure Environment | string | - | yes |
+| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
+| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no |
+| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no |
+| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no |
+| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no |
+| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
 | eviction_message | Custom message for Elasticache eviction monitor | string | `` | no |
 | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no |
 | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no |
@@ -43,8 +49,8 @@ Inputs
 | free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
 | free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no |
 | free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no |
-| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `300` | no |
-| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `200` | no |
+| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no |
+| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no |
 | free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
 | max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no |
 | max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no |
@@ -58,7 +64,7 @@ Inputs
 | resource | Type of Elasticache used | string | - | yes |
 | swap_message | Custom message for Elasticache swap monitor | string | `` | no |
 | swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no |
-| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50` | no |
+| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50000000` | no |
 | swap_threshold_warning | Elasticache swap warning threshold in percentage | string | `0` | no |
 | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no |
 | swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf
index ab249df..bda0bfd 100644
--- a/cloud/aws/elasticache/common/inputs.tf
+++ b/cloud/aws/elasticache/common/inputs.tf
@@ -156,3 +156,35 @@ variable "free_memory_threshold_critical" {
   description = "Elasticache free memory critical threshold in percentage"
   default     = -70
 }
+
+variable "eviction_growing_silenced" {
+  description = "Groups to mute for Elasticache eviction growing monitor"
+  type        = "map"
+  default     = {}
+}
+
+variable "eviction_growing_message" {
+  description = "Custom message for Elasticache eviction growing monitor"
+  type        = "string"
+  default     = ""
+}
+
+variable "eviction_growing_condition_timeframe" {
+  description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
+  default     = "last_5m"
+}
+
+variable "eviction_growing_timeframe" {
+  description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
+  default     = "last_5m"
+}
+
+variable "eviction_growing_threshold_warning" {
+  description = "Elasticache eviction growing warning threshold in percentage"
+  default     = 10
+}
+
+variable "eviction_growing_threshold_critical" {
+  description = "Elasticache eviction growing critical threshold in percentage"
+  default     = 30
+}
diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf
index 0e83633..6d53ade 100644
--- a/cloud/aws/elasticache/common/monitors-elasticache.tf
+++ b/cloud/aws/elasticache/common/monitors-elasticache.tf
@@ -112,7 +112,7 @@ resource "datadog_monitor" "elasticache_swap" {
 }
 
 # POC - A approfondir
-resource "datadog_monitor" "redis_free_memory" {
+resource "datadog_monitor" "elasticache_free_memory" {
   name    = "[${var.environment}] Elasticache ${var.resource} free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
   message = "${coalesce(var.free_memory_message, var.message)}"
 
@@ -143,3 +143,35 @@ resource "datadog_monitor" "redis_free_memory" {
 
   tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"]
 }
+
+resource "datadog_monitor" "elasticache_eviction_growing" {
+  name    = "[${var.environment}] Elasticache ${var.resource} evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
+  message = "${coalesce(var.eviction_growing_message, var.message)}"
+
+  type = "metric alert"
+
+  query = <<EOF
+    pct_change(avg(${var.eviction_growing_timeframe}),${var.eviction_growing_condition_timeframe}):
+      avg:aws.elasticache.evictions{${data.template_file.filter.rendered}} by {region,cacheclusterid}
+    > ${var.eviction_growing_threshold_critical}
+  EOF
+
+  thresholds {
+    warning  = "${var.eviction_growing_threshold_warning}"
+    critical = "${var.eviction_growing_threshold_critical}"
+  }
+
+  notify_no_data      = false
+  evaluation_delay    = "${var.delay}"
+  renotify_interval   = 0
+  notify_audit        = false
+  timeout_h           = 0
+  include_tags        = true
+  locked              = false
+  require_full_window = false
+  new_host_delay      = "${var.delay}"
+
+  silenced = "${var.eviction_growing_silenced}"
+
+  tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"]
+}
diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md
index 75360b0..58e910d 100644
--- a/cloud/aws/elasticache/memcached/README.md
+++ b/cloud/aws/elasticache/memcached/README.md
@@ -36,6 +36,12 @@ Inputs
 | delay | Delay in seconds for the metric evaluation | string | `900` | no |
 | elasticache_size | Size of the Elasticache instance | string | - | yes |
 | environment | Infrastructure Environment | string | - | yes |
+| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
+| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no |
+| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no |
+| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no |
+| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no |
+| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
 | eviction_message | Custom message for Elasticache eviction monitor | string | `` | no |
 | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no |
 | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no |
diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf
index acdce22..74c0e39 100644
--- a/cloud/aws/elasticache/memcached/inputs.tf
+++ b/cloud/aws/elasticache/memcached/inputs.tf
@@ -225,3 +225,35 @@ variable "free_memory_threshold_critical" {
   description = "Elasticache free memory critical threshold in percentage"
   default     = -70
 }
+
+variable "eviction_growing_silenced" {
+  description = "Groups to mute for Elasticache eviction growing monitor"
+  type        = "map"
+  default     = {}
+}
+
+variable "eviction_growing_message" {
+  description = "Custom message for Elasticache eviction growing monitor"
+  type        = "string"
+  default     = ""
+}
+
+variable "eviction_growing_condition_timeframe" {
+  description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
+  default     = "last_5m"
+}
+
+variable "eviction_growing_timeframe" {
+  description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
+  default     = "last_5m"
+}
+
+variable "eviction_growing_threshold_warning" {
+  description = "Elasticache eviction growing warning threshold in percentage"
+  default     = 10
+}
+
+variable "eviction_growing_threshold_critical" {
+  description = "Elasticache eviction growing critical threshold in percentage"
+  default     = 30
+}
diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf
index ea8a1c8..1ac0125 100644
--- a/cloud/aws/elasticache/memcached/monitors-memcached.tf
+++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf
@@ -19,6 +19,13 @@ module "datadog-monitors-aws-elasticache-common" {
   eviction_time_aggregator = "${var.eviction_time_aggregator}"
   eviction_timeframe       = "${var.eviction_timeframe}"
 
+  eviction_growing_condition_timeframe = "${var.eviction_growing_condition_timeframe}"
+  eviction_growing_timeframe           = "${var.eviction_growing_timeframe}"
+  eviction_growing_message             = "${var.eviction_growing_message}"
+  eviction_growing_silenced            = "${var.eviction_growing_silenced}"
+  eviction_growing_threshold_warning   = "${var.eviction_growing_threshold_warning}"
+  eviction_growing_threshold_critical  = "${var.eviction_growing_threshold_critical}"
+
   free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}"
   free_memory_timeframe           = "${var.free_memory_timeframe}"
   free_memory_message             = "${var.free_memory_message}"
diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md
index e643fea..4d8e4aa 100644
--- a/cloud/aws/elasticache/redis/README.md
+++ b/cloud/aws/elasticache/redis/README.md
@@ -56,6 +56,12 @@ Inputs
 | delay | Delay in seconds for the metric evaluation | string | `900` | no |
 | elasticache_size | Size of the Elasticache instance | string | - | yes |
 | environment | Infrastructure Environment | string | - | yes |
+| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
+| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no |
+| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no |
+| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no |
+| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no |
+| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
 | eviction_message | Custom message for Elasticache eviction monitor | string | `` | no |
 | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no |
 | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no |
diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf
index 940577b..255adff 100644
--- a/cloud/aws/elasticache/redis/inputs.tf
+++ b/cloud/aws/elasticache/redis/inputs.tf
@@ -280,3 +280,35 @@ variable "free_memory_threshold_critical" {
   description = "Elasticache free memory critical threshold in percentage"
   default     = -70
 }
+
+variable "eviction_growing_silenced" {
+  description = "Groups to mute for Elasticache eviction growing monitor"
+  type        = "map"
+  default     = {}
+}
+
+variable "eviction_growing_message" {
+  description = "Custom message for Elasticache eviction growing monitor"
+  type        = "string"
+  default     = ""
+}
+
+variable "eviction_growing_condition_timeframe" {
+  description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
+  default     = "last_5m"
+}
+
+variable "eviction_growing_timeframe" {
+  description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
+  default     = "last_5m"
+}
+
+variable "eviction_growing_threshold_warning" {
+  description = "Elasticache eviction growing warning threshold in percentage"
+  default     = 10
+}
+
+variable "eviction_growing_threshold_critical" {
+  description = "Elasticache eviction growing critical threshold in percentage"
+  default     = 30
+}
diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf
index 6572a3f..16257a6 100644
--- a/cloud/aws/elasticache/redis/monitors-redis.tf
+++ b/cloud/aws/elasticache/redis/monitors-redis.tf
@@ -19,6 +19,13 @@ module "datadog-monitors-aws-elasticache-common" {
   eviction_time_aggregator = "${var.eviction_time_aggregator}"
   eviction_timeframe       = "${var.eviction_timeframe}"
 
+  eviction_growing_condition_timeframe = "${var.eviction_growing_condition_timeframe}"
+  eviction_growing_timeframe           = "${var.eviction_growing_timeframe}"
+  eviction_growing_message             = "${var.eviction_growing_message}"
+  eviction_growing_silenced            = "${var.eviction_growing_silenced}"
+  eviction_growing_threshold_warning   = "${var.eviction_growing_threshold_warning}"
+  eviction_growing_threshold_critical  = "${var.eviction_growing_threshold_critical}"
+
   free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}"
   free_memory_timeframe           = "${var.free_memory_timeframe}"
   free_memory_message             = "${var.free_memory_message}"