MON-32 - Evictions growing monitor added

This commit is contained in:
Alexandre Gaillet 2018-06-12 10:50:12 +02:00 committed by Quentin Manfroi
parent 3c24284384
commit bdfb0f39fc
9 changed files with 164 additions and 4 deletions

View File

@ -35,6 +35,12 @@ Inputs
|------|-------------|:----:|:-----:|:-----:|
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
| environment | Infrastructure Environment | string | - | yes |
| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no |
| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `<map>` | no |
| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no |
| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no |
| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no |
| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `<map>` | no |
| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no |
@ -43,8 +49,8 @@ Inputs
| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no |
| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `<map>` | no |
| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `300` | no |
| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `200` | no |
| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no |
| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no |
| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no |
| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `<map>` | no |
@ -58,7 +64,7 @@ Inputs
| resource | Type of Elasticache used | string | - | yes |
| swap_message | Custom message for Elasticache swap monitor | string | `` | no |
| swap_silenced | Groups to mute for Elasticache swap monitor | map | `<map>` | no |
| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50` | no |
| swap_threshold_critical | Elasticache swap critical threshold in bytes | string | `50000000` | no |
| swap_threshold_warning | Elasticache swap warning threshold in bytes | string | `0` | no |
| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no |
| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |

View File

@ -156,3 +156,35 @@ variable "free_memory_threshold_critical" {
description = "Elasticache free memory critical threshold in percentage"
default = -70
}
# Mute configuration for the eviction growing monitor: this map is handed
# straight to the monitor's `silenced` argument. Defaults to an empty map.
variable "eviction_growing_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Elasticache eviction growing monitor"
}
# Optional notification text specific to the eviction growing monitor.
# The monitor wraps it in coalesce() with the module-wide `message` variable,
# so leaving it empty falls back to that shared message.
variable "eviction_growing_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Elasticache eviction growing monitor"
}
# Second argument of the monitor's pct_change(...) call, i.e. the period the
# current value is compared against (time shift of the change alert).
# Type declared explicitly to match the sibling string variables and the README.
variable "eviction_growing_condition_timeframe" {
  description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}
# Window used inside avg(...) in the monitor's pct_change query, i.e. the span
# over which the eviction metric is averaged before the change is computed.
# Type declared explicitly to match the sibling string variables and the README.
variable "eviction_growing_timeframe" {
  description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}
# Warning level for the eviction growing monitor, expressed as a percentage
# change; used as the `warning` entry of the monitor's thresholds block.
# Type declared explicitly to match the README (string) and sibling variables.
variable "eviction_growing_threshold_warning" {
  description = "Elasticache eviction growing warning threshold in percentage"
  type        = "string"
  default     = 10
}
# Critical level for the eviction growing monitor, expressed as a percentage
# change; used both in the query comparison and as the `critical` threshold.
# Type declared explicitly to match the README (string) and sibling variables.
variable "eviction_growing_threshold_critical" {
  description = "Elasticache eviction growing critical threshold in percentage"
  type        = "string"
  default     = 30
}

View File

@ -112,7 +112,7 @@ resource "datadog_monitor" "elasticache_swap" {
}
# POC - to be investigated further
resource "datadog_monitor" "redis_free_memory" {
resource "datadog_monitor" "elasticache_free_memory" {
name = "[${var.environment}] Elasticache ${var.resource} free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${coalesce(var.free_memory_message, var.message)}"
@ -143,3 +143,35 @@ resource "datadog_monitor" "redis_free_memory" {
tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"]
}
# Alerts when Elasticache evictions are increasing: the query takes the
# percentage change (pct_change) of the averaged eviction metric, grouped by
# region and cache cluster, and compares it with the critical threshold.
resource "datadog_monitor" "elasticache_eviction_growing" {
  # Thresholds and values of a pct_change query are percentages, so the title
  # shows them with a "%" suffix, as the free memory monitor title does.
  name    = "[${var.environment}] Elasticache ${var.resource} evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # Monitor-specific message wins; an empty one falls back to var.message.
  message = "${coalesce(var.eviction_growing_message, var.message)}"

  type = "metric alert"

  # avg(<timeframe>) is the averaging window, the second pct_change argument is
  # the period compared against. Heredoc lines are kept flush-left so the query
  # text sent to Datadog is unchanged.
  query = <<EOF
pct_change(avg(${var.eviction_growing_timeframe}),${var.eviction_growing_condition_timeframe}):
avg:aws.elasticache.evictions{${var.filter_tags}} by {region,cacheclusterid}
> ${var.eviction_growing_threshold_critical}
EOF

  thresholds {
    warning  = "${var.eviction_growing_threshold_warning}"
    critical = "${var.eviction_growing_threshold_critical}"
  }

  notify_no_data      = false
  evaluation_delay    = "${var.delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = false
  new_host_delay      = "${var.delay}"

  silenced = "${var.eviction_growing_silenced}"

  tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"]
}

View File

@ -36,6 +36,12 @@ Inputs
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
| elasticache_size | Size of the Elasticache instance | string | - | yes |
| environment | Infrastructure Environment | string | - | yes |
| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no |
| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `<map>` | no |
| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no |
| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no |
| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no |
| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `<map>` | no |
| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no |

View File

@ -225,3 +225,35 @@ variable "free_memory_threshold_critical" {
description = "Elasticache free memory critical threshold in percentage"
default = -70
}
# Mute configuration for the eviction growing monitor; forwarded unchanged to
# the common Elasticache monitors module. Defaults to an empty map.
variable "eviction_growing_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Elasticache eviction growing monitor"
}
# Optional notification text for the eviction growing monitor; forwarded
# unchanged to the common Elasticache monitors module. Empty by default.
variable "eviction_growing_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Elasticache eviction growing monitor"
}
# Comparison period of the eviction growing monitor; forwarded unchanged to the
# common Elasticache monitors module.
# Type declared explicitly to match the sibling string variables and the README.
variable "eviction_growing_condition_timeframe" {
  description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}
# Evaluation window of the eviction growing monitor; forwarded unchanged to the
# common Elasticache monitors module.
# Type declared explicitly to match the sibling string variables and the README.
variable "eviction_growing_timeframe" {
  description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}
# Warning level (percentage change) of the eviction growing monitor; forwarded
# unchanged to the common Elasticache monitors module.
# Type declared explicitly to match the README (string) and sibling variables.
variable "eviction_growing_threshold_warning" {
  description = "Elasticache eviction growing warning threshold in percentage"
  type        = "string"
  default     = 10
}
# Critical level (percentage change) of the eviction growing monitor; forwarded
# unchanged to the common Elasticache monitors module.
# Type declared explicitly to match the README (string) and sibling variables.
variable "eviction_growing_threshold_critical" {
  description = "Elasticache eviction growing critical threshold in percentage"
  type        = "string"
  default     = 30
}

View File

@ -19,6 +19,13 @@ module "datadog-monitors-aws-elasticache-common" {
eviction_time_aggregator = "${var.eviction_time_aggregator}"
eviction_timeframe = "${var.eviction_timeframe}"
eviction_growing_condition_timeframe = "${var.eviction_growing_condition_timeframe}"
eviction_growing_timeframe = "${var.eviction_growing_timeframe}"
eviction_growing_message = "${var.eviction_growing_message}"
eviction_growing_silenced = "${var.eviction_growing_silenced}"
eviction_growing_threshold_warning = "${var.eviction_growing_threshold_warning}"
eviction_growing_threshold_critical = "${var.eviction_growing_threshold_critical}"
free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}"
free_memory_timeframe = "${var.free_memory_timeframe}"
free_memory_message = "${var.free_memory_message}"

View File

@ -56,6 +56,12 @@ Inputs
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
| elasticache_size | Size of the Elasticache instance | string | - | yes |
| environment | Infrastructure Environment | string | - | yes |
| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no |
| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `<map>` | no |
| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no |
| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no |
| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no |
| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `<map>` | no |
| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no |

View File

@ -280,3 +280,35 @@ variable "free_memory_threshold_critical" {
description = "Elasticache free memory critical threshold in percentage"
default = -70
}
# Mute configuration for the eviction growing monitor; passed through as-is to
# the common Elasticache monitors module. Defaults to an empty map.
variable "eviction_growing_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Elasticache eviction growing monitor"
}
# Optional notification text for the eviction growing monitor; passed through
# as-is to the common Elasticache monitors module. Empty by default.
variable "eviction_growing_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Elasticache eviction growing monitor"
}
# Comparison period of the eviction growing monitor; passed through as-is to
# the common Elasticache monitors module.
# Type declared explicitly to match the sibling string variables and the README.
variable "eviction_growing_condition_timeframe" {
  description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}
# Evaluation window of the eviction growing monitor; passed through as-is to
# the common Elasticache monitors module.
# Type declared explicitly to match the sibling string variables and the README.
variable "eviction_growing_timeframe" {
  description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}
# Warning level (percentage change) of the eviction growing monitor; passed
# through as-is to the common Elasticache monitors module.
# Type declared explicitly to match the README (string) and sibling variables.
variable "eviction_growing_threshold_warning" {
  description = "Elasticache eviction growing warning threshold in percentage"
  type        = "string"
  default     = 10
}
# Critical level (percentage change) of the eviction growing monitor; passed
# through as-is to the common Elasticache monitors module.
# Type declared explicitly to match the README (string) and sibling variables.
variable "eviction_growing_threshold_critical" {
  description = "Elasticache eviction growing critical threshold in percentage"
  type        = "string"
  default     = 30
}

View File

@ -19,6 +19,13 @@ module "datadog-monitors-aws-elasticache-common" {
eviction_time_aggregator = "${var.eviction_time_aggregator}"
eviction_timeframe = "${var.eviction_timeframe}"
eviction_growing_condition_timeframe = "${var.eviction_growing_condition_timeframe}"
eviction_growing_timeframe = "${var.eviction_growing_timeframe}"
eviction_growing_message = "${var.eviction_growing_message}"
eviction_growing_silenced = "${var.eviction_growing_silenced}"
eviction_growing_threshold_warning = "${var.eviction_growing_threshold_warning}"
eviction_growing_threshold_critical = "${var.eviction_growing_threshold_critical}"
free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}"
free_memory_timeframe = "${var.free_memory_timeframe}"
free_memory_message = "${var.free_memory_message}"