From 6c7b90c4db1442f62f1e55186bfc2ae24ac08f81 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Thu, 31 May 2018 12:03:44 +0200 Subject: [PATCH] MON-32 - monitors updated with latest fix --- cloud/aws/elasticache/common/README.md | 15 +++++--- cloud/aws/elasticache/common/inputs.tf | 35 +++++++++++++++++- .../common/monitors-elasticache.tf | 32 +++++++++++++++++ cloud/aws/elasticache/memcached/README.md | 13 ++----- cloud/aws/elasticache/memcached/inputs.tf | 35 +----------------- .../memcached/monitors-memcached.tf | 36 ++----------------- cloud/aws/elasticache/redis/README.md | 13 +++---- cloud/aws/elasticache/redis/inputs.tf | 25 +------------ cloud/aws/elasticache/redis/monitors-redis.tf | 35 +++--------------- 9 files changed, 92 insertions(+), 147 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 2b29612..f0007ce 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -1,5 +1,5 @@ -AWS ElasticCache Service DataDog monitors -========================================= +AWS ElastiCache Service DataDog monitors +======================================== How to use this module ---------------------- @@ -23,6 +23,7 @@ Purpose Creates DataDog monitors with the following checks: * Eviction +* Swap * Max connections * No connection @@ -32,7 +33,7 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| | delay | Delay in seconds for the metric evaluation | string | `900` | no | -| environment | Architecture Environment | string | - | yes | +| environment | Infrastructure Environment | string | - | yes | | eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | @@ -40,7 +41,7 @@ Inputs | filter_tags | Tags used for filtering | string | - | yes | | max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | | max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | -| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `min` | no | +| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | | max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | | no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | @@ -48,6 +49,12 @@ Inputs | no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | | no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | resource | Type of Elasticache used | string | - | yes | +| swap_message | Custom message for Elasticache memcached swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache memcached swap monitor | map | `` | no | +| swap_threshold_critical | Elasticache memcached swap critical threshold in percentage | string | `50` | no | +| swap_threshold_warning | Elasticache memcached swap warning threshold in percentage | string | `0` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | +| swap_timeframe | Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index d174c2f..d4bd386 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -1,6 +1,6 @@ # Global Terraform variable "environment" { - description = "Architecture Environment" + description = "Infrastructure Environment" type = "string" } @@ -91,3 +91,36 @@ variable "no_connection_timeframe" { description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_5m" } + +variable "swap_silenced" { + description = "Groups to mute for Elasticache memcached swap monitor" + type = "map" + default = {} +} + +variable "swap_message" { + description = "Custom message for Elasticache memcached swap monitor" + type = "string" + default = "" +} + +variable "swap_time_aggregator" { + description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "swap_timeframe" { + description = "Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "swap_threshold_warning" { + description = "Elasticache memcached swap warning threshold in percentage" + default = 0 +} + +variable "swap_threshold_critical" { + description = "Elasticache memcached swap critical threshold in percentage" + default = 50 +} diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 9a0121e..9ef7c72 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -78,3 +78,35 @@ resource "datadog_monitor" "elasticache_no_connection" { tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] } + +resource "datadog_monitor" "elasticache_swap" { + name = "[${var.environment}] Elasticache ${var.resource} swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}" + message = "${coalesce(var.swap_message, var.message)}" + + type = "metric alert" + + query = < ${var.swap_threshold_critical} + EOF + + thresholds { + warning = "${var.swap_threshold_warning}" + critical = "${var.swap_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.swap_silenced}" + + tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] +} diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 75c02f3..2c38e85 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -1,5 +1,5 @@ -AWS ElasticCache Memcached Service DataDog monitors -=================================================== +AWS ElastiCache Memcached Service DataDog monitors +================================================== How to use this module ---------------------- @@ -21,7 +21,6 @@ Creates DataDog monitors with the following checks : * Get Hit * CPU High -* Swap * Free memory Inputs @@ -37,7 +36,7 @@ Inputs | cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | string | - | yes | -| environment | Architecture Environment | string | - | yes | +| environment | Infrastructure Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | free_memory_message | Custom message for Elasticache memcached free memory monitor | string | `` | no | @@ -52,12 +51,6 @@ Inputs | get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `20` | no | | get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | Message sent when an alert is triggered | string | - | yes | -| swap_message | Custom message for Elasticache memcached swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache memcached swap monitor | map | `` | no | -| swap_threshold_critical | Elasticache memcached swap critical threshold in percentage | string | `50` | no | -| swap_threshold_warning | Elasticache memcached swap warning threshold in percentage | string | `0` | no | -| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index aa71bca..2288257 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -1,6 +1,6 @@ # Global Terraform variable "environment" { - description = "Architecture Environment" + description = "Infrastructure Environment" type = "string" } @@ -91,39 +91,6 @@ variable "cpu_high_threshold_critical" { default = 90 } -variable "swap_silenced" { - description = "Groups to mute for Elasticache memcached swap monitor" - type = "map" - default = {} -} - -variable "swap_message" { - description = "Custom message for Elasticache memcached swap monitor" - type = "string" - default = "" -} - -variable "swap_time_aggregator" { - description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "swap_timeframe" { - description = "Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "swap_threshold_warning" { - description = "Elasticache memcached swap warning threshold in percentage" - default = 0 -} - -variable "swap_threshold_critical" { - description = "Elasticache memcached swap critical threshold in percentage" - default = 50 -} - variable "free_memory_silenced" { description = "Groups to mute for Elasticache memcached free memory monitor" type = "map" diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 027a43d..af4925b 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_mem:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_elasticache_memcached:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } @@ -81,38 +81,6 @@ resource "datadog_monitor" "memcached_cpu_high" { tags = ["env:${var.environment}", "engine:memcached", "team:aws", "provider:aws"] } -resource "datadog_monitor" "memcached_swap" { - name = "[${var.environment}] Elasticache memcached swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}" - message = "${coalesce(var.swap_message, var.message)}" - - type = "metric alert" - - query = < ${var.swap_threshold_critical} - EOF - - thresholds { - warning = "${var.swap_threshold_warning}" - critical = "${var.swap_threshold_critical}" - } - - notify_no_data = false - evaluation_delay = "${var.delay}" - renotify_interval = 0 - notify_audit = false - timeout_h = 0 - include_tags = true - locked = false - require_full_window = false - new_host_delay = "${var.delay}" - - silenced = "${var.swap_silenced}" - - tags = ["env:${var.environment}", "engine:memcached", "team:aws", "provider:aws"] -} - resource "datadog_monitor" "memcached_free_memory" { name = "[${var.environment}] Elasticache memcached free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.free_memory_message, var.message)}" @@ -123,7 +91,7 @@ resource "datadog_monitor" "memcached_free_memory" { query = <` | no | | cpu_high_threshold_critical | Elasticache redis cpu high critical threshold in percentage | string | `90` | no | | cpu_high_threshold_warning | Elasticache redis cpu high warning threshold in percentage | string | `75` | no | -| cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `min` | no | +| cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `avg` | no | | cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | string | - | yes | -| environment | Architecture Environment | string | - | yes | +| environment | Infrastructure Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | free_memory_message | Custom message for Elasticache redis free memory monitor | string | `` | no | @@ -74,10 +73,6 @@ Inputs | replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `0` | no | | replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no | | replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| swap_message | Custom message for Elasticache redis swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache redis swap monitor | map | `` | no | -| swap_time_aggregator | Monitor aggregator for Elasticache redis swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 3c0380c..c20aa25 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -1,6 +1,6 @@ # Global Terraform variable "environment" { - description = "Architecture Environment" + description = "Infrastructure Environment" type = "string" } @@ -96,29 +96,6 @@ variable "cpu_high_threshold_critical" { default = 90 } -variable "swap_silenced" { - description = "Groups to mute for Elasticache redis swap monitor" - type = "map" - default = {} -} - -variable "swap_message" { - description = "Custom message for Elasticache redis swap monitor" - type = "string" - default = "" -} - -variable "swap_time_aggregator" { - description = "Monitor aggregator for Elasticache redis swap [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "swap_timeframe" { - description = "Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - variable "replication_lag_silenced" { description = "Groups to mute for Elasticache redis replication lag monitor" type = "map" diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index f51e1ef..fa8bd56 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -2,11 +2,11 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_red:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_elasticache_redis:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } -module "datadog-monitors-aws-elasticcache-common" { +module "datadog-monitors-aws-elasticache-common" { source = "../common" message = "${var.message}" @@ -59,7 +59,7 @@ resource "datadog_monitor" "redis_cpu_high" { query = < ${var.cpu_high_threshold_critical / element(values(local.core), count.index)} EOF @@ -78,33 +78,6 @@ resource "datadog_monitor" "redis_cpu_high" { tags = ["env:${var.environment}", "engine:redis", "team:aws", "provider:aws"] } -resource "datadog_monitor" "redis_swap" { - name = "[${var.environment}] Elasticache redis is starting to swap ({{value}}MB)" - message = "${coalesce(var.swap_message, var.message)}" - - type = "metric alert" - - query = < 0 - EOF - - notify_no_data = false - evaluation_delay = "${var.delay}" - renotify_interval = 0 - notify_audit = false - timeout_h = 0 - include_tags = true - locked = false - require_full_window = false - new_host_delay = "${var.delay}" - - silenced = "${var.swap_silenced}" - - tags = ["env:${var.environment}", "engine:redis", "team:aws", "provider:aws"] -} - resource "datadog_monitor" "redis_replication_lag" { name = "[${var.environment}] Elasticache redis replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" message = "${coalesce(var.replication_lag_message, var.message)}" @@ -175,7 +148,7 @@ resource "datadog_monitor" "redis_free_memory" { query = <