From fdc0b086475a762695a97e1c958b979a90a549c8 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Thu, 3 May 2018 14:38:55 +0200 Subject: [PATCH 01/53] MON-32 - Elasticache CPU monitors added --- cloud/aws/elasticache/README.md | 45 ++++++++++++++ cloud/aws/elasticache/inputs.tf | 59 +++++++++++++++++++ cloud/aws/elasticache/monitors-elasticache.tf | 39 ++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 cloud/aws/elasticache/README.md create mode 100644 cloud/aws/elasticache/inputs.tf create mode 100644 cloud/aws/elasticache/monitors-elasticache.tf diff --git a/cloud/aws/elasticache/README.md b/cloud/aws/elasticache/README.md new file mode 100644 index 0000000..6f395a4 --- /dev/null +++ b/cloud/aws/elasticache/README.md @@ -0,0 +1,45 @@ +AWS ElasticCache Service DataDog monitors +========================================= + +How to use this module +---------------------- + +``` +module "datadog-monitors-aws-elasticcache" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks : + +* CPU High + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cpu_aggregator | Monitor aggregator for Elasticache CPU high [available values: min, max, sum or avg] | string | `min` | no | +| cpu_message | Custom message for Elasticache CPU high monitor | string | `` | no | +| cpu_silenced | Groups to mute for Elasticache CPU high monitor | map | `` | no | +| cpu_threshold_critical | Elasticache CPU high critical threshold in percentage | string | `95` | no | +| cpu_threshold_warning | Elasticache CPU high warning threshold in percentage | string | `80` | no | +| cpu_timeframe | Monitor timeframe for Elasticache CPU high [available values: `last_#m` (1, 5, 
10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) + +AWS ElastiCache metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) diff --git a/cloud/aws/elasticache/inputs.tf b/cloud/aws/elasticache/inputs.tf new file mode 100644 index 0000000..2e6aa71 --- /dev/null +++ b/cloud/aws/elasticache/inputs.tf @@ -0,0 +1,59 @@ +# Global Terraform +variable "environment" { + description = "Architecture Environment" + type = "string" +} + +# Global DataDog +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# Elasticache specific +variable "cpu_silenced" { + description = "Groups to mute for Elasticache CPU high monitor" + type = "map" + default = {} +} + +variable "cpu_message" { + description = "Custom message for Elasticache CPU high monitor" + type = "string" + default = 
"" +} + +variable "cpu_aggregator" { + description = "Monitor aggregator for Elasticache CPU high [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "cpu_timeframe" { + description = "Monitor timeframe for Elasticache CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "cpu_threshold_warning" { + description = "Elasticache CPU high warning threshold in percentage" + default = 80 +} + +variable "cpu_threshold_critical" { + description = "Elasticache CPU high critical threshold in percentage" + default = 95 +} diff --git a/cloud/aws/elasticache/monitors-elasticache.tf b/cloud/aws/elasticache/monitors-elasticache.tf new file mode 100644 index 0000000..0587feb --- /dev/null +++ b/cloud/aws/elasticache/monitors-elasticache.tf @@ -0,0 +1,39 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_es:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + } +} + +resource "datadog_monitor" "elasticache_cpu_high" { + name = "[${var.environment}] Elasticache CPU high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.cpu_message, var.message)}" + + type = "metric alert" + + query = < ${var.cpu_threshold_critical} + EOF + + thresholds { + warning = "${var.cpu_threshold_warning}" + critical = "${var.cpu_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.cpu_silenced}" + + tags = ["env:${var.environment}", "resource:elasticache", "team:aws", "provider:aws"] +} From 
76d70875053dc6921d38705097f4c84515097089 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Thu, 3 May 2018 15:44:55 +0200 Subject: [PATCH 02/53] [WIP]MON-32 - First try --- cloud/aws/elasticache/memcached/README.md | 45 ++++++++++++++ cloud/aws/elasticache/memcached/inputs.tf | 59 +++++++++++++++++++ .../memcached/monitors-memcached.tf | 41 +++++++++++++ cloud/aws/elasticache/monitors-elasticache.tf | 2 +- cloud/aws/elasticache/redis/README.md | 45 ++++++++++++++ cloud/aws/elasticache/redis/inputs.tf | 59 +++++++++++++++++++ cloud/aws/elasticache/redis/monitors-redis.tf | 41 +++++++++++++ 7 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 cloud/aws/elasticache/memcached/README.md create mode 100644 cloud/aws/elasticache/memcached/inputs.tf create mode 100644 cloud/aws/elasticache/memcached/monitors-memcached.tf create mode 100644 cloud/aws/elasticache/redis/README.md create mode 100644 cloud/aws/elasticache/redis/inputs.tf create mode 100644 cloud/aws/elasticache/redis/monitors-redis.tf diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md new file mode 100644 index 0000000..c73e482 --- /dev/null +++ b/cloud/aws/elasticache/memcached/README.md @@ -0,0 +1,45 @@ +AWS ElasticCache Memcached Service DataDog monitors +=================================================== + +How to use this module +---------------------- + +``` +module "datadog-monitors-aws-elasticcache-memcached" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/memcached?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks : + +* Get requests missed + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | `900` | no | 
+| environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| get_requests_miss_aggregator | Monitor aggregator for Elasticache Memcached get requests missed [available values: min, max, sum or avg] | string | `min` | no | +| get_requests_miss_message | Custom message for Elasticache Memcached get requests missed monitor | string | `` | no | +| get_requests_miss_silenced | Groups to mute for Elasticache Memcached get requests missed monitor | map | `` | no | +| get_requests_miss_threshold_critical | Elasticache Memcached get requests missed critical threshold in percentage | string | `95` | no | +| get_requests_miss_threshold_warning | Elasticache Memcached get requests missed warning threshold in percentage | string | `80` | no | +| get_requests_miss_timeframe | Monitor timeframe for Elasticache Memcached get requests missed [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| message | Message sent when an alert is triggered | string | - | yes | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) + +AWS ElastiCache metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf new file mode 100644 index 0000000..987e401 --- /dev/null +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -0,0 +1,59 @@ +# Global Terraform +variable "environment" { + description = "Architecture 
Environment" + type = "string" +} + +# Global DataDog +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# Memcached specific +variable "get_requests_miss_silenced" { + description = "Groups to mute for Elasticache Memcached get requests missed monitor" + type = "map" + default = {} +} + +variable "get_requests_miss_message" { + description = "Custom message for Elasticache Memcached get requests missed monitor" + type = "string" + default = "" +} + +variable "get_requests_miss_aggregator" { + description = "Monitor aggregator for Elasticache Memcached get requests missed [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "get_requests_miss_timeframe" { + description = "Monitor timeframe for Elasticache Memcached get requests missed [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "get_requests_miss_threshold_warning" { + description = "Elasticache Memcached get requests missed warning threshold in percentage" + default = 80 +} + +variable "get_requests_miss_threshold_critical" { + description = "Elasticache Memcached get requests missed critical threshold in percentage" + default = 95 +} diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf new file mode 100644 index 0000000..0418e3f --- /dev/null +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -0,0 +1,41 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == 
"true" ? format("dd_monitoring:enabled,dd_aws_mem:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + } +} + +resource "datadog_monitor" "memcached_get_miss" { + name = "[${var.environment}] Elasticache Memcached get requests missed {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.get_requests_miss_message, var.message)}" + + type = "metric alert" + + query = < ${var.get_requests_miss_threshold_critical} + EOF + + thresholds { + warning = "${var.get_requests_miss_threshold_warning}" + critical = "${var.get_requests_miss_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.get_requests_miss_silenced}" + + tags = ["env:${var.environment}", "resource:memcached", "team:aws", "provider:aws"] +} diff --git a/cloud/aws/elasticache/monitors-elasticache.tf b/cloud/aws/elasticache/monitors-elasticache.tf index 0587feb..798c80e 100644 --- a/cloud/aws/elasticache/monitors-elasticache.tf +++ b/cloud/aws/elasticache/monitors-elasticache.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_es:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_aws_ec:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md new file mode 100644 index 0000000..dd09c3e --- /dev/null +++ b/cloud/aws/elasticache/redis/README.md @@ -0,0 +1,45 @@ +AWS ElasticCache Redis Service DataDog monitors +=============================================== + +How to use this module +---------------------- + +``` +module "datadog-monitors-aws-elasticcache-redis" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/redis?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks : + +* Cache Miss + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cache_miss_aggregator | Monitor aggregator for Elasticache Redis cache miss [available values: min, max, sum or avg] | string | `min` | no | +| cache_miss_message | Custom message for Elasticache Redis cache miss monitor | string | `` | no | +| cache_miss_silenced | Groups to mute for Elasticache Redis cache miss monitor | map | `` | no | +| cache_miss_threshold_critical | Elasticache Redis cache miss critical threshold in percentage | string | `95` | no | +| cache_miss_threshold_warning | Elasticache Redis cache miss warning threshold in percentage | string | `80` | no | +| cache_miss_timeframe | Monitor timeframe for Elasticache Redis cache miss [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | 
`*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) + +AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf new file mode 100644 index 0000000..4ca8823 --- /dev/null +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -0,0 +1,59 @@ +# Global Terraform +variable "environment" { + description = "Architecture Environment" + type = "string" +} + +# Global DataDog +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# Redis specific +variable "cache_miss_silenced" { + description = "Groups to mute for Elasticache Redis cache miss monitor" + type = "map" + default = {} +} + +variable "cache_miss_message" { + description = "Custom message for Elasticache Redis cache miss monitor" + type = "string" + default = "" +} + +variable "cache_miss_aggregator" { + description = "Monitor aggregator for Elasticache Redis cache miss [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "cache_miss_timeframe" { + description = "Monitor timeframe for Elasticache 
Redis cache miss [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "cache_miss_threshold_warning" { + description = "Elasticache Redis cache miss warning threshold in percentage" + default = 80 +} + +variable "cache_miss_threshold_critical" { + description = "Elasticache Redis cache miss critical threshold in percentage" + default = 95 +} diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf new file mode 100644 index 0000000..8734541 --- /dev/null +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -0,0 +1,41 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_red:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + } +} + +resource "datadog_monitor" "redis_cache_miss" { + name = "[${var.environment}] Elasticache Redis cache miss {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.cache_miss_message, var.message)}" + + type = "metric alert" + + query = < ${var.cache_miss_threshold_critical} + EOF + + thresholds { + warning = "${var.cache_miss_threshold_warning}" + critical = "${var.cache_miss_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.cache_miss_silenced}" + + tags = ["env:${var.environment}", "resource:redis", "team:aws", "provider:aws"] +} From d182b0c380ef16d40436d42b0f001052d8a0c5df Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Fri, 4 May 2018 12:19:51 +0200 Subject: [PATCH 03/53] MON-32 - Test to deploy elasticache module from redis and 
memcached --- cloud/aws/elasticache/memcached/monitors-memcached.tf | 7 +++++++ cloud/aws/elasticache/redis/monitors-redis.tf | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 0418e3f..7cbe296 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -6,6 +6,13 @@ data "template_file" "filter" { } } +module "datadog-monitors-aws-elasticcache" { + source = "../." + + message = "${var.message}" + environment = "${var.environment}" +} + resource "datadog_monitor" "memcached_get_miss" { name = "[${var.environment}] Elasticache Memcached get requests missed {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.get_requests_miss_message, var.message)}" diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 8734541..3dbacc0 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -6,6 +6,13 @@ data "template_file" "filter" { } } +module "datadog-monitors-aws-elasticcache" { + source = "../." 
+ + message = "${var.message}" + environment = "${var.environment}" +} + resource "datadog_monitor" "redis_cache_miss" { name = "[${var.environment}] Elasticache Redis cache miss {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cache_miss_message, var.message)}" From 491baf1781ed11d866a65a076fba756bea2192e9 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 15 May 2018 17:44:41 +0200 Subject: [PATCH 04/53] MON-32 - Add new redis monitors --- cloud/aws/elasticache/README.md | 18 ++-- cloud/aws/elasticache/inputs.tf | 38 +++------ cloud/aws/elasticache/monitors-elasticache.tf | 31 ++----- cloud/aws/elasticache/redis/README.md | 33 +++++-- cloud/aws/elasticache/redis/core.tf | 31 +++++++ cloud/aws/elasticache/redis/inputs.tf | 77 +++++++++++++++-- cloud/aws/elasticache/redis/monitors-redis.tf | 85 ++++++++++++++++--- 7 files changed, 229 insertions(+), 84 deletions(-) create mode 100644 cloud/aws/elasticache/redis/core.tf diff --git a/cloud/aws/elasticache/README.md b/cloud/aws/elasticache/README.md index 6f395a4..14963ea 100644 --- a/cloud/aws/elasticache/README.md +++ b/cloud/aws/elasticache/README.md @@ -10,6 +10,8 @@ module "datadog-monitors-aws-elasticcache" { message = "${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" + filter_tags = "${var.filter_tags}" + resource = "${var.type_of_resource}" } ``` @@ -18,24 +20,22 @@ Purpose ------- Creates DataDog monitors with the following checks : -* CPU High +* Eviction Inputs ------ | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| cpu_aggregator | Monitor aggregator for Elasticache CPU high [available values: min, max, sum or avg] | string | `min` | no | -| cpu_message | Custom message for Elasticache CPU high monitor | string | `` | no | -| cpu_silenced | Groups to mute for Elasticache CPU high 
monitor | map | `` | no | -| cpu_threshold_critical | Elasticache CPU high critical threshold in percentage | string | `95` | no | -| cpu_threshold_warning | Elasticache CPU high warning threshold in percentage | string | `80` | no | -| cpu_timeframe | Monitor timeframe for Elasticache CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture Environment | string | - | yes | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| eviction_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max, sum or avg] | string | `min` | no | +| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | +| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | +| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| filter_tags | Tags used for filtering | string | - | yes | | message | Message sent when an alert is triggered | string | - | yes | +| resource | Type of Elasticache used | string | - | yes | Related documentation --------------------- diff --git a/cloud/aws/elasticache/inputs.tf b/cloud/aws/elasticache/inputs.tf index 2e6aa71..5106e23 100644 --- a/cloud/aws/elasticache/inputs.tf +++ b/cloud/aws/elasticache/inputs.tf @@ -14,46 +14,34 @@ variable "message" { description = "Message sent when an alert is triggered" } -variable "filter_tags_use_defaults" { - description = "Use default filter tags convention" - default = "true" +variable "filter_tags" { + description = "Tags used for filtering" } -variable "filter_tags_custom" { - 
description = "Tags used for custom filtering when filter_tags_use_defaults is false" - default = "*" +variable "resource" { + description = "Type of Elasticache used" } # Elasticache specific -variable "cpu_silenced" { - description = "Groups to mute for Elasticache CPU high monitor" +variable "eviction_silenced" { + description = "Groups to mute for Elasticache eviction monitor" type = "map" default = {} } -variable "cpu_message" { - description = "Custom message for Elasticache CPU high monitor" +variable "eviction_message" { + description = "Custom message for Elasticache eviction monitor" type = "string" default = "" } -variable "cpu_aggregator" { - description = "Monitor aggregator for Elasticache CPU high [available values: min, max, sum or avg]" +variable "eviction_aggregator" { + description = "Monitor aggregator for Elasticache eviction [available values: min, max, sum or avg]" type = "string" default = "min" } -variable "cpu_timeframe" { - description = "Monitor timeframe for Elasticache CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" -} - -variable "cpu_threshold_warning" { - description = "Elasticache CPU high warning threshold in percentage" - default = 80 -} - -variable "cpu_threshold_critical" { - description = "Elasticache CPU high critical threshold in percentage" - default = 95 +variable "eviction_timeframe" { + description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" } diff --git a/cloud/aws/elasticache/monitors-elasticache.tf b/cloud/aws/elasticache/monitors-elasticache.tf index 798c80e..271d456 100644 --- a/cloud/aws/elasticache/monitors-elasticache.tf +++ b/cloud/aws/elasticache/monitors-elasticache.tf @@ -1,29 +1,16 @@ -data "template_file" "filter" { - template = "$${filter}" - - vars { - filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_aws_ec:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" - } -} - -resource "datadog_monitor" "elasticache_cpu_high" { - name = "[${var.environment}] Elasticache CPU high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.cpu_message, var.message)}" +resource "datadog_monitor" "elasticache_eviction" { + name = "[${var.environment}] Elasticache ${var.resource} eviction {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}" + message = "${coalesce(var.eviction_message, var.message)}" type = "metric alert" query = < ${var.cpu_threshold_critical} + ${var.eviction_aggregator}(${var.eviction_timeframe}): ( + ${var.eviction_aggregator}:aws.elasticache.evictions{${var.filter_tags} by {region,cluster} + ) > 0 EOF - thresholds { - warning = "${var.cpu_threshold_warning}" - critical = "${var.cpu_threshold_critical}" - } - - notify_no_data = true + notify_no_data = false evaluation_delay = "${var.delay}" renotify_interval = 0 notify_audit = false @@ -33,7 +20,7 @@ resource "datadog_monitor" "elasticache_cpu_high" { require_full_window = false new_host_delay = "${var.delay}" - silenced = "${var.cpu_silenced}" + silenced = "${var.eviction_silenced}" - tags = ["env:${var.environment}", "resource:elasticache", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "resource:${var.resource}", "team:aws", "provider:aws"] } diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index dd09c3e..888b448 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -8,8 +8,9 @@ How to use this module module "datadog-monitors-aws-elasticcache-redis" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/redis?ref={revision}" - message = 
"${module.datadog-message-alerting.alerting-message}" + message = "${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" + redis_size = "${var.size_of_redis}" } ``` @@ -18,28 +19,44 @@ Purpose ------- Creates DataDog monitors with the following checks : -* Cache Miss +* Cache Hit +* CPU High +* Swap Inputs ------ | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| cache_miss_aggregator | Monitor aggregator for Elasticache Redis cache miss [available values: min, max, sum or avg] | string | `min` | no | -| cache_miss_message | Custom message for Elasticache Redis cache miss monitor | string | `` | no | -| cache_miss_silenced | Groups to mute for Elasticache Redis cache miss monitor | map | `` | no | -| cache_miss_threshold_critical | Elasticache Redis cache miss critical threshold in percentage | string | `95` | no | -| cache_miss_threshold_warning | Elasticache Redis cache miss warning threshold in percentage | string | `80` | no | -| cache_miss_timeframe | Monitor timeframe for Elasticache Redis cache miss [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| cache_hit_aggregator | Monitor aggregator for Elasticache Redis cache miss [available values: min, max, sum or avg] | string | `min` | no | +| cache_hit_message | Custom message for Elasticache Redis cache miss monitor | string | `` | no | +| cache_hit_silenced | Groups to mute for Elasticache Redis cache miss monitor | map | `` | no | +| cache_hit_threshold_critical | Elasticache Redis cache miss critical threshold in percentage | string | `10` | no | +| cache_hit_threshold_warning | Elasticache Redis cache miss warning threshold in percentage | string | `20` | no | +| cache_hit_timeframe | Monitor timeframe for Elasticache Redis cache miss [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| 
cpu_high_aggregator | Monitor aggregator for Elasticache Redis cpu high [available values: min, max, sum or avg] | string | `min` | no | +| cpu_high_message | Custom message for Elasticache Redis cpu high monitor | string | `` | no | +| cpu_high_silenced | Groups to mute for Elasticache Redis cpu high monitor | map | `` | no | +| cpu_high_threshold_critical | Elasticache Redis cpu high critical threshold in percentage | string | `90` | no | +| cpu_high_threshold_warning | Elasticache Redis cpu high warning threshold in percentage | string | `75` | no | +| cpu_high_timeframe | Monitor timeframe for Elasticache Redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | +| redis_size | Size of the Elasticache Redis instance | string | - | yes | +| swap_aggregator | Monitor aggregator for Elasticache Redis swap [available values: min, max, sum or avg] | string | `min` | no | +| swap_message | Custom message for Elasticache Redis swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache Redis swap monitor | map | `` | no | +| swap_timeframe | Monitor timeframe for Elasticache Redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | Related documentation --------------------- DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +And more here : +* 
[https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/](https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/) +* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) diff --git a/cloud/aws/elasticache/redis/core.tf b/cloud/aws/elasticache/redis/core.tf new file mode 100644 index 0000000..988ba69 --- /dev/null +++ b/cloud/aws/elasticache/redis/core.tf @@ -0,0 +1,31 @@ +variable "core" { + type = "map" + + description = "Mapping between Redis size and vCPU." + + default = { + cache.t2.micro = "1" + cache.t2.small = "1" + cache.t2.medium = "2" + cache.m3.medium = "1" + cache.m3.large = "2" + cache.m3.xlarge = "4" + cache.m3.2xlarge = "8" + cache.m4.large = "2" + cache.m4.xlarge = "4" + cache.m4.2xlarge = "8" + cache.m4.4xlarge = "16" + cache.m4.10xlarge = "40" + cache.r3.large = "2" + cache.r3.xlarge = "4" + cache.r3.2xlarge = "8" + cache.r3.4xlarge = "16" + cache.r3.8xlarge = "32" + cache.r4.large = "2" + cache.r4.xlarge = "4" + cache.r4.2xlarge = "8" + cache.r4.4xlarge = "16" + cache.r4.8xlarge = "32" + cache.r4.16xlarge = "64" + } +} diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 4ca8823..5c963f2 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -25,35 +25,96 @@ variable "filter_tags_custom" { } # Redis specific -variable "cache_miss_silenced" { +variable "redis_size" { + description = "Size of the Elasticache Redis instance" + type = "string" +} + +variable "cache_hit_silenced" { description = "Groups to mute for Elasticache Redis cache miss 
monitor" type = "map" default = {} } -variable "cache_miss_message" { +variable "cache_hit_message" { description = "Custom message for Elasticache Redis cache miss monitor" type = "string" default = "" } -variable "cache_miss_aggregator" { +variable "cache_hit_aggregator" { description = "Monitor aggregator for Elasticache Redis cache miss [available values: min, max, sum or avg]" type = "string" default = "min" } -variable "cache_miss_timeframe" { +variable "cache_hit_timeframe" { description = "Monitor timeframe for Elasticache Redis cache miss [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_15m" } -variable "cache_miss_threshold_warning" { +variable "cache_hit_threshold_warning" { description = "Elasticache Redis cache miss warning threshold in percentage" - default = 80 + default = 20 } -variable "cache_miss_threshold_critical" { +variable "cache_hit_threshold_critical" { description = "Elasticache Redis cache miss critical threshold in percentage" - default = 95 + default = 10 +} + +variable "cpu_high_silenced" { + description = "Groups to mute for Elasticache Redis cpu high monitor" + type = "map" + default = {} +} + +variable "cpu_high_message" { + description = "Custom message for Elasticache Redis cpu high monitor" + type = "string" + default = "" +} + +variable "cpu_high_aggregator" { + description = "Monitor aggregator for Elasticache Redis cpu high [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "cpu_high_timeframe" { + description = "Monitor timeframe for Elasticache Redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "cpu_high_threshold_warning" { + description = "Elasticache Redis cpu high warning threshold in percentage" + default = 75 +} + +variable "cpu_high_threshold_critical" { + description = "Elasticache Redis cpu high critical threshold in 
percentage" + default = 90 +} + +variable "swap_silenced" { + description = "Groups to mute for Elasticache Redis swap monitor" + type = "map" + default = {} +} + +variable "swap_message" { + description = "Custom message for Elasticache Redis swap monitor" + type = "string" + default = "" +} + +variable "swap_aggregator" { + description = "Monitor aggregator for Elasticache Redis swap [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "swap_timeframe" { + description = "Monitor timeframe for Elasticache Redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" } diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 3dbacc0..4b8100d 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -6,30 +6,32 @@ data "template_file" "filter" { } } -module "datadog-monitors-aws-elasticcache" { +module "datadog-monitors-aws-elasticcache-common" { source = "../." 
message = "${var.message}" environment = "${var.environment}" + filter_tags = "${data.template_file.filter.rendered}" + resource = "redis" } -resource "datadog_monitor" "redis_cache_miss" { - name = "[${var.environment}] Elasticache Redis cache miss {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.cache_miss_message, var.message)}" +resource "datadog_monitor" "redis_cache_hit" { + name = "[${var.environment}] Elasticache redis cache hit {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.cache_hit_message, var.message)}" type = "metric alert" query = < ${var.cache_miss_threshold_critical} + ${var.cache_hit_aggregator}(${var.cache_hit_timeframe}): ( + ${var.cache_hit_aggregator}:aws.elasticache.cache_hits{${data.template_file.filter.rendered}} by {region,cluster} / + (${var.cache_hit_aggregator}:aws.elasticache.cache_hits{${data.template_file.filter.rendered}} by {region,cluster} + + ${var.cache_hit_aggregator}:aws.elasticache.cache_misses{${data.template_file.filter.rendered}} by {region,cluster}) + ) < ${var.cache_hit_threshold_critical} EOF thresholds { - warning = "${var.cache_miss_threshold_warning}" - critical = "${var.cache_miss_threshold_critical}" + warning = "${var.cache_hit_threshold_warning}" + critical = "${var.cache_hit_threshold_critical}" } notify_no_data = true @@ -42,7 +44,66 @@ resource "datadog_monitor" "redis_cache_miss" { require_full_window = false new_host_delay = "${var.delay}" - silenced = "${var.cache_miss_silenced}" + silenced = "${var.cache_hit_silenced}" + + tags = ["env:${var.environment}", "resource:redis", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "redis_cpu_high" { + name = "[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% 
({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.cpu_high_message, var.message)}" + + type = "metric alert" + + query = < ( ${var.cpu_high_threshold_critical} / ${var.core[var.redis_size]} ) + EOF + + thresholds { + warning = "${var.cpu_high_threshold_warning}" + critical = "${var.cpu_high_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.cpu_high_silenced}" + + tags = ["env:${var.environment}", "resource:redis", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "redis_swap" { + name = "[${var.environment}] Elasticache redis is starting to swap ({{value}}MB)" + message = "${coalesce(var.swap_message, var.message)}" + + type = "metric alert" + + query = < 0 + EOF + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.swap_silenced}" tags = ["env:${var.environment}", "resource:redis", "team:aws", "provider:aws"] } From 9e01024563d061596080ccf2a1b5b9ebfc828ed4 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 15 May 2018 17:53:45 +0200 Subject: [PATCH 05/53] MON-32 - Create common monitors directory --- cloud/aws/elasticache/{ => common}/README.md | 4 ++-- cloud/aws/elasticache/{ => common}/inputs.tf | 0 cloud/aws/elasticache/{ => common}/monitors-elasticache.tf | 2 +- cloud/aws/elasticache/redis/README.md | 7 ++++--- cloud/aws/elasticache/redis/monitors-redis.tf | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) rename cloud/aws/elasticache/{ => common}/README.md (89%) rename cloud/aws/elasticache/{ => common}/inputs.tf (100%) rename 
cloud/aws/elasticache/{ => common}/monitors-elasticache.tf (96%) diff --git a/cloud/aws/elasticache/README.md b/cloud/aws/elasticache/common/README.md similarity index 89% rename from cloud/aws/elasticache/README.md rename to cloud/aws/elasticache/common/README.md index 14963ea..ec333e4 100644 --- a/cloud/aws/elasticache/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -5,8 +5,8 @@ How to use this module ---------------------- ``` -module "datadog-monitors-aws-elasticcache" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticcache?ref={revision}" +module "datadog-monitors-aws-elasticache" { + source = "../common" message = "${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" diff --git a/cloud/aws/elasticache/inputs.tf b/cloud/aws/elasticache/common/inputs.tf similarity index 100% rename from cloud/aws/elasticache/inputs.tf rename to cloud/aws/elasticache/common/inputs.tf diff --git a/cloud/aws/elasticache/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf similarity index 96% rename from cloud/aws/elasticache/monitors-elasticache.tf rename to cloud/aws/elasticache/common/monitors-elasticache.tf index 271d456..da13264 100644 --- a/cloud/aws/elasticache/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -6,7 +6,7 @@ resource "datadog_monitor" "elasticache_eviction" { query = < 0 EOF diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index 888b448..bf0c711 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -17,7 +17,7 @@ module "datadog-monitors-aws-elasticcache-redis" { Purpose ------- -Creates DataDog monitors with the following checks : +Creates DataDog monitors with the following checks: * Cache Hit * CPU High @@ -54,8 +54,9 @@ Inputs Related documentation --------------------- -DataDog documentation: 
[https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) -And more here : +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +And more here: + * [https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/](https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/) * [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 4b8100d..68a9436 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -7,7 +7,7 @@ data "template_file" "filter" { } module "datadog-monitors-aws-elasticcache-common" { - source = "../." 
+ source = "../common" message = "${var.message}" environment = "${var.environment}" From cf66c5c7f39adcaa2a7c6f8fb85f5a4984e17c95 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 15 May 2018 18:33:56 +0200 Subject: [PATCH 06/53] MON-32 - Memcached monitors added --- cloud/aws/elasticache/common/README.md | 4 +- cloud/aws/elasticache/memcached/README.md | 25 +++++--- cloud/aws/elasticache/memcached/inputs.tf | 61 ++++++++++++++----- .../memcached/monitors-memcached.tf | 58 ++++++++++++++---- 4 files changed, 113 insertions(+), 35 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index ec333e4..56c77d3 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -16,9 +16,11 @@ module "datadog-monitors-aws-elasticache" { ``` +This module is used by default when you define `memcached` or `redis` monitors + Purpose ------- -Creates DataDog monitors with the following checks : +Creates DataDog monitors with the following checks: * Eviction diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index c73e482..3e280d3 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -18,28 +18,39 @@ Purpose ------- Creates DataDog monitors with the following checks : -* Get requests missed +* CPU High +* Swap Inputs ------ | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| +| cpu_high_aggregator | Monitor aggregator for Elasticache memcached cpu high [available values: min, max, sum or avg] | string | `min` | no | +| cpu_high_message | Custom message for Elasticache memcached cpu high monitor | string | `` | no | +| cpu_high_silenced | Groups to mute for Elasticache memcached cpu high monitor | map | `` | no | +| cpu_high_threshold_critical | Elasticache memcached cpu high critical threshold in percentage | string | `90` | no | +| 
cpu_high_threshold_warning | Elasticache memcached cpu high warning threshold in percentage | string | `75` | no | +| cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| get_requests_miss_aggregator | Monitor aggregator for Elasticache Memcached get requests missed [available values: min, max, sum or avg] | string | `min` | no | -| get_requests_miss_message | Custom message for Elasticache Memcached get requests missed monitor | string | `` | no | -| get_requests_miss_silenced | Groups to mute for Elasticache Memcached get requests missed monitor | map | `` | no | -| get_requests_miss_threshold_critical | Elasticache Memcached get requests missed critical threshold in percentage | string | `95` | no | -| get_requests_miss_threshold_warning | Elasticache Memcached get requests missed warning threshold in percentage | string | `80` | no | -| get_requests_miss_timeframe | Monitor timeframe for Elasticache Memcached get requests missed [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | Message sent when an alert is triggered | string | - | yes | +| swap_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max, sum or avg] | string | `min` | no | +| swap_message | Custom message for Elasticache memcached swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache memcached swap monitor | map | `` | no | +| swap_threshold_critical | Elasticache memcached 
swap critical threshold in percentage | string | `50` | no | +| swap_threshold_warning | Elasticache memcached swap warning threshold in percentage | string | `0` | no | +| swap_timeframe | Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +And more here: + +* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) + AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 987e401..8510bd1 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -25,35 +25,68 @@ variable "filter_tags_custom" { } # Memcached specific -variable "get_requests_miss_silenced" { - description = "Groups to mute for Elasticache Memcached get requests missed monitor" +variable "cpu_high_silenced" { + description = "Groups to mute for Elasticache memcached cpu high monitor" type = "map" default = {} } -variable "get_requests_miss_message" { - description = "Custom message for Elasticache Memcached get requests missed monitor" +variable "cpu_high_message" { + description = "Custom message for Elasticache memcached cpu high monitor" type = "string" default = "" } -variable "get_requests_miss_aggregator" { - description = "Monitor aggregator for Elasticache Memcached get requests missed [available values: 
min, max, sum or avg]" +variable "cpu_high_aggregator" { + description = "Monitor aggregator for Elasticache memcached cpu high [available values: min, max, sum or avg]" type = "string" default = "min" } -variable "get_requests_miss_timeframe" { - description = "Monitor timeframe for Elasticache Memcached get requests missed [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" +variable "cpu_high_timeframe" { + description = "Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_15m" } -variable "get_requests_miss_threshold_warning" { - description = "Elasticache Memcached get requests missed warning threshold in percentage" - default = 80 +variable "cpu_high_threshold_warning" { + description = "Elasticache memcached cpu high warning threshold in percentage" + default = 75 } -variable "get_requests_miss_threshold_critical" { - description = "Elasticache Memcached get requests missed critical threshold in percentage" - default = 95 +variable "cpu_high_threshold_critical" { + description = "Elasticache memcached cpu high critical threshold in percentage" + default = 90 +} + +variable "swap_silenced" { + description = "Groups to mute for Elasticache memcached swap monitor" + type = "map" + default = {} +} + +variable "swap_message" { + description = "Custom message for Elasticache memcached swap monitor" + type = "string" + default = "" +} + +variable "swap_aggregator" { + description = "Monitor aggregator for Elasticache memcached swap [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "swap_timeframe" { + description = "Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "swap_threshold_warning" { + description = "Elasticache memcached swap warning threshold 
in percentage" + default = 0 +} + +variable "swap_threshold_critical" { + description = "Elasticache memcached swap critical threshold in percentage" + default = 50 } diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 7cbe296..62264ad 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -6,30 +6,30 @@ data "template_file" "filter" { } } -module "datadog-monitors-aws-elasticcache" { - source = "../." +module "datadog-monitors-aws-elasticcache-common" { + source = "../common" message = "${var.message}" environment = "${var.environment}" + filter_tags = "${data.template_file.filter.rendered}" + resource = "memcached" } -resource "datadog_monitor" "memcached_get_miss" { - name = "[${var.environment}] Elasticache Memcached get requests missed {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.get_requests_miss_message, var.message)}" +resource "datadog_monitor" "memcached_cpu_high" { + name = "[${var.environment}] Elasticache memcached CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.cpu_high_message, var.message)}" type = "metric alert" query = < ${var.get_requests_miss_threshold_critical} + ${var.cpu_high_aggregator}(${var.cpu_high_timeframe}): ( + ${var.cpu_high_aggregator}:aws.elasticache.cpuutilization{${data.template_file.filter.rendered}} by {region,cluster,node} + ) > ${var.cpu_high_threshold_critical} EOF thresholds { - warning = "${var.get_requests_miss_threshold_warning}" - critical = "${var.get_requests_miss_threshold_critical}" + warning = "${var.cpu_high_threshold_warning}" + critical = "${var.cpu_high_threshold_critical}" } notify_no_data = true 
@@ -42,7 +42,39 @@ resource "datadog_monitor" "memcached_get_miss" { require_full_window = false new_host_delay = "${var.delay}" - silenced = "${var.get_requests_miss_silenced}" + silenced = "${var.cpu_high_silenced}" + + tags = ["env:${var.environment}", "resource:memcached", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "memcached_swap" { + name = "[${var.environment}] Elasticache memcached swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}" + message = "${coalesce(var.swap_message, var.message)}" + + type = "metric alert" + + query = < ${var.swap_threshold_critical} + EOF + + thresholds { + warning = "${var.swap_threshold_warning}" + critical = "${var.swap_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.swap_silenced}" tags = ["env:${var.environment}", "resource:memcached", "team:aws", "provider:aws"] } From c36f2069507958c5332f94ff1719efea26123d40 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Wed, 16 May 2018 18:09:17 +0200 Subject: [PATCH 07/53] MON-32 - Monitors Updated --- cloud/aws/elasticache/memcached/README.md | 9 +++- cloud/aws/elasticache/memcached/inputs.tf | 33 +++++++++++++ .../memcached/monitors-memcached.tf | 34 +++++++++++++ cloud/aws/elasticache/redis/README.md | 36 +++++++------- cloud/aws/elasticache/redis/inputs.tf | 48 +++++++++---------- cloud/aws/elasticache/redis/monitors-redis.tf | 22 ++++----- 6 files changed, 128 insertions(+), 54 deletions(-) diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 3e280d3..151ec88 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -18,6 +18,7 @@ 
Purpose ------- Creates DataDog monitors with the following checks : +* Get Hit * CPU High * Swap @@ -36,6 +37,12 @@ Inputs | environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| get_hits_aggregator | Monitor aggregator for Elasticache memcached get hits [available values: min, max, sum or avg] | string | `min` | no | +| get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | +| get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | +| get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `10` | no | +| get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `20` | no | +| get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | Message sent when an alert is triggered | string | - | yes | | swap_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max, sum or avg] | string | `min` | no | | swap_message | Custom message for Elasticache memcached swap monitor | string | `` | no | @@ -47,7 +54,7 @@ Inputs Related documentation --------------------- -DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) And more here: * 
[https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 8510bd1..e87d13b 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -25,6 +25,39 @@ variable "filter_tags_custom" { } # Memcached specific +variable "get_hits_silenced" { + description = "Groups to mute for Elasticache memcached get hits monitor" + type = "map" + default = {} +} + +variable "get_hits_message" { + description = "Custom message for Elasticache memcached get hits monitor" + type = "string" + default = "" +} + +variable "get_hits_aggregator" { + description = "Monitor aggregator for Elasticache memcached get hits [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "get_hits_timeframe" { + description = "Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "get_hits_threshold_warning" { + description = "Elasticache memcached get hits warning threshold in percentage" + default = 20 +} + +variable "get_hits_threshold_critical" { + description = "Elasticache memcached get hits critical threshold in percentage" + default = 10 +} + variable "cpu_high_silenced" { description = "Groups to mute for Elasticache memcached cpu high monitor" type = "map" diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 62264ad..5fa6353 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -15,6 +15,40 @@ module "datadog-monitors-aws-elasticcache-common" { resource = "memcached" } +resource "datadog_monitor" 
"memcached_get_hits" { + name = "[${var.environment}] Elasticache memcached get hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.get_hits_message, var.message)}" + + type = "metric alert" + + query = <` | no | -| cache_hit_threshold_critical | Elasticache Redis cache miss critical threshold in percentage | string | `10` | no | -| cache_hit_threshold_warning | Elasticache Redis cache miss warning threshold in percentage | string | `20` | no | -| cache_hit_timeframe | Monitor timeframe for Elasticache Redis cache miss [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| cpu_high_aggregator | Monitor aggregator for Elasticache Redis cpu high [available values: min, max, sum or avg] | string | `min` | no | -| cpu_high_message | Custom message for Elasticache Redis cpu high monitor | string | `` | no | -| cpu_high_silenced | Groups to mute for Elasticache Redis cpu high monitor | map | `` | no | -| cpu_high_threshold_critical | Elasticache Redis cpu high critical threshold in percentage | string | `90` | no | -| cpu_high_threshold_warning | Elasticache Redis cpu high warning threshold in percentage | string | `75` | no | -| cpu_high_timeframe | Monitor timeframe for Elasticache Redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| cache_hits_aggregator | Monitor aggregator for Elasticache redis cache hits [available values: min, max, sum or avg] | string | `min` | no | +| cache_hits_message | Custom message for Elasticache redis cache hits monitor | string | `` | no | +| cache_hits_silenced | Groups to mute for Elasticache redis cache hits monitor | map | `` | no | +| cache_hits_threshold_critical | Elasticache redis cache hits critical threshold in percentage | string | `10` | no | +| 
cache_hits_threshold_warning | Elasticache redis cache hits warning threshold in percentage | string | `20` | no | +| cache_hits_timeframe | Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| cpu_high_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max, sum or avg] | string | `min` | no | +| cpu_high_message | Custom message for Elasticache redis cpu high monitor | string | `` | no | +| cpu_high_silenced | Groups to mute for Elasticache redis cpu high monitor | map | `` | no | +| cpu_high_threshold_critical | Elasticache redis cpu high critical threshold in percentage | string | `90` | no | +| cpu_high_threshold_warning | Elasticache redis cpu high warning threshold in percentage | string | `75` | no | +| cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | -| redis_size | Size of the Elasticache Redis instance | string | - | yes | -| swap_aggregator | Monitor aggregator for Elasticache Redis swap [available values: min, max, sum or avg] | string | `min` | no | -| swap_message | Custom message for Elasticache Redis swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache Redis swap monitor | map | `` | no | -| swap_timeframe | Monitor timeframe for Elasticache Redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, 
or 4), or `last_1d`] | string | `last_15m` | no | +| redis_size | Size of the Elasticache redis instance | string | - | yes | +| swap_aggregator | Monitor aggregator for Elasticache redis swap [available values: min, max, sum or avg] | string | `min` | no | +| swap_message | Custom message for Elasticache redis swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache redis swap monitor | map | `` | no | +| swap_timeframe | Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | Related documentation --------------------- -DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) And more here: * [https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/](https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/) diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 5c963f2..39a03da 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -24,97 +24,97 @@ variable "filter_tags_custom" { default = "*" } -# Redis specific +# redis specific variable "redis_size" { - description = "Size of the Elasticache Redis instance" + description = "Size of the Elasticache redis instance" type = "string" } -variable "cache_hit_silenced" { - description = "Groups to mute for Elasticache Redis cache miss monitor" +variable "cache_hits_silenced" { + description = "Groups to mute for Elasticache redis cache hits monitor" type = "map" default = {} } -variable "cache_hit_message" { - description = "Custom message for Elasticache Redis cache miss monitor" +variable "cache_hits_message" { + description = "Custom message for 
Elasticache redis cache hits monitor" type = "string" default = "" } -variable "cache_hit_aggregator" { - description = "Monitor aggregator for Elasticache Redis cache miss [available values: min, max, sum or avg]" +variable "cache_hits_aggregator" { + description = "Monitor aggregator for Elasticache redis cache hits [available values: min, max, sum or avg]" type = "string" default = "min" } -variable "cache_hit_timeframe" { - description = "Monitor timeframe for Elasticache Redis cache miss [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" +variable "cache_hits_timeframe" { + description = "Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_15m" } -variable "cache_hit_threshold_warning" { - description = "Elasticache Redis cache miss warning threshold in percentage" +variable "cache_hits_threshold_warning" { + description = "Elasticache redis cache hits warning threshold in percentage" default = 20 } -variable "cache_hit_threshold_critical" { - description = "Elasticache Redis cache miss critical threshold in percentage" +variable "cache_hits_threshold_critical" { + description = "Elasticache redis cache hits critical threshold in percentage" default = 10 } variable "cpu_high_silenced" { - description = "Groups to mute for Elasticache Redis cpu high monitor" + description = "Groups to mute for Elasticache redis cpu high monitor" type = "map" default = {} } variable "cpu_high_message" { - description = "Custom message for Elasticache Redis cpu high monitor" + description = "Custom message for Elasticache redis cpu high monitor" type = "string" default = "" } variable "cpu_high_aggregator" { - description = "Monitor aggregator for Elasticache Redis cpu high [available values: min, max, sum or avg]" + description = "Monitor aggregator for Elasticache redis cpu high [available values: min, max, sum or avg]" type = 
"string" default = "min" } variable "cpu_high_timeframe" { - description = "Monitor timeframe for Elasticache Redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + description = "Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_15m" } variable "cpu_high_threshold_warning" { - description = "Elasticache Redis cpu high warning threshold in percentage" + description = "Elasticache redis cpu high warning threshold in percentage" default = 75 } variable "cpu_high_threshold_critical" { - description = "Elasticache Redis cpu high critical threshold in percentage" + description = "Elasticache redis cpu high critical threshold in percentage" default = 90 } variable "swap_silenced" { - description = "Groups to mute for Elasticache Redis swap monitor" + description = "Groups to mute for Elasticache redis swap monitor" type = "map" default = {} } variable "swap_message" { - description = "Custom message for Elasticache Redis swap monitor" + description = "Custom message for Elasticache redis swap monitor" type = "string" default = "" } variable "swap_aggregator" { - description = "Monitor aggregator for Elasticache Redis swap [available values: min, max, sum or avg]" + description = "Monitor aggregator for Elasticache redis swap [available values: min, max, sum or avg]" type = "string" default = "min" } variable "swap_timeframe" { - description = "Monitor timeframe for Elasticache Redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + description = "Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_15m" } diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 68a9436..9a7a47f 100644 --- 
a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -15,23 +15,23 @@ module "datadog-monitors-aws-elasticcache-common" { resource = "redis" } -resource "datadog_monitor" "redis_cache_hit" { - name = "[${var.environment}] Elasticache redis cache hit {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.cache_hit_message, var.message)}" +resource "datadog_monitor" "redis_cache_hits" { + name = "[${var.environment}] Elasticache redis cache hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.cache_hits_message, var.message)}" type = "metric alert" query = < Date: Thu, 17 May 2018 14:59:04 +0200 Subject: [PATCH 08/53] MON-32 - Some common monitors added + Redis monitors --- cloud/aws/elasticache/common/README.md | 12 +++- cloud/aws/elasticache/common/inputs.tf | 46 ++++++++++++++ .../common/monitors-elasticache.tf | 54 +++++++++++++++++ cloud/aws/elasticache/memcached/README.md | 4 +- cloud/aws/elasticache/redis/README.md | 22 +++++-- cloud/aws/elasticache/redis/inputs.tf | 58 +++++++++++++++++- cloud/aws/elasticache/redis/monitors-redis.tf | 60 +++++++++++++++++++ 7 files changed, 247 insertions(+), 9 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 56c77d3..802c0c1 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -16,13 +16,15 @@ module "datadog-monitors-aws-elasticache" { ``` -This module is used by default when you define `memcached` or `redis` monitors +This module is loaded by default when you define `memcached` or `redis` monitors Purpose ------- Creates DataDog monitors with the following checks: * Eviction +* Max connections +* No connection 
Inputs ------ @@ -36,7 +38,15 @@ Inputs | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | | eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags | Tags used for filtering | string | - | yes | +| max_connection_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max, sum or avg] | string | `min` | no | +| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | +| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | +| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | +| no_connection_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max, sum or avg] | string | `min` | no | +| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | +| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | +| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | resource | Type of Elasticache used | string | - | yes | Related documentation diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index 5106e23..2e4fe15 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -45,3 +45,49 @@ variable "eviction_timeframe" { description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` 
(1, 2, or 4), or `last_1d`]" default = "last_5m" } + +variable "max_connection_silenced" { + description = "Groups to mute for Elasticache max connection monitor" + type = "map" + default = {} +} + +variable "max_connection_message" { + description = "Custom message for Elasticache max connection monitor" + type = "string" + default = "" +} + +variable "max_connection_aggregator" { + description = "Monitor aggregator for Elasticache max connection [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "max_connection_timeframe" { + description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "no_connection_silenced" { + description = "Groups to mute for Elasticache no connection monitor" + type = "map" + default = {} +} + +variable "no_connection_message" { + description = "Custom message for Elasticache no connection monitor" + type = "string" + default = "" +} + +variable "no_connection_aggregator" { + description = "Monitor aggregator for Elasticache no connection [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "no_connection_timeframe" { + description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index da13264..98adc63 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -24,3 +24,57 @@ resource "datadog_monitor" "elasticache_eviction" { tags = ["env:${var.environment}", "resource:${var.resource}", "team:aws", "provider:aws"] } + +resource "datadog_monitor" "elasticache_max_connection" { + name = "[${var.environment}] Elasticache 
${var.resource} connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" + message = "${coalesce(var.max_connection_message, var.message)}" + + type = "metric alert" + + query = <= 65000 + EOF + + notify_no_data = true + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.max_connection_silenced}" + + tags = ["env:${var.environment}", "resource:${var.resource}", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "elasticache_no_connection" { + name = "[${var.environment}] Elasticache ${var.resource} connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" + message = "${coalesce(var.no_connection_message, var.message)}" + + type = "metric alert" + + query = <` | no | +| commands_timeframe | Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | cpu_high_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max, sum or avg] | string | `min` | no | | cpu_high_message | Custom message for Elasticache redis cpu high monitor | string | `` | no | | cpu_high_silenced | Groups to mute for Elasticache redis cpu high monitor | map | `` | no | @@ -46,17 +52,23 @@ Inputs | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | | redis_size | Size of the Elasticache redis instance | string | - | yes | +| replication_lag_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max, sum or avg] | string | `min` | no | +| replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | +| replication_lag_silenced | Groups to mute for Elasticache redis 
replication lag monitor | map | `` | no | +| replication_lag_threshold_critical | Elasticache redis replication lag critical threshold in seconds | string | `1` | no | +| replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `0` | no | +| replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | swap_aggregator | Monitor aggregator for Elasticache redis swap [available values: min, max, sum or avg] | string | `min` | no | | swap_message | Custom message for Elasticache redis swap monitor | string | `` | no | | swap_silenced | Groups to mute for Elasticache redis swap monitor | map | `` | no | -| swap_timeframe | Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| swap_timeframe | Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- -DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) -And more here: +DataDog documentation: +* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) * [https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/](https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/) * [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 39a03da..0b063e4 100644 --- 
a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -116,5 +116,61 @@ variable "swap_aggregator" { variable "swap_timeframe" { description = "Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" + default = "last_5m" +} + +variable "replication_lag_silenced" { + description = "Groups to mute for Elasticache redis replication lag monitor" + type = "map" + default = {} +} + +variable "replication_lag_message" { + description = "Custom message for Elasticache redis replication lag monitor" + type = "string" + default = "" +} + +variable "replication_lag_aggregator" { + description = "Monitor aggregator for Elasticache redis replication lag [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "replication_lag_timeframe" { + description = "Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "replication_lag_threshold_warning" { + description = "Elasticache redis replication lag warning threshold in seconds" + default = 0 +} + +variable "replication_lag_threshold_critical" { + description = "Elasticache redis replication lag critical threshold in seconds" + default = 1 +} + +variable "commands_silenced" { + description = "Groups to mute for Elasticache redis commands monitor" + type = "map" + default = {} +} + +variable "commands_message" { + description = "Custom message for Elasticache redis commands monitor" + type = "string" + default = "" +} + +variable "commands_aggregator" { + description = "Monitor aggregator for Elasticache redis commands [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "commands_timeframe" { + description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 
10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" } diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 9a7a47f..f951958 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -107,3 +107,63 @@ resource "datadog_monitor" "redis_swap" { tags = ["env:${var.environment}", "resource:redis", "team:aws", "provider:aws"] } + +resource "datadog_monitor" "redis_replication_lag" { + name = "[${var.environment}] Elasticache redis replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" + message = "${coalesce(var.replication_lag_message, var.message)}" + + type = "metric alert" + + query = < ${var.replication_lag_threshold_critical} + EOF + + thresholds { + warning = "${var.replication_lag_threshold_warning}" + critical = "${var.replication_lag_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.replication_lag_silenced}" + + tags = ["env:${var.environment}", "resource:redis", "team:aws", "provider:aws"] +} + +resource "datadog_monitor" "redis_commands" { + name = "[${var.environment}] Elasticache redis is receiving no commands" + message = "${coalesce(var.commands_message, var.message)}" + + type = "metric alert" + + query = < Date: Thu, 17 May 2018 18:05:03 +0200 Subject: [PATCH 09/53] MON-32 - Free memory monitors added --- cloud/aws/elasticache/memcached/README.md | 13 +++++- cloud/aws/elasticache/memcached/inputs.tf | 39 ++++++++++++++++ cloud/aws/elasticache/memcached/memory.tf | 31 +++++++++++++ .../memcached/monitors-memcached.tf | 33 ++++++++++++++ cloud/aws/elasticache/redis/README.md | 26 
+++++++++-- cloud/aws/elasticache/redis/core.tf | 2 +- cloud/aws/elasticache/redis/inputs.tf | 45 +++++++++++++++++-- cloud/aws/elasticache/redis/memory.tf | 31 +++++++++++++ cloud/aws/elasticache/redis/monitors-redis.tf | 35 ++++++++++++++- 9 files changed, 244 insertions(+), 11 deletions(-) create mode 100644 cloud/aws/elasticache/memcached/memory.tf create mode 100644 cloud/aws/elasticache/redis/memory.tf diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 0c3ae1c..1cf44c2 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -8,8 +8,9 @@ How to use this module module "datadog-monitors-aws-elasticcache-redis" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/memcached?ref={revision}" - message = "${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" + elasticache_size = "${var.size_of_elsaticache}" } ``` @@ -21,6 +22,7 @@ Creates DataDog monitors with the following checks : * Get Hit * CPU High * Swap +* Free memory Inputs ------ @@ -34,9 +36,16 @@ Inputs | cpu_high_threshold_warning | Elasticache memcached cpu high warning threshold in percentage | string | `75` | no | | cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | +| elasticache_size | Size of the Elasticache instance | string | - | yes | | environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | 
+| free_memory_aggregator | Monitor aggregator for Elasticache memcached free memory [available values: min, max, sum or avg] | string | `min` | no | +| free_memory_message | Custom message for Elasticache memcached free memory monitor | string | `` | no | +| free_memory_silenced | Groups to mute for Elasticache memcached free memory monitor | map | `` | no | +| free_memory_threshold_critical | Elasticache memcached free memory critical threshold in percentage | string | `5` | no | +| free_memory_threshold_warning | Elasticache memcached free memory warning threshold in percentage | string | `10` | no | +| free_memory_timeframe | Monitor timeframe for Elasticache memcached free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | get_hits_aggregator | Monitor aggregator for Elasticache memcached get hits [available values: min, max, sum or avg] | string | `min` | no | | get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | | get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index e87d13b..ce8b253 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -24,6 +24,12 @@ variable "filter_tags_custom" { default = "*" } +# elasticache variable +variable "elasticache_size" { + description = "Size of the Elasticache instance" + type = "string" +} + # Memcached specific variable "get_hits_silenced" { description = "Groups to mute for Elasticache memcached get hits monitor" @@ -123,3 +129,36 @@ variable "swap_threshold_critical" { description = "Elasticache memcached swap critical threshold in percentage" default = 50 } + +variable "free_memory_silenced" { + description = "Groups to mute for Elasticache memcached free memory monitor" + type = "map" + default = {} +} 
+ +variable "free_memory_message" { + description = "Custom message for Elasticache memcached free memory monitor" + type = "string" + default = "" +} + +variable "free_memory_aggregator" { + description = "Monitor aggregator for Elasticache memcached free memory [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "free_memory_timeframe" { + description = "Monitor timeframe for Elasticache memcached free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "free_memory_threshold_warning" { + description = "Elasticache memcached free memory warning threshold in percentage" + default = 10 +} + +variable "free_memory_threshold_critical" { + description = "Elasticache memcached free memory critical threshold in percentage" + default = 5 +} diff --git a/cloud/aws/elasticache/memcached/memory.tf b/cloud/aws/elasticache/memcached/memory.tf new file mode 100644 index 0000000..0d41eeb --- /dev/null +++ b/cloud/aws/elasticache/memcached/memory.tf @@ -0,0 +1,31 @@ +variable "memory" { + type = "map" + + description = "Mapping between Elasticache size and Memory." 
+ + default = { + cache.t2.micro = "595926712" + cache.t2.small = "1664299827" + cache.t2.medium = "3457448673" + cache.m3.medium = "2985002270" + cache.m3.large = "6496138035" + cache.m3.xlarge = "14280766259" + cache.m3.2xlarge = "29957396889" + cache.m4.large = "6893422510" + cache.m4.xlarge = "15333033246" + cache.m4.2xlarge = "31890132172" + cache.m4.4xlarge = "65262028062" + cache.m4.10xlarge = "166043435663" + cache.r3.large = "14495514624" + cache.r3.xlarge = "30494267801" + cache.r3.2xlarge = "62491774156" + cache.r3.4xlarge = "126701535232" + cache.r3.8xlarge = "254476812288" + cache.r4.large = "13207024435" + cache.r4.xlarge = "26897232691" + cache.r4.2xlarge = "54191749857" + cache.r4.4xlarge = "108855946117" + cache.r4.8xlarge = "218248763146" + cache.r4.16xlarge = "437012922368" + } +} diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 5fa6353..71b83e3 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -112,3 +112,36 @@ resource "datadog_monitor" "memcached_swap" { tags = ["env:${var.environment}", "resource:memcached", "team:aws", "provider:aws"] } + +resource "datadog_monitor" "memcached_free_memory" { + name = "[${var.environment}] Elasticache memcached free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.free_memory_message, var.message)}" + + type = "metric alert" + + query = <` | no | +| free_memory_threshold_critical | Elasticache redis free memory critical threshold in percentage | string | `5` | no | +| free_memory_threshold_warning | Elasticache redis free memory warning threshold in percentage | string | `10` | no | +| free_memory_timeframe | Monitor timeframe for Elasticache redis free memory [available values: `last_#m` (1, 5, 10, 15, or 30), 
`last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | Message sent when an alert is triggered | string | - | yes | -| redis_size | Size of the Elasticache redis instance | string | - | yes | +| nodes | Number of Elasticache nodes | string | - | yes | | replication_lag_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max, sum or avg] | string | `min` | no | | replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `` | no | diff --git a/cloud/aws/elasticache/redis/core.tf b/cloud/aws/elasticache/redis/core.tf index 988ba69..0970ea6 100644 --- a/cloud/aws/elasticache/redis/core.tf +++ b/cloud/aws/elasticache/redis/core.tf @@ -1,7 +1,7 @@ variable "core" { type = "map" - description = "Mapping between Redis size and vCPU." + description = "Mapping between Elasticache size and vCPU." 
default = { cache.t2.micro = "1" diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 0b063e4..319d118 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -24,12 +24,18 @@ variable "filter_tags_custom" { default = "*" } -# redis specific -variable "redis_size" { - description = "Size of the Elasticache redis instance" +# elasticache variable +variable "elasticache_size" { + description = "Size of the Elasticache instance" type = "string" } +variable "nodes" { + description = "Number of Elasticache nodes" + type = "string" +} + +# redis specific variable "cache_hits_silenced" { description = "Groups to mute for Elasticache redis cache hits monitor" type = "map" @@ -174,3 +180,36 @@ variable "commands_timeframe" { description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_5m" } + +variable "free_memory_silenced" { + description = "Groups to mute for Elasticache redis free memory monitor" + type = "map" + default = {} +} + +variable "free_memory_message" { + description = "Custom message for Elasticache redis free memory monitor" + type = "string" + default = "" +} + +variable "free_memory_aggregator" { + description = "Monitor aggregator for Elasticache redis free memory [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + +variable "free_memory_timeframe" { + description = "Monitor timeframe for Elasticache redis free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "free_memory_threshold_warning" { + description = "Elasticache redis free memory warning threshold in percentage" + default = 10 +} + +variable "free_memory_threshold_critical" { + description = "Elasticache redis free memory critical threshold in percentage" + default = 5 +} diff --git 
a/cloud/aws/elasticache/redis/memory.tf b/cloud/aws/elasticache/redis/memory.tf new file mode 100644 index 0000000..0d41eeb --- /dev/null +++ b/cloud/aws/elasticache/redis/memory.tf @@ -0,0 +1,31 @@ +variable "memory" { + type = "map" + + description = "Mapping between Elasticache size and Memory." + + default = { + cache.t2.micro = "595926712" + cache.t2.small = "1664299827" + cache.t2.medium = "3457448673" + cache.m3.medium = "2985002270" + cache.m3.large = "6496138035" + cache.m3.xlarge = "14280766259" + cache.m3.2xlarge = "29957396889" + cache.m4.large = "6893422510" + cache.m4.xlarge = "15333033246" + cache.m4.2xlarge = "31890132172" + cache.m4.4xlarge = "65262028062" + cache.m4.10xlarge = "166043435663" + cache.r3.large = "14495514624" + cache.r3.xlarge = "30494267801" + cache.r3.2xlarge = "62491774156" + cache.r3.4xlarge = "126701535232" + cache.r3.8xlarge = "254476812288" + cache.r4.large = "13207024435" + cache.r4.xlarge = "26897232691" + cache.r4.2xlarge = "54191749857" + cache.r4.4xlarge = "108855946117" + cache.r4.8xlarge = "218248763146" + cache.r4.16xlarge = "437012922368" + } +} diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index f951958..dd98874 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -58,7 +58,7 @@ resource "datadog_monitor" "redis_cpu_high" { query = < ( ${var.cpu_high_threshold_critical} / ${var.core[var.redis_size]} ) + ) > ( ${var.cpu_high_threshold_critical} / ${var.core[var.elasticache_size]} ) EOF thresholds { @@ -167,3 +167,36 @@ resource "datadog_monitor" "redis_commands" { tags = ["env:${var.environment}", "resource:redis", "team:aws", "provider:aws"] } + +resource "datadog_monitor" "redis_free_memory" { + name = "[${var.environment}] Elasticache redis free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% 
({{value}}%){{/is_warning}}" + message = "${coalesce(var.free_memory_message, var.message)}" + + type = "metric alert" + + query = < Date: Tue, 22 May 2018 10:58:36 +0200 Subject: [PATCH 10/53] MON-32 - static variable updated to locals --- .../memcached/{memory.tf => locals.tf} | 8 ++--- .../memcached/monitors-memcached.tf | 2 +- cloud/aws/elasticache/redis/core.tf | 31 ------------------ .../redis/{memory.tf => locals.tf} | 32 ++++++++++++++++--- cloud/aws/elasticache/redis/monitors-redis.tf | 4 +-- 5 files changed, 32 insertions(+), 45 deletions(-) rename cloud/aws/elasticache/memcached/{memory.tf => locals.tf} (88%) delete mode 100644 cloud/aws/elasticache/redis/core.tf rename cloud/aws/elasticache/redis/{memory.tf => locals.tf} (56%) diff --git a/cloud/aws/elasticache/memcached/memory.tf b/cloud/aws/elasticache/memcached/locals.tf similarity index 88% rename from cloud/aws/elasticache/memcached/memory.tf rename to cloud/aws/elasticache/memcached/locals.tf index 0d41eeb..310de51 100644 --- a/cloud/aws/elasticache/memcached/memory.tf +++ b/cloud/aws/elasticache/memcached/locals.tf @@ -1,9 +1,5 @@ -variable "memory" { - type = "map" - - description = "Mapping between Elasticache size and Memory." 
- - default = { +locals { + memory = { cache.t2.micro = "595926712" cache.t2.small = "1664299827" cache.t2.medium = "3457448673" diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 71b83e3..ea2ebec 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -122,7 +122,7 @@ resource "datadog_monitor" "memcached_free_memory" { query = < ( ${var.cpu_high_threshold_critical} / ${var.core[var.elasticache_size]} ) + ) > ( ${var.cpu_high_threshold_critical} / ${local.core[var.elasticache_size]} ) EOF thresholds { @@ -177,7 +177,7 @@ resource "datadog_monitor" "redis_free_memory" { query = < Date: Wed, 23 May 2018 11:25:15 +0200 Subject: [PATCH 11/53] MON-32 - Space aggregator hard coded --- cloud/aws/elasticache/common/README.md | 6 ++-- cloud/aws/elasticache/common/inputs.tf | 12 ++++---- .../common/monitors-elasticache.tf | 12 ++++---- cloud/aws/elasticache/memcached/README.md | 7 ++--- cloud/aws/elasticache/memcached/inputs.tf | 18 ++++------- .../memcached/monitors-memcached.tf | 20 ++++++------- cloud/aws/elasticache/redis/README.md | 10 +++---- cloud/aws/elasticache/redis/inputs.tf | 28 +++++------------ cloud/aws/elasticache/redis/monitors-redis.tf | 30 +++++++++---------- 9 files changed, 61 insertions(+), 82 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 802c0c1..2b29612 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -33,19 +33,19 @@ Inputs |------|-------------|:----:|:-----:|:-----:| | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture Environment | string | - | yes | -| eviction_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max, sum or avg] | string | `min` | no | | eviction_message | Custom message for 
Elasticache eviction monitor | string | `` | no | | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | +| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | | eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags | Tags used for filtering | string | - | yes | -| max_connection_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max, sum or avg] | string | `min` | no | | max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | | max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | +| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `min` | no | | max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | -| no_connection_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max, sum or avg] | string | `min` | no | | no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | | no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | +| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | | no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | resource | Type of Elasticache used | 
string | - | yes | diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index 2e4fe15..ba43b3d 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -35,8 +35,8 @@ variable "eviction_message" { default = "" } -variable "eviction_aggregator" { - description = "Monitor aggregator for Elasticache eviction [available values: min, max, sum or avg]" +variable "eviction_time_aggregator" { + description = "Monitor aggregator for Elasticache eviction [available values: min, max or avg]" type = "string" default = "min" } @@ -58,8 +58,8 @@ variable "max_connection_message" { default = "" } -variable "max_connection_aggregator" { - description = "Monitor aggregator for Elasticache max connection [available values: min, max, sum or avg]" +variable "max_connection_time_aggregator" { + description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]" type = "string" default = "min" } @@ -81,8 +81,8 @@ variable "no_connection_message" { default = "" } -variable "no_connection_aggregator" { - description = "Monitor aggregator for Elasticache no connection [available values: min, max, sum or avg]" +variable "no_connection_time_aggregator" { + description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]" type = "string" default = "min" } diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 98adc63..2b6bae1 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -5,8 +5,8 @@ resource "datadog_monitor" "elasticache_eviction" { type = "metric alert" query = < 0 EOF @@ -32,8 +32,8 @@ resource "datadog_monitor" "elasticache_max_connection" { type = "metric alert" query = <= 65000 EOF @@ -59,8 +59,8 @@ resource "datadog_monitor" "elasticache_no_connection" { type = "metric alert" query = 
<` | no | | cpu_high_threshold_critical | Elasticache memcached cpu high critical threshold in percentage | string | `90` | no | | cpu_high_threshold_warning | Elasticache memcached cpu high warning threshold in percentage | string | `75` | no | +| cpu_high_time_aggregator | Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg] | string | `min` | no | | cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | string | - | yes | | environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| free_memory_aggregator | Monitor aggregator for Elasticache memcached free memory [available values: min, max, sum or avg] | string | `min` | no | | free_memory_message | Custom message for Elasticache memcached free memory monitor | string | `` | no | | free_memory_silenced | Groups to mute for Elasticache memcached free memory monitor | map | `` | no | | free_memory_threshold_critical | Elasticache memcached free memory critical threshold in percentage | string | `5` | no | | free_memory_threshold_warning | Elasticache memcached free memory warning threshold in percentage | string | `10` | no | +| free_memory_time_aggregator | Monitor aggregator for Elasticache memcached free memory [available values: min, max or avg] | string | `min` | no | | free_memory_timeframe | Monitor timeframe for Elasticache memcached free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| get_hits_aggregator | Monitor 
aggregator for Elasticache memcached get hits [available values: min, max, sum or avg] | string | `min` | no | | get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | | get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | | get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `10` | no | | get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `20` | no | | get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | Message sent when an alert is triggered | string | - | yes | -| swap_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max, sum or avg] | string | `min` | no | | swap_message | Custom message for Elasticache memcached swap monitor | string | `` | no | | swap_silenced | Groups to mute for Elasticache memcached swap monitor | map | `` | no | | swap_threshold_critical | Elasticache memcached swap critical threshold in percentage | string | `50` | no | | swap_threshold_warning | Elasticache memcached swap warning threshold in percentage | string | `0` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | | swap_timeframe | Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index ce8b253..aa71bca 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -43,12 +43,6 @@ variable "get_hits_message" { default = "" } -variable 
"get_hits_aggregator" { - description = "Monitor aggregator for Elasticache memcached get hits [available values: min, max, sum or avg]" - type = "string" - default = "min" -} - variable "get_hits_timeframe" { description = "Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_15m" @@ -76,8 +70,8 @@ variable "cpu_high_message" { default = "" } -variable "cpu_high_aggregator" { - description = "Monitor aggregator for Elasticache memcached cpu high [available values: min, max, sum or avg]" +variable "cpu_high_time_aggregator" { + description = "Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg]" type = "string" default = "min" } @@ -109,8 +103,8 @@ variable "swap_message" { default = "" } -variable "swap_aggregator" { - description = "Monitor aggregator for Elasticache memcached swap [available values: min, max, sum or avg]" +variable "swap_time_aggregator" { + description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" type = "string" default = "min" } @@ -142,8 +136,8 @@ variable "free_memory_message" { default = "" } -variable "free_memory_aggregator" { - description = "Monitor aggregator for Elasticache memcached free memory [available values: min, max, sum or avg]" +variable "free_memory_time_aggregator" { + description = "Monitor aggregator for Elasticache memcached free memory [available values: min, max or avg]" type = "string" default = "min" } diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index ea2ebec..fa8baf6 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -22,10 +22,10 @@ resource "datadog_monitor" "memcached_get_hits" { type = "metric alert" query = < ${var.cpu_high_threshold_critical} EOF @@ -88,8 +88,8 @@ 
resource "datadog_monitor" "memcached_swap" { type = "metric alert" query = < ${var.swap_threshold_critical} EOF @@ -120,8 +120,8 @@ resource "datadog_monitor" "memcached_free_memory" { type = "metric alert" query = <` | no | | cache_hits_threshold_critical | Elasticache redis cache hits critical threshold in percentage | string | `10` | no | | cache_hits_threshold_warning | Elasticache redis cache hits warning threshold in percentage | string | `20` | no | | cache_hits_timeframe | Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| commands_aggregator | Monitor aggregator for Elasticache redis commands [available values: min, max, sum or avg] | string | `min` | no | | commands_message | Custom message for Elasticache redis commands monitor | string | `` | no | | commands_silenced | Groups to mute for Elasticache redis commands monitor | map | `` | no | | commands_timeframe | Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| cpu_high_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max, sum or avg] | string | `min` | no | | cpu_high_message | Custom message for Elasticache redis cpu high monitor | string | `` | no | | cpu_high_silenced | Groups to mute for Elasticache redis cpu high monitor | map | `` | no | | cpu_high_threshold_critical | Elasticache redis cpu high critical threshold in percentage | string | `90` | no | | cpu_high_threshold_warning | Elasticache redis cpu high warning threshold in percentage | string | `75` | no | +| cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `min` | no | | cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), 
`last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | string | - | yes | | environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| free_memory_aggregator | Monitor aggregator for Elasticache redis free memory [available values: min, max, sum or avg] | string | `min` | no | | free_memory_message | Custom message for Elasticache redis free memory monitor | string | `` | no | | free_memory_silenced | Groups to mute for Elasticache redis free memory monitor | map | `` | no | | free_memory_threshold_critical | Elasticache redis free memory critical threshold in percentage | string | `5` | no | | free_memory_threshold_warning | Elasticache redis free memory warning threshold in percentage | string | `10` | no | +| free_memory_time_aggregator | Monitor aggregator for Elasticache redis free memory [available values: min, max or avg] | string | `min` | no | | free_memory_timeframe | Monitor timeframe for Elasticache redis free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | Message sent when an alert is triggered | string | - | yes | | nodes | Number of Elasticache nodes | string | - | yes | -| replication_lag_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max, sum or avg] | string | `min` | no | | replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `` | no | | replication_lag_threshold_critical | Elasticache redis replication lag 
critical threshold in seconds | string | `1` | no | | replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `0` | no | +| replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no | | replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| swap_aggregator | Monitor aggregator for Elasticache redis swap [available values: min, max, sum or avg] | string | `min` | no | | swap_message | Custom message for Elasticache redis swap monitor | string | `` | no | | swap_silenced | Groups to mute for Elasticache redis swap monitor | map | `` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache redis swap [available values: min, max or avg] | string | `min` | no | | swap_timeframe | Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 319d118..890f502 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -48,12 +48,6 @@ variable "cache_hits_message" { default = "" } -variable "cache_hits_aggregator" { - description = "Monitor aggregator for Elasticache redis cache hits [available values: min, max, sum or avg]" - type = "string" - default = "min" -} - variable "cache_hits_timeframe" { description = "Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_15m" @@ -81,8 +75,8 @@ variable "cpu_high_message" { default = "" } -variable "cpu_high_aggregator" { - description = "Monitor aggregator for Elasticache 
redis cpu high [available values: min, max, sum or avg]" +variable "cpu_high_time_aggregator" { + description = "Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg]" type = "string" default = "min" } @@ -114,8 +108,8 @@ variable "swap_message" { default = "" } -variable "swap_aggregator" { - description = "Monitor aggregator for Elasticache redis swap [available values: min, max, sum or avg]" +variable "swap_time_aggregator" { + description = "Monitor aggregator for Elasticache redis swap [available values: min, max or avg]" type = "string" default = "min" } @@ -137,8 +131,8 @@ variable "replication_lag_message" { default = "" } -variable "replication_lag_aggregator" { - description = "Monitor aggregator for Elasticache redis replication lag [available values: min, max, sum or avg]" +variable "replication_lag_time_aggregator" { + description = "Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg]" type = "string" default = "min" } @@ -170,12 +164,6 @@ variable "commands_message" { default = "" } -variable "commands_aggregator" { - description = "Monitor aggregator for Elasticache redis commands [available values: min, max, sum or avg]" - type = "string" - default = "min" -} - variable "commands_timeframe" { description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_5m" @@ -193,8 +181,8 @@ variable "free_memory_message" { default = "" } -variable "free_memory_aggregator" { - description = "Monitor aggregator for Elasticache redis free memory [available values: min, max, sum or avg]" +variable "free_memory_time_aggregator" { + description = "Monitor aggregator for Elasticache redis free memory [available values: min, max or avg]" type = "string" default = "min" } diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 
c8fe00c..9b0b57f 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -22,10 +22,10 @@ resource "datadog_monitor" "redis_cache_hits" { type = "metric alert" query = < ( ${var.cpu_high_threshold_critical} / ${local.core[var.elasticache_size]} ) EOF @@ -88,8 +88,8 @@ resource "datadog_monitor" "redis_swap" { type = "metric alert" query = < 0 EOF @@ -115,8 +115,8 @@ resource "datadog_monitor" "redis_replication_lag" { type = "metric alert" query = < ${var.replication_lag_threshold_critical} EOF @@ -147,9 +147,9 @@ resource "datadog_monitor" "redis_commands" { type = "metric alert" query = < Date: Fri, 25 May 2018 12:21:21 +0200 Subject: [PATCH 12/53] MON-32 - group by updated --- .../memcached/monitors-memcached.tf | 12 ++++++------ cloud/aws/elasticache/redis/monitors-redis.tf | 18 +++++++++--------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index fa8baf6..152bd9c 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -23,9 +23,9 @@ resource "datadog_monitor" "memcached_get_hits" { query = < ${var.cpu_high_threshold_critical} EOF @@ -89,7 +89,7 @@ resource "datadog_monitor" "memcached_swap" { query = < ${var.swap_threshold_critical} EOF @@ -121,7 +121,7 @@ resource "datadog_monitor" "memcached_free_memory" { query = < ( ${var.cpu_high_threshold_critical} / ${local.core[var.elasticache_size]} ) EOF @@ -89,7 +89,7 @@ resource "datadog_monitor" "redis_swap" { query = < 0 EOF @@ -116,7 +116,7 @@ resource "datadog_monitor" "redis_replication_lag" { query = < ${var.replication_lag_threshold_critical} EOF @@ -148,8 +148,8 @@ resource "datadog_monitor" "redis_commands" { query = < Date: Fri, 25 May 2018 12:40:30 +0200 Subject: [PATCH 13/53] MON-32 - Tags updated --- 
cloud/aws/elasticache/common/monitors-elasticache.tf | 12 ++++++------ .../aws/elasticache/memcached/monitors-memcached.tf | 8 ++++---- cloud/aws/elasticache/redis/monitors-redis.tf | 12 ++++++------ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 2b6bae1..51ced20 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -6,7 +6,7 @@ resource "datadog_monitor" "elasticache_eviction" { query = < 0 EOF @@ -22,7 +22,7 @@ resource "datadog_monitor" "elasticache_eviction" { silenced = "${var.eviction_silenced}" - tags = ["env:${var.environment}", "resource:${var.resource}", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] } resource "datadog_monitor" "elasticache_max_connection" { @@ -33,7 +33,7 @@ resource "datadog_monitor" "elasticache_max_connection" { query = <= 65000 EOF @@ -49,7 +49,7 @@ resource "datadog_monitor" "elasticache_max_connection" { silenced = "${var.max_connection_silenced}" - tags = ["env:${var.environment}", "resource:${var.resource}", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] } resource "datadog_monitor" "elasticache_no_connection" { @@ -60,7 +60,7 @@ resource "datadog_monitor" "elasticache_no_connection" { query = < Date: Fri, 25 May 2018 16:51:15 +0200 Subject: [PATCH 14/53] MON-32 - Fix redis replication_lag monitor --- cloud/aws/elasticache/redis/monitors-redis.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index dee5909..8aa63dd 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -116,7 +116,7 @@ resource "datadog_monitor" 
"redis_replication_lag" { query = < ${var.replication_lag_threshold_critical} EOF From 21f61b42cb9efd905b7f184c304956068232a2e0 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Fri, 25 May 2018 16:53:16 +0200 Subject: [PATCH 15/53] MON-32 - Fix common value type in eviction monitor --- cloud/aws/elasticache/common/monitors-elasticache.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 51ced20..342a885 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -1,5 +1,5 @@ resource "datadog_monitor" "elasticache_eviction" { - name = "[${var.environment}] Elasticache ${var.resource} eviction {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}" + name = "[${var.environment}] Elasticache ${var.resource} eviction {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}" message = "${coalesce(var.eviction_message, var.message)}" type = "metric alert" From ada70533d747ef0dc035cfac709c9b6c9f9fd1b3 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Fri, 25 May 2018 16:56:02 +0200 Subject: [PATCH 16/53] MON-32 - Fix elasticache_max_connection monitor name --- cloud/aws/elasticache/common/monitors-elasticache.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 342a885..9a0121e 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -26,7 +26,7 @@ resource "datadog_monitor" "elasticache_eviction" { } resource "datadog_monitor" "elasticache_max_connection" { - name = "[${var.environment}] Elasticache ${var.resource} connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" + name = "[${var.environment}] Elasticache 
${var.resource} max connections reached {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" message = "${coalesce(var.max_connection_message, var.message)}" type = "metric alert" From 299beab1a4d5cd3a8c72d1f8cd468dc10b2f7379 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Fri, 25 May 2018 17:13:50 +0200 Subject: [PATCH 17/53] MON-32 - Fix redis_cpu_high monitor --- cloud/aws/elasticache/redis/locals.tf | 92 +++++++++---------- cloud/aws/elasticache/redis/monitors-redis.tf | 13 +-- 2 files changed, 51 insertions(+), 54 deletions(-) diff --git a/cloud/aws/elasticache/redis/locals.tf b/cloud/aws/elasticache/redis/locals.tf index 89858cc..591525d 100644 --- a/cloud/aws/elasticache/redis/locals.tf +++ b/cloud/aws/elasticache/redis/locals.tf @@ -1,53 +1,53 @@ locals { core = { - cache.t2.micro = "1" - cache.t2.small = "1" - cache.t2.medium = "2" - cache.m3.medium = "1" - cache.m3.large = "2" - cache.m3.xlarge = "4" - cache.m3.2xlarge = "8" - cache.m4.large = "2" - cache.m4.xlarge = "4" - cache.m4.2xlarge = "8" - cache.m4.4xlarge = "16" - cache.m4.10xlarge = "40" - cache.r3.large = "2" - cache.r3.xlarge = "4" - cache.r3.2xlarge = "8" - cache.r3.4xlarge = "16" - cache.r3.8xlarge = "32" - cache.r4.large = "2" - cache.r4.xlarge = "4" - cache.r4.2xlarge = "8" - cache.r4.4xlarge = "16" - cache.r4.8xlarge = "32" - cache.r4.16xlarge = "64" + cache.t2.micro = 1 + cache.t2.small = 1 + cache.t2.medium = 2 + cache.m3.medium = 1 + cache.m3.large = 2 + cache.m3.xlarge = 4 + cache.m3.2xlarge = 8 + cache.m4.large = 2 + cache.m4.xlarge = 4 + cache.m4.2xlarge = 8 + cache.m4.4xlarge = 16 + cache.m4.10xlarge = 40 + cache.r3.large = 2 + cache.r3.xlarge = 4 + cache.r3.2xlarge = 8 + cache.r3.4xlarge = 16 + cache.r3.8xlarge = 32 + cache.r4.large = 2 + cache.r4.xlarge = 4 + cache.r4.2xlarge = 8 + cache.r4.4xlarge = 16 + cache.r4.8xlarge = 32 + cache.r4.16xlarge = 64 } memory = { - cache.t2.micro = "595926712" - cache.t2.small = "1664299827" - cache.t2.medium = "3457448673" - 
cache.m3.medium = "2985002270" - cache.m3.large = "6496138035" - cache.m3.xlarge = "14280766259" - cache.m3.2xlarge = "29957396889" - cache.m4.large = "6893422510" - cache.m4.xlarge = "15333033246" - cache.m4.2xlarge = "31890132172" - cache.m4.4xlarge = "65262028062" - cache.m4.10xlarge = "166043435663" - cache.r3.large = "14495514624" - cache.r3.xlarge = "30494267801" - cache.r3.2xlarge = "62491774156" - cache.r3.4xlarge = "126701535232" - cache.r3.8xlarge = "254476812288" - cache.r4.large = "13207024435" - cache.r4.xlarge = "26897232691" - cache.r4.2xlarge = "54191749857" - cache.r4.4xlarge = "108855946117" - cache.r4.8xlarge = "218248763146" - cache.r4.16xlarge = "437012922368" + cache.t2.micro = 595926712 + cache.t2.small = 1664299827 + cache.t2.medium = 3457448673 + cache.m3.medium = 2985002270 + cache.m3.large = 6496138035 + cache.m3.xlarge = 14280766259 + cache.m3.2xlarge = 29957396889 + cache.m4.large = 6893422510 + cache.m4.xlarge = 15333033246 + cache.m4.2xlarge = 31890132172 + cache.m4.4xlarge = 65262028062 + cache.m4.10xlarge = 166043435663 + cache.r3.large = 14495514624 + cache.r3.xlarge = 30494267801 + cache.r3.2xlarge = 62491774156 + cache.r3.4xlarge = 126701535232 + cache.r3.8xlarge = 254476812288 + cache.r4.large = 13207024435 + cache.r4.xlarge = 26897232691 + cache.r4.2xlarge = 54191749857 + cache.r4.4xlarge = 108855946117 + cache.r4.8xlarge = 218248763146 + cache.r4.16xlarge = 437012922368 } } diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 8aa63dd..dddb799 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -53,20 +53,17 @@ resource "datadog_monitor" "redis_cpu_high" { name = "[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_high_message, 
var.message)}" + count = "${length(keys(local.core))}" + type = "metric alert" query = < ( ${var.cpu_high_threshold_critical} / ${local.core[var.elasticache_size]} ) + avg:aws.elasticache.cpuutilization{dd_monitoring:enabled,dd_aws_red:enabled,env:${var.environment},cache_node_type:${element(keys(local.core), count.index)}} by {region,cacheclusterid,cachenodeid} + ) > ${var.cpu_high_threshold_critical / element(values(local.core), count.index)} EOF - thresholds { - warning = "${var.cpu_high_threshold_warning}" - critical = "${var.cpu_high_threshold_critical}" - } - - notify_no_data = true + notify_no_data = false evaluation_delay = "${var.delay}" renotify_interval = 0 notify_audit = false From 9c4f5b4cfd652ec2b4a0e82454807b396f4a621c Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Sat, 26 May 2018 02:41:37 +0200 Subject: [PATCH 18/53] MON-32 - Memory usage monitor updated --- cloud/aws/elasticache/common/inputs.tf | 2 +- cloud/aws/elasticache/memcached/locals.tf | 46 +++++++++---------- .../memcached/monitors-memcached.tf | 10 ++-- cloud/aws/elasticache/redis/inputs.tf | 2 +- cloud/aws/elasticache/redis/monitors-redis.tf | 14 +++--- 5 files changed, 39 insertions(+), 35 deletions(-) diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index ba43b3d..d174c2f 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -61,7 +61,7 @@ variable "max_connection_message" { variable "max_connection_time_aggregator" { description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]" type = "string" - default = "min" + default = "max" } variable "max_connection_timeframe" { diff --git a/cloud/aws/elasticache/memcached/locals.tf b/cloud/aws/elasticache/memcached/locals.tf index 310de51..b044b56 100644 --- a/cloud/aws/elasticache/memcached/locals.tf +++ b/cloud/aws/elasticache/memcached/locals.tf @@ -1,27 +1,27 @@ locals { memory = { - cache.t2.micro = 
"595926712" - cache.t2.small = "1664299827" - cache.t2.medium = "3457448673" - cache.m3.medium = "2985002270" - cache.m3.large = "6496138035" - cache.m3.xlarge = "14280766259" - cache.m3.2xlarge = "29957396889" - cache.m4.large = "6893422510" - cache.m4.xlarge = "15333033246" - cache.m4.2xlarge = "31890132172" - cache.m4.4xlarge = "65262028062" - cache.m4.10xlarge = "166043435663" - cache.r3.large = "14495514624" - cache.r3.xlarge = "30494267801" - cache.r3.2xlarge = "62491774156" - cache.r3.4xlarge = "126701535232" - cache.r3.8xlarge = "254476812288" - cache.r4.large = "13207024435" - cache.r4.xlarge = "26897232691" - cache.r4.2xlarge = "54191749857" - cache.r4.4xlarge = "108855946117" - cache.r4.8xlarge = "218248763146" - cache.r4.16xlarge = "437012922368" + cache.t2.micro = 595926712 + cache.t2.small = 1664299827 + cache.t2.medium = 3457448673 + cache.m3.medium = 2985002270 + cache.m3.large = 6496138035 + cache.m3.xlarge = 14280766259 + cache.m3.2xlarge = 29957396889 + cache.m4.large = 6893422510 + cache.m4.xlarge = 15333033246 + cache.m4.2xlarge = 31890132172 + cache.m4.4xlarge = 65262028062 + cache.m4.10xlarge = 166043435663 + cache.r3.large = 14495514624 + cache.r3.xlarge = 30494267801 + cache.r3.2xlarge = 62491774156 + cache.r3.4xlarge = 126701535232 + cache.r3.8xlarge = 254476812288 + cache.r4.large = 13207024435 + cache.r4.xlarge = 26897232691 + cache.r4.2xlarge = 54191749857 + cache.r4.4xlarge = 108855946117 + cache.r4.8xlarge = 218248763146 + cache.r4.16xlarge = 437012922368 } } diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 9754cb1..027a43d 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -117,13 +117,15 @@ resource "datadog_monitor" "memcached_free_memory" { name = "[${var.environment}] Elasticache memcached free memory {{#is_alert}}{{{comparator}}} {{threshold}}% 
({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.free_memory_message, var.message)}" + count = "${length(keys(local.memory))}" + type = "metric alert" query = < Date: Thu, 31 May 2018 12:03:44 +0200 Subject: [PATCH 19/53] MON-32 - monitors updated with latest fix --- cloud/aws/elasticache/common/README.md | 15 +++++--- cloud/aws/elasticache/common/inputs.tf | 35 +++++++++++++++++- .../common/monitors-elasticache.tf | 32 +++++++++++++++++ cloud/aws/elasticache/memcached/README.md | 13 ++----- cloud/aws/elasticache/memcached/inputs.tf | 35 +----------------- .../memcached/monitors-memcached.tf | 36 ++----------------- cloud/aws/elasticache/redis/README.md | 13 +++---- cloud/aws/elasticache/redis/inputs.tf | 25 +------------ cloud/aws/elasticache/redis/monitors-redis.tf | 35 +++--------------- 9 files changed, 92 insertions(+), 147 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 2b29612..f0007ce 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -1,5 +1,5 @@ -AWS ElasticCache Service DataDog monitors -========================================= +AWS ElastiCache Service DataDog monitors +======================================== How to use this module ---------------------- @@ -23,6 +23,7 @@ Purpose Creates DataDog monitors with the following checks: * Eviction +* Swap * Max connections * No connection @@ -32,7 +33,7 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| | delay | Delay in seconds for the metric evaluation | string | `900` | no | -| environment | Architecture Environment | string | - | yes | +| environment | Infrastructure Environment | string | - | yes | | eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | | eviction_silenced | Groups to mute for Elasticache eviction monitor 
| map | `` | no | | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | @@ -40,7 +41,7 @@ Inputs | filter_tags | Tags used for filtering | string | - | yes | | max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | | max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | -| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `min` | no | +| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | | max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | | no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | @@ -48,6 +49,12 @@ Inputs | no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | | no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | resource | Type of Elasticache used | string | - | yes | +| swap_message | Custom message for Elasticache memcached swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache memcached swap monitor | map | `` | no | +| swap_threshold_critical | Elasticache memcached swap critical threshold in percentage | string | `50` | no | +| swap_threshold_warning | Elasticache memcached swap warning threshold in percentage | string | `0` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache memcached 
swap [available values: min, max or avg] | string | `min` | no | +| swap_timeframe | Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index d174c2f..d4bd386 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -1,6 +1,6 @@ # Global Terraform variable "environment" { - description = "Architecture Environment" + description = "Infrastructure Environment" type = "string" } @@ -91,3 +91,36 @@ variable "no_connection_timeframe" { description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_5m" } + +variable "swap_silenced" { + description = "Groups to mute for Elasticache memcached swap monitor" + type = "map" + default = {} +} + +variable "swap_message" { + description = "Custom message for Elasticache memcached swap monitor" + type = "string" + default = "" +} + +variable "swap_time_aggregator" { + description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "swap_timeframe" { + description = "Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "swap_threshold_warning" { + description = "Elasticache memcached swap warning threshold in percentage" + default = 0 +} + +variable "swap_threshold_critical" { + description = "Elasticache memcached swap critical threshold in percentage" + default = 50 +} diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 9a0121e..9ef7c72 100644 --- 
a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -78,3 +78,35 @@ resource "datadog_monitor" "elasticache_no_connection" { tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] } + +resource "datadog_monitor" "elasticache_swap" { + name = "[${var.environment}] Elasticache ${var.resource} swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}" + message = "${coalesce(var.swap_message, var.message)}" + + type = "metric alert" + + query = < ${var.swap_threshold_critical} + EOF + + thresholds { + warning = "${var.swap_threshold_warning}" + critical = "${var.swap_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.swap_silenced}" + + tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] +} diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 75c02f3..2c38e85 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -1,5 +1,5 @@ -AWS ElasticCache Memcached Service DataDog monitors -=================================================== +AWS ElastiCache Memcached Service DataDog monitors +================================================== How to use this module ---------------------- @@ -21,7 +21,6 @@ Creates DataDog monitors with the following checks : * Get Hit * CPU High -* Swap * Free memory Inputs @@ -37,7 +36,7 @@ Inputs | cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | 
delay | Delay in seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | string | - | yes | -| environment | Architecture Environment | string | - | yes | +| environment | Infrastructure Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | free_memory_message | Custom message for Elasticache memcached free memory monitor | string | `` | no | @@ -52,12 +51,6 @@ Inputs | get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `20` | no | | get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | Message sent when an alert is triggered | string | - | yes | -| swap_message | Custom message for Elasticache memcached swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache memcached swap monitor | map | `` | no | -| swap_threshold_critical | Elasticache memcached swap critical threshold in percentage | string | `50` | no | -| swap_threshold_warning | Elasticache memcached swap warning threshold in percentage | string | `0` | no | -| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index aa71bca..2288257 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ 
-1,6 +1,6 @@ # Global Terraform variable "environment" { - description = "Architecture Environment" + description = "Infrastructure Environment" type = "string" } @@ -91,39 +91,6 @@ variable "cpu_high_threshold_critical" { default = 90 } -variable "swap_silenced" { - description = "Groups to mute for Elasticache memcached swap monitor" - type = "map" - default = {} -} - -variable "swap_message" { - description = "Custom message for Elasticache memcached swap monitor" - type = "string" - default = "" -} - -variable "swap_time_aggregator" { - description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "swap_timeframe" { - description = "Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "swap_threshold_warning" { - description = "Elasticache memcached swap warning threshold in percentage" - default = 0 -} - -variable "swap_threshold_critical" { - description = "Elasticache memcached swap critical threshold in percentage" - default = 50 -} - variable "free_memory_silenced" { description = "Groups to mute for Elasticache memcached free memory monitor" type = "map" diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 027a43d..af4925b 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_mem:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_aws_elasticache_memcached:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } @@ -81,38 +81,6 @@ resource "datadog_monitor" "memcached_cpu_high" { tags = ["env:${var.environment}", "engine:memcached", "team:aws", "provider:aws"] } -resource "datadog_monitor" "memcached_swap" { - name = "[${var.environment}] Elasticache memcached swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}" - message = "${coalesce(var.swap_message, var.message)}" - - type = "metric alert" - - query = < ${var.swap_threshold_critical} - EOF - - thresholds { - warning = "${var.swap_threshold_warning}" - critical = "${var.swap_threshold_critical}" - } - - notify_no_data = false - evaluation_delay = "${var.delay}" - renotify_interval = 0 - notify_audit = false - timeout_h = 0 - include_tags = true - locked = false - require_full_window = false - new_host_delay = "${var.delay}" - - silenced = "${var.swap_silenced}" - - tags = ["env:${var.environment}", "engine:memcached", "team:aws", "provider:aws"] -} - resource "datadog_monitor" "memcached_free_memory" { name = "[${var.environment}] Elasticache memcached free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.free_memory_message, var.message)}" @@ -123,7 +91,7 @@ resource "datadog_monitor" "memcached_free_memory" { query = <` | no | | cpu_high_threshold_critical | Elasticache redis cpu high critical threshold in percentage | string | `90` | no | | cpu_high_threshold_warning | Elasticache redis cpu high warning threshold in percentage | string | `75` | no | -| cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `min` | no | +| cpu_high_time_aggregator | Monitor aggregator for Elasticache 
redis cpu high [available values: min, max or avg] | string | `avg` | no | | cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | string | - | yes | -| environment | Architecture Environment | string | - | yes | +| environment | Infrastructure Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | free_memory_message | Custom message for Elasticache redis free memory monitor | string | `` | no | @@ -74,10 +73,6 @@ Inputs | replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `0` | no | | replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no | | replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| swap_message | Custom message for Elasticache redis swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache redis swap monitor | map | `` | no | -| swap_time_aggregator | Monitor aggregator for Elasticache redis swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 
3c0380c..c20aa25 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -1,6 +1,6 @@ # Global Terraform variable "environment" { - description = "Architecture Environment" + description = "Infrastructure Environment" type = "string" } @@ -96,29 +96,6 @@ variable "cpu_high_threshold_critical" { default = 90 } -variable "swap_silenced" { - description = "Groups to mute for Elasticache redis swap monitor" - type = "map" - default = {} -} - -variable "swap_message" { - description = "Custom message for Elasticache redis swap monitor" - type = "string" - default = "" -} - -variable "swap_time_aggregator" { - description = "Monitor aggregator for Elasticache redis swap [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "swap_timeframe" { - description = "Monitor timeframe for Elasticache redis swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - variable "replication_lag_silenced" { description = "Groups to mute for Elasticache redis replication lag monitor" type = "map" diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index f51e1ef..fa8bd56 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -2,11 +2,11 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_red:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_aws_elasticache_redis:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } -module "datadog-monitors-aws-elasticcache-common" { +module "datadog-monitors-aws-elasticache-common" { source = "../common" message = "${var.message}" @@ -59,7 +59,7 @@ resource "datadog_monitor" "redis_cpu_high" { query = < ${var.cpu_high_threshold_critical / element(values(local.core), count.index)} EOF @@ -78,33 +78,6 @@ resource "datadog_monitor" "redis_cpu_high" { tags = ["env:${var.environment}", "engine:redis", "team:aws", "provider:aws"] } -resource "datadog_monitor" "redis_swap" { - name = "[${var.environment}] Elasticache redis is starting to swap ({{value}}MB)" - message = "${coalesce(var.swap_message, var.message)}" - - type = "metric alert" - - query = < 0 - EOF - - notify_no_data = false - evaluation_delay = "${var.delay}" - renotify_interval = 0 - notify_audit = false - timeout_h = 0 - include_tags = true - locked = false - require_full_window = false - new_host_delay = "${var.delay}" - - silenced = "${var.swap_silenced}" - - tags = ["env:${var.environment}", "engine:redis", "team:aws", "provider:aws"] -} - resource "datadog_monitor" "redis_replication_lag" { name = "[${var.environment}] Elasticache redis replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" message = "${coalesce(var.replication_lag_message, var.message)}" @@ -175,7 +148,7 @@ resource "datadog_monitor" "redis_free_memory" { query = < Date: Wed, 6 Jun 2018 10:34:17 +0200 Subject: [PATCH 20/53] MON-32 - Fix free_memory monitors --- cloud/aws/elasticache/memcached/monitors-memcached.tf | 4 ++-- cloud/aws/elasticache/redis/monitors-redis.tf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index af4925b..9ee1f34 100644 --- 
a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -91,9 +91,9 @@ resource "datadog_monitor" "memcached_free_memory" { query = < Date: Thu, 7 Jun 2018 11:57:36 +0200 Subject: [PATCH 21/53] MON-32 - Free Memory monitors updated to evaluate the metric over a period --- cloud/aws/elasticache/common/README.md | 17 ++++--- cloud/aws/elasticache/common/inputs.tf | 44 ++++++++++++++++--- .../common/monitors-elasticache.tf | 32 ++++++++++++++ cloud/aws/elasticache/memcached/README.md | 7 --- cloud/aws/elasticache/memcached/inputs.tf | 33 -------------- cloud/aws/elasticache/memcached/locals.tf | 27 ------------ .../memcached/monitors-memcached.tf | 35 --------------- cloud/aws/elasticache/redis/README.md | 7 --- cloud/aws/elasticache/redis/inputs.tf | 33 -------------- cloud/aws/elasticache/redis/locals.tf | 26 ----------- cloud/aws/elasticache/redis/monitors-redis.tf | 35 --------------- 11 files changed, 82 insertions(+), 214 deletions(-) delete mode 100644 cloud/aws/elasticache/memcached/locals.tf diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index f0007ce..ee27e38 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -26,6 +26,7 @@ Creates DataDog monitors with the following checks: * Swap * Max connections * No connection +* Free Memory Inputs ------ @@ -39,6 +40,12 @@ Inputs | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | | eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags | Tags used for filtering | string | - | yes | +| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 
4), or `last_1d`] | string | `last_15m` | no | +| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | +| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | +| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `300` | no | +| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `200` | no | +| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | | max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | | max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | @@ -49,12 +56,12 @@ Inputs | no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | | no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | resource | Type of Elasticache used | string | - | yes | -| swap_message | Custom message for Elasticache memcached swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache memcached swap monitor | map | `` | no | -| swap_threshold_critical | Elasticache memcached swap critical threshold in percentage | string | `50` | no | -| swap_threshold_warning | Elasticache memcached swap warning threshold in percentage | string | `0` | no | +| swap_message | Custom message for Elasticache swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no 
| +| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50` | no | +| swap_threshold_warning | Elasticache swap warning threshold in percentage | string | `0` | no | | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index d4bd386..ab249df 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -93,13 +93,13 @@ variable "no_connection_timeframe" { } variable "swap_silenced" { - description = "Groups to mute for Elasticache memcached swap monitor" + description = "Groups to mute for Elasticache swap monitor" type = "map" default = {} } variable "swap_message" { - description = "Custom message for Elasticache memcached swap monitor" + description = "Custom message for Elasticache swap monitor" type = "string" default = "" } @@ -111,16 +111,48 @@ variable "swap_time_aggregator" { } variable "swap_timeframe" { - description = "Monitor timeframe for Elasticache memcached swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_5m" } variable "swap_threshold_warning" { - description = "Elasticache memcached swap warning threshold in percentage" + description = "Elasticache swap warning threshold in percentage" default = 
0 } variable "swap_threshold_critical" { - description = "Elasticache memcached swap critical threshold in percentage" - default = 50 + description = "Elasticache swap critical threshold in percentage" + default = 50000000 +} + +variable "free_memory_silenced" { + description = "Groups to mute for Elasticache free memory monitor" + type = "map" + default = {} +} + +variable "free_memory_message" { + description = "Custom message for Elasticache free memory monitor" + type = "string" + default = "" +} + +variable "free_memory_condition_timeframe" { + description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "free_memory_timeframe" { + description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "free_memory_threshold_warning" { + description = "Elasticache free memory warning threshold in percentage" + default = -50 +} + +variable "free_memory_threshold_critical" { + description = "Elasticache free memory critical threshold in percentage" + default = -70 } diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 9ef7c72..91f6287 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -110,3 +110,35 @@ resource "datadog_monitor" "elasticache_swap" { tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] } + +resource "datadog_monitor" "redis_free_memory" { + name = "[${var.environment}] Elasticache ${var.resource} free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.free_memory_message, 
var.message)}" + + type = "metric alert" + + query = <` | no | -| free_memory_threshold_critical | Elasticache memcached free memory critical threshold in percentage | string | `5` | no | -| free_memory_threshold_warning | Elasticache memcached free memory warning threshold in percentage | string | `10` | no | -| free_memory_time_aggregator | Monitor aggregator for Elasticache memcached free memory [available values: min, max or avg] | string | `min` | no | -| free_memory_timeframe | Monitor timeframe for Elasticache memcached free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | | get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | | get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `10` | no | diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 2288257..6bb4ff4 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -90,36 +90,3 @@ variable "cpu_high_threshold_critical" { description = "Elasticache memcached cpu high critical threshold in percentage" default = 90 } - -variable "free_memory_silenced" { - description = "Groups to mute for Elasticache memcached free memory monitor" - type = "map" - default = {} -} - -variable "free_memory_message" { - description = "Custom message for Elasticache memcached free memory monitor" - type = "string" - default = "" -} - -variable "free_memory_time_aggregator" { - description = "Monitor aggregator for Elasticache memcached free memory [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "free_memory_timeframe" { - description = "Monitor timeframe for Elasticache memcached free memory [available values: `last_#m` (1, 
5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" -} - -variable "free_memory_threshold_warning" { - description = "Elasticache memcached free memory warning threshold in percentage" - default = 10 -} - -variable "free_memory_threshold_critical" { - description = "Elasticache memcached free memory critical threshold in percentage" - default = 5 -} diff --git a/cloud/aws/elasticache/memcached/locals.tf b/cloud/aws/elasticache/memcached/locals.tf deleted file mode 100644 index b044b56..0000000 --- a/cloud/aws/elasticache/memcached/locals.tf +++ /dev/null @@ -1,27 +0,0 @@ -locals { - memory = { - cache.t2.micro = 595926712 - cache.t2.small = 1664299827 - cache.t2.medium = 3457448673 - cache.m3.medium = 2985002270 - cache.m3.large = 6496138035 - cache.m3.xlarge = 14280766259 - cache.m3.2xlarge = 29957396889 - cache.m4.large = 6893422510 - cache.m4.xlarge = 15333033246 - cache.m4.2xlarge = 31890132172 - cache.m4.4xlarge = 65262028062 - cache.m4.10xlarge = 166043435663 - cache.r3.large = 14495514624 - cache.r3.xlarge = 30494267801 - cache.r3.2xlarge = 62491774156 - cache.r3.4xlarge = 126701535232 - cache.r3.8xlarge = 254476812288 - cache.r4.large = 13207024435 - cache.r4.xlarge = 26897232691 - cache.r4.2xlarge = 54191749857 - cache.r4.4xlarge = 108855946117 - cache.r4.8xlarge = 218248763146 - cache.r4.16xlarge = 437012922368 - } -} diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 9ee1f34..1a95797 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -80,38 +80,3 @@ resource "datadog_monitor" "memcached_cpu_high" { tags = ["env:${var.environment}", "engine:memcached", "team:aws", "provider:aws"] } - -resource "datadog_monitor" "memcached_free_memory" { - name = "[${var.environment}] Elasticache memcached free memory {{#is_alert}}{{{comparator}}} {{threshold}}% 
({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.free_memory_message, var.message)}" - - count = "${length(keys(local.memory))}" - - type = "metric alert" - - query = <` | no | -| free_memory_threshold_critical | Elasticache redis free memory critical threshold in percentage | string | `5` | no | -| free_memory_threshold_warning | Elasticache redis free memory warning threshold in percentage | string | `10` | no | -| free_memory_time_aggregator | Monitor aggregator for Elasticache redis free memory [available values: min, max or avg] | string | `min` | no | -| free_memory_timeframe | Monitor timeframe for Elasticache redis free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | Message sent when an alert is triggered | string | - | yes | | nodes | Number of Elasticache nodes | string | - | yes | | replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index c20aa25..8a62621 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -145,36 +145,3 @@ variable "commands_timeframe" { description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_5m" } - -variable "free_memory_silenced" { - description = "Groups to mute for Elasticache redis free memory monitor" - type = "map" - default = {} -} - -variable "free_memory_message" { - description = "Custom message for Elasticache redis free memory monitor" - type = "string" - default = "" -} - -variable "free_memory_time_aggregator" { - description = "Monitor aggregator for Elasticache redis free memory [available values: min, max or avg]" - type = "string" - default 
= "min" -} - -variable "free_memory_timeframe" { - description = "Monitor timeframe for Elasticache redis free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" -} - -variable "free_memory_threshold_warning" { - description = "Elasticache redis free memory warning threshold in percentage" - default = 10 -} - -variable "free_memory_threshold_critical" { - description = "Elasticache redis free memory critical threshold in percentage" - default = 5 -} diff --git a/cloud/aws/elasticache/redis/locals.tf b/cloud/aws/elasticache/redis/locals.tf index 591525d..9d7a1eb 100644 --- a/cloud/aws/elasticache/redis/locals.tf +++ b/cloud/aws/elasticache/redis/locals.tf @@ -24,30 +24,4 @@ locals { cache.r4.8xlarge = 32 cache.r4.16xlarge = 64 } - - memory = { - cache.t2.micro = 595926712 - cache.t2.small = 1664299827 - cache.t2.medium = 3457448673 - cache.m3.medium = 2985002270 - cache.m3.large = 6496138035 - cache.m3.xlarge = 14280766259 - cache.m3.2xlarge = 29957396889 - cache.m4.large = 6893422510 - cache.m4.xlarge = 15333033246 - cache.m4.2xlarge = 31890132172 - cache.m4.4xlarge = 65262028062 - cache.m4.10xlarge = 166043435663 - cache.r3.large = 14495514624 - cache.r3.xlarge = 30494267801 - cache.r3.2xlarge = 62491774156 - cache.r3.4xlarge = 126701535232 - cache.r3.8xlarge = 254476812288 - cache.r4.large = 13207024435 - cache.r4.xlarge = 26897232691 - cache.r4.2xlarge = 54191749857 - cache.r4.4xlarge = 108855946117 - cache.r4.8xlarge = 218248763146 - cache.r4.16xlarge = 437012922368 - } } diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index ed9db87..0232498 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -137,38 +137,3 @@ resource "datadog_monitor" "redis_commands" { tags = ["env:${var.environment}", "engine:redis", "team:aws", "provider:aws"] } - -resource "datadog_monitor" 
"redis_free_memory" { - name = "[${var.environment}] Elasticache redis free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.free_memory_message, var.message)}" - - count = "${length(keys(local.memory))}" - - type = "metric alert" - - query = < Date: Fri, 8 Jun 2018 11:25:05 +0200 Subject: [PATCH 22/53] MON-32 - CPU Redis monitors updated with new metric --- .../common/monitors-elasticache.tf | 1 + cloud/aws/elasticache/redis/locals.tf | 27 ------------------- cloud/aws/elasticache/redis/monitors-redis.tf | 8 +++--- 3 files changed, 4 insertions(+), 32 deletions(-) delete mode 100644 cloud/aws/elasticache/redis/locals.tf diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 91f6287..0e83633 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -111,6 +111,7 @@ resource "datadog_monitor" "elasticache_swap" { tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] } +# POC - A approfondir resource "datadog_monitor" "redis_free_memory" { name = "[${var.environment}] Elasticache ${var.resource} free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.free_memory_message, var.message)}" diff --git a/cloud/aws/elasticache/redis/locals.tf b/cloud/aws/elasticache/redis/locals.tf deleted file mode 100644 index 9d7a1eb..0000000 --- a/cloud/aws/elasticache/redis/locals.tf +++ /dev/null @@ -1,27 +0,0 @@ -locals { - core = { - cache.t2.micro = 1 - cache.t2.small = 1 - cache.t2.medium = 2 - cache.m3.medium = 1 - cache.m3.large = 2 - cache.m3.xlarge = 4 - cache.m3.2xlarge = 8 - cache.m4.large = 2 - cache.m4.xlarge = 4 - cache.m4.2xlarge = 8 
- cache.m4.4xlarge = 16 - cache.m4.10xlarge = 40 - cache.r3.large = 2 - cache.r3.xlarge = 4 - cache.r3.2xlarge = 8 - cache.r3.4xlarge = 16 - cache.r3.8xlarge = 32 - cache.r4.large = 2 - cache.r4.xlarge = 4 - cache.r4.2xlarge = 8 - cache.r4.4xlarge = 16 - cache.r4.8xlarge = 32 - cache.r4.16xlarge = 64 - } -} diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 0232498..b5c7935 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -53,17 +53,15 @@ resource "datadog_monitor" "redis_cpu_high" { name = "[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_high_message, var.message)}" - count = "${length(keys(local.core))}" - type = "metric alert" query = < ${var.cpu_high_threshold_critical / element(values(local.core), count.index)} + avg:aws.elasticache.engine_cpuutilization{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid} + ) > ${var.cpu_high_threshold_critical} EOF - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.delay}" renotify_interval = 0 notify_audit = false From 57abe436bf2901c84d848320df2fa283e2b82161 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Mon, 11 Jun 2018 11:37:49 +0200 Subject: [PATCH 23/53] MON-32 - module elasticache updated with variables in redis --- cloud/aws/elasticache/redis/README.md | 24 ++++ cloud/aws/elasticache/redis/inputs.tf | 135 ++++++++++++++++++ cloud/aws/elasticache/redis/monitors-redis.tf | 29 ++++ 3 files changed, 188 insertions(+) diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index f576872..2be14a9 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -56,9 +56,27 @@ Inputs | delay | Delay in 
seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | string | - | yes | | environment | Infrastructure Environment | string | - | yes | +| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | +| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | +| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | +| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | +| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | +| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | +| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | +| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | +| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | +| max_connection_time_aggregator | Monitor aggregator for 
Elasticache max connection [available values: min, max or avg] | string | `max` | no | +| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | +| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | +| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | +| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | +| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | nodes | Number of Elasticache nodes | string | - | yes | | replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `` | no | @@ -66,6 +84,12 @@ Inputs | replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `0` | no | | replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no | | replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| swap_message | Custom message for Elasticache swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | +| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50000000` | no | +| swap_threshold_warning | Elasticache swap 
warning threshold in percentage | string | `0` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | +| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 8a62621..940577b 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -145,3 +145,138 @@ variable "commands_timeframe" { description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_5m" } + +# Elasticache specific +variable "eviction_silenced" { + description = "Groups to mute for Elasticache eviction monitor" + type = "map" + default = {} +} + +variable "eviction_message" { + description = "Custom message for Elasticache eviction monitor" + type = "string" + default = "" +} + +variable "eviction_time_aggregator" { + description = "Monitor aggregator for Elasticache eviction [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "eviction_timeframe" { + description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "max_connection_silenced" { + description = "Groups to mute for Elasticache max connection monitor" + type = "map" + default = {} +} + +variable "max_connection_message" { + description = "Custom message for Elasticache max connection monitor" + type = "string" + default = "" +} + +variable "max_connection_time_aggregator" { + description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]" + type = "string" + 
default = "max" +} + +variable "max_connection_timeframe" { + description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "no_connection_silenced" { + description = "Groups to mute for Elasticache no connection monitor" + type = "map" + default = {} +} + +variable "no_connection_message" { + description = "Custom message for Elasticache no connection monitor" + type = "string" + default = "" +} + +variable "no_connection_time_aggregator" { + description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "no_connection_timeframe" { + description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "swap_silenced" { + description = "Groups to mute for Elasticache swap monitor" + type = "map" + default = {} +} + +variable "swap_message" { + description = "Custom message for Elasticache swap monitor" + type = "string" + default = "" +} + +variable "swap_time_aggregator" { + description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "swap_timeframe" { + description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "swap_threshold_warning" { + description = "Elasticache swap warning threshold in percentage" + default = 0 +} + +variable "swap_threshold_critical" { + description = "Elasticache swap critical threshold in percentage" + default = 50000000 +} + +variable "free_memory_silenced" { + description = "Groups to mute for Elasticache free memory monitor" + type = "map" + default = {} +} + +variable 
"free_memory_message" { + description = "Custom message for Elasticache free memory monitor" + type = "string" + default = "" +} + +variable "free_memory_condition_timeframe" { + description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "free_memory_timeframe" { + description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "free_memory_threshold_warning" { + description = "Elasticache free memory warning threshold in percentage" + default = -50 +} + +variable "free_memory_threshold_critical" { + description = "Elasticache free memory critical threshold in percentage" + default = -70 +} diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index b5c7935..6572a3f 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -13,6 +13,35 @@ module "datadog-monitors-aws-elasticache-common" { environment = "${var.environment}" filter_tags = "${data.template_file.filter.rendered}" resource = "redis" + + eviction_message = "${var.eviction_message}" + eviction_silenced = "${var.eviction_silenced}" + eviction_time_aggregator = "${var.eviction_time_aggregator}" + eviction_timeframe = "${var.eviction_timeframe}" + + free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}" + free_memory_timeframe = "${var.free_memory_timeframe}" + free_memory_message = "${var.free_memory_message}" + free_memory_silenced = "${var.free_memory_silenced}" + free_memory_threshold_critical = "${var.free_memory_threshold_critical}" + free_memory_threshold_warning = "${var.free_memory_threshold_warning}" + + max_connection_message = "${var.max_connection_message}" + max_connection_silenced = 
"${var.max_connection_silenced}" + max_connection_time_aggregator = "${var.max_connection_time_aggregator}" + max_connection_timeframe = "${var.max_connection_timeframe}" + + no_connection_message = "${var.no_connection_message}" + no_connection_silenced = "${var.no_connection_silenced}" + no_connection_time_aggregator = "${var.no_connection_time_aggregator}" + no_connection_timeframe = "${var.no_connection_timeframe}" + + swap_message = "${var.swap_message}" + swap_silenced = "${var.swap_silenced}" + swap_threshold_critical = "${var.swap_threshold_critical}" + swap_threshold_warning = "${var.swap_threshold_warning}" + swap_time_aggregator = "${var.swap_time_aggregator}" + swap_timeframe = "${var.swap_timeframe}" } resource "datadog_monitor" "redis_cache_hits" { From 3c24284384b197ce1d2f791f569891bf2510b31d Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Mon, 11 Jun 2018 13:57:34 +0200 Subject: [PATCH 24/53] MON-32 - module elasticache updated with variables in memcached --- cloud/aws/elasticache/memcached/README.md | 26 +++- cloud/aws/elasticache/memcached/inputs.tf | 135 ++++++++++++++++++ .../memcached/monitors-memcached.tf | 31 +++- cloud/aws/elasticache/redis/README.md | 2 +- 4 files changed, 191 insertions(+), 3 deletions(-) diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 20266e9..75360b0 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -5,7 +5,7 @@ How to use this module ---------------------- ``` -module "datadog-monitors-aws-elasticcache-redis" { +module "datadog-monitors-aws-elasticache-redis" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/memcached?ref={revision}" message = "${module.datadog-message-alerting.alerting-message}" @@ -36,14 +36,38 @@ Inputs | delay | Delay in seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | 
string | - | yes | | environment | Infrastructure Environment | string | - | yes | +| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | +| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | +| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | +| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | +| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | +| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | +| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | +| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | | get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | | get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `10` | no | | get_hits_threshold_warning | Elasticache memcached get hits 
warning threshold in percentage | string | `20` | no | | get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | +| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | +| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | +| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | +| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | +| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | +| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | +| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| swap_message | Custom message for Elasticache swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | +| swap_threshold_critical | Elasticache swap critical threshold in bytes | string | `50000000` | no | +| swap_threshold_warning | Elasticache swap warning threshold in bytes | string | `0` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | +| swap_timeframe | Monitor timeframe for Elasticache swap [available values: 
`last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 6bb4ff4..acdce22 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -90,3 +90,138 @@ variable "cpu_high_threshold_critical" { description = "Elasticache memcached cpu high critical threshold in percentage" default = 90 } + +# Elasticache specific +variable "eviction_silenced" { + description = "Groups to mute for Elasticache eviction monitor" + type = "map" + default = {} +} + +variable "eviction_message" { + description = "Custom message for Elasticache eviction monitor" + type = "string" + default = "" +} + +variable "eviction_time_aggregator" { + description = "Monitor aggregator for Elasticache eviction [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "eviction_timeframe" { + description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "max_connection_silenced" { + description = "Groups to mute for Elasticache max connection monitor" + type = "map" + default = {} +} + +variable "max_connection_message" { + description = "Custom message for Elasticache max connection monitor" + type = "string" + default = "" +} + +variable "max_connection_time_aggregator" { + description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "max_connection_timeframe" { + description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "no_connection_silenced" { + description = "Groups to mute for Elasticache no 
connection monitor" + type = "map" + default = {} +} + +variable "no_connection_message" { + description = "Custom message for Elasticache no connection monitor" + type = "string" + default = "" +} + +variable "no_connection_time_aggregator" { + description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "no_connection_timeframe" { + description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "swap_silenced" { + description = "Groups to mute for Elasticache swap monitor" + type = "map" + default = {} +} + +variable "swap_message" { + description = "Custom message for Elasticache swap monitor" + type = "string" + default = "" +} + +variable "swap_time_aggregator" { + description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "swap_timeframe" { + description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "swap_threshold_warning" { + description = "Elasticache swap warning threshold in percentage" + default = 0 +} + +variable "swap_threshold_critical" { + description = "Elasticache swap critical threshold in percentage" + default = 50000000 +} + +variable "free_memory_silenced" { + description = "Groups to mute for Elasticache free memory monitor" + type = "map" + default = {} +} + +variable "free_memory_message" { + description = "Custom message for Elasticache free memory monitor" + type = "string" + default = "" +} + +variable "free_memory_condition_timeframe" { + description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default 
= "last_15m" +} + +variable "free_memory_timeframe" { + description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_15m" +} + +variable "free_memory_threshold_warning" { + description = "Elasticache free memory warning threshold in percentage" + default = -50 +} + +variable "free_memory_threshold_critical" { + description = "Elasticache free memory critical threshold in percentage" + default = -70 +} diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 1a95797..ea8a1c8 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -6,13 +6,42 @@ data "template_file" "filter" { } } -module "datadog-monitors-aws-elasticcache-common" { +module "datadog-monitors-aws-elasticache-common" { source = "../common" message = "${var.message}" environment = "${var.environment}" filter_tags = "${data.template_file.filter.rendered}" resource = "memcached" + + eviction_message = "${var.eviction_message}" + eviction_silenced = "${var.eviction_silenced}" + eviction_time_aggregator = "${var.eviction_time_aggregator}" + eviction_timeframe = "${var.eviction_timeframe}" + + free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}" + free_memory_timeframe = "${var.free_memory_timeframe}" + free_memory_message = "${var.free_memory_message}" + free_memory_silenced = "${var.free_memory_silenced}" + free_memory_threshold_critical = "${var.free_memory_threshold_critical}" + free_memory_threshold_warning = "${var.free_memory_threshold_warning}" + + max_connection_message = "${var.max_connection_message}" + max_connection_silenced = "${var.max_connection_silenced}" + max_connection_time_aggregator = "${var.max_connection_time_aggregator}" + max_connection_timeframe = "${var.max_connection_timeframe}" + + no_connection_message = 
"${var.no_connection_message}" + no_connection_silenced = "${var.no_connection_silenced}" + no_connection_time_aggregator = "${var.no_connection_time_aggregator}" + no_connection_timeframe = "${var.no_connection_timeframe}" + + swap_message = "${var.swap_message}" + swap_silenced = "${var.swap_silenced}" + swap_threshold_critical = "${var.swap_threshold_critical}" + swap_threshold_warning = "${var.swap_threshold_warning}" + swap_time_aggregator = "${var.swap_time_aggregator}" + swap_timeframe = "${var.swap_timeframe}" } resource "datadog_monitor" "memcached_get_hits" { diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index 2be14a9..e643fea 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -5,7 +5,7 @@ How to use this module ---------------------- ``` -module "datadog-monitors-aws-elasticcache-redis" { +module "datadog-monitors-aws-elasticache-redis" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/redis?ref={revision}" message = "${module.datadog-message-alerting.alerting-message}" From bdfb0f39fc792b1b98e6afe83b5396549c641185 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 12 Jun 2018 10:50:12 +0200 Subject: [PATCH 25/53] MON-32 - Evictions growing monitor added --- cloud/aws/elasticache/common/README.md | 12 +++++-- cloud/aws/elasticache/common/inputs.tf | 32 +++++++++++++++++ .../common/monitors-elasticache.tf | 34 ++++++++++++++++++- cloud/aws/elasticache/memcached/README.md | 6 ++++ cloud/aws/elasticache/memcached/inputs.tf | 32 +++++++++++++++++ .../memcached/monitors-memcached.tf | 7 ++++ cloud/aws/elasticache/redis/README.md | 6 ++++ cloud/aws/elasticache/redis/inputs.tf | 32 +++++++++++++++++ cloud/aws/elasticache/redis/monitors-redis.tf | 7 ++++ 9 files changed, 164 insertions(+), 4 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 
ee27e38..5bf7f7a 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -35,6 +35,12 @@ Inputs |------|-------------|:----:|:-----:|:-----:| | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Infrastructure Environment | string | - | yes | +| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | +| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | +| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | +| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | +| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | @@ -43,8 +49,8 @@ Inputs | free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | | free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | -| free_memory_threshold_critical | Elasticache 
free memory critical threshold in percentage | string | `300` | no | -| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `200` | no | +| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | +| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | | free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | | max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | @@ -58,7 +64,7 @@ Inputs | resource | Type of Elasticache used | string | - | yes | | swap_message | Custom message for Elasticache swap monitor | string | `` | no | | swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | -| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50` | no | +| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50000000` | no | | swap_threshold_warning | Elasticache swap warning threshold in percentage | string | `0` | no | | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | | swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index ab249df..bda0bfd 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -156,3 +156,35 @@ variable "free_memory_threshold_critical" { description = "Elasticache free memory critical 
threshold in percentage" default = -70 } + +variable "eviction_growing_silenced" { + description = "Groups to mute for Elasticache eviction growing monitor" + type = "map" + default = {} +} + +variable "eviction_growing_message" { + description = "Custom message for Elasticache eviction growing monitor" + type = "string" + default = "" +} + +variable "eviction_growing_condition_timeframe" { + description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "eviction_growing_timeframe" { + description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "eviction_growing_threshold_warning" { + description = "Elasticache eviction growing warning threshold in percentage" + default = 10 +} + +variable "eviction_growing_threshold_critical" { + description = "Elasticache eviction growing critical threshold in percentage" + default = 30 +} diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 0e83633..6d53ade 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -112,7 +112,7 @@ resource "datadog_monitor" "elasticache_swap" { } # POC - A approfondir -resource "datadog_monitor" "redis_free_memory" { +resource "datadog_monitor" "elasticache_free_memory" { name = "[${var.environment}] Elasticache ${var.resource} free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.free_memory_message, var.message)}" @@ -143,3 +143,35 @@ resource "datadog_monitor" "redis_free_memory" { tags = ["env:${var.environment}", "engine:${var.resource}", 
"team:aws", "provider:aws"] } + +resource "datadog_monitor" "elasticache_eviction_growing" { + name = "[${var.environment}] Elasticache ${var.resource} evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.eviction_growing_message, var.message)}" + + type = "metric alert" + + query = < ${var.eviction_growing_threshold_critical} + EOF + + thresholds { + warning = "${var.eviction_growing_threshold_warning}" + critical = "${var.eviction_growing_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.delay}" + + silenced = "${var.eviction_growing_silenced}" + + tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] +} diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 75360b0..58e910d 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -36,6 +36,12 @@ Inputs | delay | Delay in seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | string | - | yes | | environment | Infrastructure Environment | string | - | yes | +| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | +| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | +| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string 
| `30` | no | +| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | +| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index acdce22..74c0e39 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -225,3 +225,35 @@ variable "free_memory_threshold_critical" { description = "Elasticache free memory critical threshold in percentage" default = -70 } + +variable "eviction_growing_silenced" { + description = "Groups to mute for Elasticache eviction growing monitor" + type = "map" + default = {} +} + +variable "eviction_growing_message" { + description = "Custom message for Elasticache eviction growing monitor" + type = "string" + default = "" +} + +variable "eviction_growing_condition_timeframe" { + description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "eviction_growing_timeframe" { + description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "eviction_growing_threshold_warning" { + description = "Elasticache eviction growing warning threshold in percentage" + default = 10 +} + +variable 
"eviction_growing_threshold_critical" { + description = "Elasticache eviction growing critical threshold in percentage" + default = 30 +} diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index ea8a1c8..1ac0125 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -19,6 +19,13 @@ module "datadog-monitors-aws-elasticache-common" { eviction_time_aggregator = "${var.eviction_time_aggregator}" eviction_timeframe = "${var.eviction_timeframe}" + eviction_growing_condition_timeframe = "${var.eviction_growing_condition_timeframe}" + eviction_growing_timeframe = "${var.eviction_growing_timeframe}" + eviction_growing_message = "${var.eviction_growing_message}" + eviction_growing_silenced = "${var.eviction_growing_silenced}" + eviction_growing_threshold_warning = "${var.eviction_growing_threshold_warning}" + eviction_growing_threshold_critical = "${var.eviction_growing_threshold_critical}" + free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}" free_memory_timeframe = "${var.free_memory_timeframe}" free_memory_message = "${var.free_memory_message}" diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index e643fea..4d8e4aa 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -56,6 +56,12 @@ Inputs | delay | Delay in seconds for the metric evaluation | string | `900` | no | | elasticache_size | Size of the Elasticache instance | string | - | yes | | environment | Infrastructure Environment | string | - | yes | +| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | 
no | +| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | +| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | +| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | +| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 940577b..255adff 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -280,3 +280,35 @@ variable "free_memory_threshold_critical" { description = "Elasticache free memory critical threshold in percentage" default = -70 } + +variable "eviction_growing_silenced" { + description = "Groups to mute for Elasticache eviction growing monitor" + type = "map" + default = {} +} + +variable "eviction_growing_message" { + description = "Custom message for Elasticache eviction growing monitor" + type = "string" + default = "" +} + +variable "eviction_growing_condition_timeframe" { + description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "eviction_growing_timeframe" { + description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = 
"last_5m" +} + +variable "eviction_growing_threshold_warning" { + description = "Elasticache eviction growing warning threshold in percentage" + default = 10 +} + +variable "eviction_growing_threshold_critical" { + description = "Elasticache eviction growing critical threshold in percentage" + default = 30 +} diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 6572a3f..16257a6 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -19,6 +19,13 @@ module "datadog-monitors-aws-elasticache-common" { eviction_time_aggregator = "${var.eviction_time_aggregator}" eviction_timeframe = "${var.eviction_timeframe}" + eviction_growing_condition_timeframe = "${var.eviction_growing_condition_timeframe}" + eviction_growing_timeframe = "${var.eviction_growing_timeframe}" + eviction_growing_message = "${var.eviction_growing_message}" + eviction_growing_silenced = "${var.eviction_growing_silenced}" + eviction_growing_threshold_warning = "${var.eviction_growing_threshold_warning}" + eviction_growing_threshold_critical = "${var.eviction_growing_threshold_critical}" + free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}" free_memory_timeframe = "${var.free_memory_timeframe}" free_memory_message = "${var.free_memory_message}" From 27606ee6774cb05e1d6c2499116eabdd9deb7f7a Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 12 Jun 2018 11:54:37 +0200 Subject: [PATCH 26/53] MON-32 - READMEs updated --- cloud/aws/elasticache/common/README.md | 1 + cloud/aws/elasticache/memcached/README.md | 13 ++++++++++++- cloud/aws/elasticache/redis/README.md | 22 +++++++++++----------- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 5bf7f7a..779e9c9 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -23,6 
+23,7 @@ Purpose Creates DataDog monitors with the following checks: * Eviction +* Eviction growing * Swap * Max connections * No connection diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 58e910d..83b0ca8 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -10,7 +10,6 @@ module "datadog-monitors-aws-elasticache-redis" { message = "${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" - elasticache_size = "${var.size_of_elsaticache}" } ``` @@ -19,9 +18,21 @@ Purpose ------- Creates DataDog monitors with the following checks : +Memcached specific: + * Get Hit * CPU High +Elasticache common: + +* Eviction +* Eviction growing +* Swap +* Max connections +* No connection +* Free Memory + + Inputs ------ diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index 4d8e4aa..eea8930 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -10,30 +10,30 @@ module "datadog-monitors-aws-elasticache-redis" { message = "${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" - elasticache_size = "${var.size_of_elsaticache}" - nodes = "${data.my_cluster.num_cache_nodes}" } - ``` -You can retrieve the number of nodes using the data source : - -``` -data "aws_elasticache_cluster" "my_cluster" { - cluster_id = "my-cluster-id" -} - -``` Purpose ------- Creates DataDog monitors with the following checks: +Redis specific: + * Cache hits * CPU high * Commands received * Replication lag +Elasticache common: + +* Eviction +* Eviction growing +* Swap +* Max connections +* No connection +* Free Memory + Inputs ------ From 84495256a95d7e26605ec517c1efc236aaa3f301 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Wed, 27 Jun 2018 12:17:09 +0200 Subject: [PATCH 27/53] MON-32 Removed non used parameters nodes and elasticache_size --- 
cloud/aws/elasticache/memcached/README.md | 3 +-- cloud/aws/elasticache/memcached/inputs.tf | 6 ------ cloud/aws/elasticache/redis/README.md | 5 ++--- cloud/aws/elasticache/redis/inputs.tf | 11 ----------- 4 files changed, 3 insertions(+), 22 deletions(-) diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 83b0ca8..b744370 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -45,7 +45,6 @@ Inputs | cpu_high_time_aggregator | Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg] | string | `min` | no | | cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | -| elasticache_size | Size of the Elasticache instance | string | - | yes | | environment | Infrastructure Environment | string | - | yes | | eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | @@ -95,4 +94,4 @@ DataDog documentation: * [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) -AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) +AWS ElasticSearch Service Instance metrics documentation: 
[https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 74c0e39..345e1f5 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -24,12 +24,6 @@ variable "filter_tags_custom" { default = "*" } -# elasticache variable -variable "elasticache_size" { - description = "Size of the Elasticache instance" - type = "string" -} - # Memcached specific variable "get_hits_silenced" { description = "Groups to mute for Elasticache memcached get hits monitor" diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index eea8930..7eb58ca 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -54,7 +54,6 @@ Inputs | cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `avg` | no | | cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | -| elasticache_size | Size of the Elasticache instance | string | - | yes | | environment | Infrastructure Environment | string | - | yes | | eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | @@ -83,7 +82,6 @@ Inputs | no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | | 
no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | | no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| nodes | Number of Elasticache nodes | string | - | yes | | replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `` | no | | replication_lag_threshold_critical | Elasticache redis replication lag critical threshold in seconds | string | `1` | no | @@ -97,6 +95,7 @@ Inputs | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | | swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + Related documentation --------------------- @@ -106,4 +105,4 @@ DataDog documentation: * [https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/](https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/) * [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) -AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) +AWS ElasticSearch Service Instance metrics documentation: 
[https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 255adff..a2baf75 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -24,17 +24,6 @@ variable "filter_tags_custom" { default = "*" } -# elasticache variable -variable "elasticache_size" { - description = "Size of the Elasticache instance" - type = "string" -} - -variable "nodes" { - description = "Number of Elasticache nodes" - type = "string" -} - # redis specific variable "cache_hits_silenced" { description = "Groups to mute for Elasticache redis cache hits monitor" From 7e39b2870351284f4b801c6d3b653ec50e876f32 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 28 Jun 2018 14:11:58 +0200 Subject: [PATCH 28/53] MON-32 Updated SWAP default WARNING threshold to 40MB, which is 80% of the 50MB recommendation for alerts according to AWS documentation --- cloud/aws/elasticache/memcached/README.md | 8 +++++--- cloud/aws/elasticache/memcached/inputs.tf | 6 +++--- cloud/aws/elasticache/redis/README.md | 9 ++++++--- cloud/aws/elasticache/redis/inputs.tf | 6 +++--- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index b744370..3b4f2fc 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -80,8 +80,8 @@ Inputs | no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | swap_message | Custom message for Elasticache swap monitor | string | `` | no | | swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | -| 
swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50000000` | no | -| swap_threshold_warning | Elasticache swap warning threshold in percentage | string | `0` | no | +| swap_threshold_critical | Elasticache swap critical threshold in Bytes | string | `50000000` | no | +| swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | | swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | @@ -94,4 +94,6 @@ DataDog documentation: * [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) -AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) +AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) + + diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 345e1f5..c0d9413 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -179,12 +179,12 @@ variable "swap_timeframe" { } variable "swap_threshold_warning" { - description = "Elasticache swap warning threshold in percentage" - default = 0 + description = "Elasticache swap warning threshold in Bytes" + default = 40000000 } variable "swap_threshold_critical" { - 
description = "Elasticache swap critical threshold in percentage" + description = "Elasticache swap critical threshold in Bytes" default = 50000000 } diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index 7eb58ca..e9a8b76 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -90,8 +90,8 @@ Inputs | replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | swap_message | Custom message for Elasticache swap monitor | string | `` | no | | swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | -| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50000000` | no | -| swap_threshold_warning | Elasticache swap warning threshold in percentage | string | `0` | no | +| swap_threshold_critical | Elasticache swap critical threshold in Bytes | string | `50000000` | no | +| swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | | swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | @@ -105,4 +105,7 @@ DataDog documentation: * [https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/](https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/) * [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) -AWS ElasticSearch Service Instance metrics documentation: 
[https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) +AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) + + + diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index a2baf75..ed7dbaf 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -229,12 +229,12 @@ variable "swap_timeframe" { } variable "swap_threshold_warning" { - description = "Elasticache swap warning threshold in percentage" - default = 0 + description = "Elasticache swap warning threshold in Bytes" + default = 40000000 } variable "swap_threshold_critical" { - description = "Elasticache swap critical threshold in percentage" + description = "Elasticache swap critical threshold in Bytes" default = 50000000 } From b395aff3aa38528da5a7174b8bf3cf18c21af9d3 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 28 Jun 2018 15:50:31 +0200 Subject: [PATCH 29/53] MON-32 No hits monitor notify no data to false. 
--- cloud/aws/elasticache/redis/monitors-redis.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 16257a6..add9ec4 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -70,7 +70,7 @@ resource "datadog_monitor" "redis_cache_hits" { critical = "${var.cache_hits_threshold_critical}" } - notify_no_data = true + notify_no_data = false evaluation_delay = "${var.delay}" renotify_interval = 0 notify_audit = false From 5d2746c16a73f7a00885b24c55832f5281c2d72d Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 28 Jun 2018 15:59:19 +0200 Subject: [PATCH 30/53] MON-32 Remove default for common variables to force inputs on the module --- cloud/aws/elasticache/common/README.md | 156 +++++++++--------- cloud/aws/elasticache/common/inputs.tf | 31 ---- cloud/aws/elasticache/memcached/README.md | 24 ++- .../memcached/monitors-memcached.tf | 2 + cloud/aws/elasticache/redis/README.md | 27 ++- cloud/aws/elasticache/redis/monitors-redis.tf | 2 + 6 files changed, 105 insertions(+), 137 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 779e9c9..2d84f7e 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -1,78 +1,78 @@ -AWS ElastiCache Service DataDog monitors -======================================== - -How to use this module ----------------------- - -``` -module "datadog-monitors-aws-elasticache" { - source = "../common" - - message = "${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" - filter_tags = "${var.filter_tags}" - resource = "${var.type_of_resource}" -} - -``` - -This module is loaded by default when you define `memcached` or `redis` monitors - -Purpose -------- -Creates DataDog monitors with the following checks: - -* Eviction -* Eviction 
growing -* Swap -* Max connections -* No connection -* Free Memory - -Inputs ------- - -| Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| delay | Delay in seconds for the metric evaluation | string | `900` | no | -| environment | Infrastructure Environment | string | - | yes | -| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | -| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | -| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | -| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | -| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | -| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | -| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | -| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| filter_tags | Tags used for filtering | string | - | yes | -| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| 
free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | -| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | -| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | -| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | -| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | -| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | -| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | -| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| message | Message sent when an alert is triggered | string | - | yes | -| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | -| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | -| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | -| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| resource | Type of Elasticache used | string | - | yes | -| swap_message | Custom message for Elasticache swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | -| 
swap_threshold_critical | Elasticache swap critical threshold in percentage | string | `50000000` | no | -| swap_threshold_warning | Elasticache swap warning threshold in percentage | string | `0` | no | -| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | - -Related documentation ---------------------- - -DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) - -AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) +AWS ElastiCache Service DataDog monitors +======================================== + +How to use this module +---------------------- + +``` +module "datadog-monitors-aws-elasticache" { + source = "../common" + + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" + filter_tags = "${var.filter_tags}" + resource = "${var.type_of_resource}" +} + +``` + +This module is loaded by default when you define `memcached` or `redis` monitors + +Purpose +------- +Creates DataDog monitors with the following checks: + +* Eviction +* Eviction growing +* Swap +* Max connections +* No connection +* Free Memory + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) + +AWS ElasticSearch Service Instance metrics documentation: 
[https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | - | yes | +| environment | Infrastructure Environment | string | - | yes | +| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | - | yes | +| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | - | yes | +| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | - | yes | +| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | - | yes | +| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| eviction_message | Custom message for Elasticache eviction monitor | string | - | yes | +| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | - | yes | +| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | - | yes | +| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| filter_tags | Tags used for filtering | string | - | yes | +| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 
5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| free_memory_message | Custom message for Elasticache free memory monitor | string | - | yes | +| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | - | yes | +| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | - | yes | +| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | - | yes | +| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| max_connection_message | Custom message for Elasticache max connection monitor | string | - | yes | +| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | - | yes | +| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | - | yes | +| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| message | Message sent when an alert is triggered | string | - | yes | +| no_connection_message | Custom message for Elasticache no connection monitor | string | - | yes | +| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | - | yes | +| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | - | yes | +| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| resource | Type of Elasticache used | string | - | yes | +| swap_message | Custom message for Elasticache swap monitor | string | - | yes | +| swap_silenced | Groups to mute 
for Elasticache swap monitor | map | - | yes | +| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | - | yes | +| swap_threshold_warning | Elasticache swap warning threshold in percentage | string | - | yes | +| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | - | yes | +| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index bda0bfd..6a686b2 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -7,7 +7,6 @@ variable "environment" { # Global DataDog variable "delay" { description = "Delay in seconds for the metric evaluation" - default = 900 } variable "message" { @@ -26,165 +25,135 @@ variable "resource" { variable "eviction_silenced" { description = "Groups to mute for Elasticache eviction monitor" type = "map" - default = {} } variable "eviction_message" { description = "Custom message for Elasticache eviction monitor" type = "string" - default = "" } variable "eviction_time_aggregator" { description = "Monitor aggregator for Elasticache eviction [available values: min, max or avg]" type = "string" - default = "min" } variable "eviction_timeframe" { description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" } variable "max_connection_silenced" { description = "Groups to mute for Elasticache max connection monitor" type = "map" - default = {} } variable "max_connection_message" { description = "Custom message for Elasticache max connection monitor" type = "string" - default = "" } variable "max_connection_time_aggregator" { description = "Monitor aggregator for Elasticache max connection 
[available values: min, max or avg]" type = "string" - default = "max" } variable "max_connection_timeframe" { description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" } variable "no_connection_silenced" { description = "Groups to mute for Elasticache no connection monitor" type = "map" - default = {} } variable "no_connection_message" { description = "Custom message for Elasticache no connection monitor" type = "string" - default = "" } variable "no_connection_time_aggregator" { description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]" type = "string" - default = "min" } variable "no_connection_timeframe" { description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" } variable "swap_silenced" { description = "Groups to mute for Elasticache swap monitor" type = "map" - default = {} } variable "swap_message" { description = "Custom message for Elasticache swap monitor" type = "string" - default = "" } variable "swap_time_aggregator" { description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" type = "string" - default = "min" } variable "swap_timeframe" { description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" } variable "swap_threshold_warning" { description = "Elasticache swap warning threshold in percentage" - default = 0 } variable "swap_threshold_critical" { description = "Elasticache swap critical threshold in percentage" - default = 50000000 } variable "free_memory_silenced" { description = "Groups to mute for Elasticache free memory monitor" type = "map" - default = {} } variable "free_memory_message" { description = "Custom message 
for Elasticache free memory monitor" type = "string" - default = "" } variable "free_memory_condition_timeframe" { description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" } variable "free_memory_timeframe" { description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" } variable "free_memory_threshold_warning" { description = "Elasticache free memory warning threshold in percentage" - default = -50 } variable "free_memory_threshold_critical" { description = "Elasticache free memory critical threshold in percentage" - default = -70 } variable "eviction_growing_silenced" { description = "Groups to mute for Elasticache eviction growing monitor" type = "map" - default = {} } variable "eviction_growing_message" { description = "Custom message for Elasticache eviction growing monitor" type = "string" - default = "" } variable "eviction_growing_condition_timeframe" { description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" } variable "eviction_growing_timeframe" { description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" } variable "eviction_growing_threshold_warning" { description = "Elasticache eviction growing warning threshold in percentage" - default = 10 } variable "eviction_growing_threshold_critical" { description = "Elasticache eviction growing critical threshold in percentage" - default = 30 } diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 3b4f2fc..5b6bbec 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ 
b/cloud/aws/elasticache/memcached/README.md @@ -32,6 +32,17 @@ Elasticache common: * No connection * Free Memory +Related documentation +--------------------- + +DataDog documentation: + +* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) + + +AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) + Inputs ------ @@ -84,16 +95,3 @@ Inputs | swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | | swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | - -Related documentation ---------------------- - -DataDog documentation: - -* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) -* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) - - -AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) - - diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf 
b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 1ac0125..37d109b 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -14,6 +14,8 @@ module "datadog-monitors-aws-elasticache-common" { filter_tags = "${data.template_file.filter.rendered}" resource = "memcached" + delay = "${var.delay}" + eviction_message = "${var.eviction_message}" eviction_silenced = "${var.eviction_silenced}" eviction_time_aggregator = "${var.eviction_time_aggregator}" diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index e9a8b76..82d1f4c 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -34,6 +34,18 @@ Elasticache common: * No connection * Free Memory +Related documentation +--------------------- + +DataDog documentation: + +* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +* [https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/](https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/) +* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) + +AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) + + Inputs ------ @@ -94,18 +106,3 @@ Inputs | swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | | swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), 
`last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | - - -Related documentation ---------------------- - -DataDog documentation: - -* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) -* [https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/](https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/) -* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) - -AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) - - - diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index add9ec4..08f35ef 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -14,6 +14,8 @@ module "datadog-monitors-aws-elasticache-common" { filter_tags = "${data.template_file.filter.rendered}" resource = "redis" + delay = "${var.delay}" + eviction_message = "${var.eviction_message}" eviction_silenced = "${var.eviction_silenced}" eviction_time_aggregator = "${var.eviction_time_aggregator}" From d620d436ba4aec6411c4436b7d7a50a27df99691 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Tue, 24 Jul 2018 11:36:11 +0200 Subject: [PATCH 31/53] MON-32 - Regenerate README and outputs added --- README.md | 4 + cloud/aws/elasticache/common/README.md | 57 +++--- cloud/aws/elasticache/common/outputs.tf | 29 +++ cloud/aws/elasticache/memcached/README.md | 185 +++++++++--------- cloud/aws/elasticache/memcached/outputs.tf | 9 + cloud/aws/elasticache/redis/README.md | 206 ++++++++++----------- cloud/aws/elasticache/redis/outputs.tf | 19 ++ 7 files 
changed, 277 insertions(+), 232 deletions(-) create mode 100644 cloud/aws/elasticache/common/outputs.tf create mode 100644 cloud/aws/elasticache/memcached/outputs.tf create mode 100644 cloud/aws/elasticache/redis/outputs.tf diff --git a/README.md b/README.md index ed6b246..5cadf39 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,10 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/) - [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/) - [apigateway](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/apigateway/) + - [elasticache](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/) + - [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/common/) + - [memcached](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/memcached/) + - [redis](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/redis/) - [elasticsearch](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticsearch/) - [elb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elb/) - [kinesis-firehose](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/kinesis-firehose/) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 2d84f7e..f9d2d6e 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -1,43 +1,29 @@ -AWS ElastiCache Service DataDog monitors -======================================== +# CLOUD AWS ELASTICACHE COMMON DataDog monitors -How to use this module ----------------------- +## How to use this module ``` -module "datadog-monitors-aws-elasticache" { - source = "../common" +module 
"datadog-monitors-cloud-aws-elasticache-common" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/common?ref={revision}" - message = "${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" - filter_tags = "${var.filter_tags}" - resource = "${var.type_of_resource}" + message = "${module.datadog-message-alerting.alerting-message}" } ``` -This module is loaded by default when you define `memcached` or `redis` monitors +## Purpose -Purpose -------- Creates DataDog monitors with the following checks: -* Eviction -* Eviction growing -* Swap -* Max connections -* No connection -* Free Memory +- Elasticache ${var.resource} eviction +- Elasticache ${var.resource} max connections reached +- Elasticache ${var.resource} connections +- Elasticache ${var.resource} swap +- Elasticache ${var.resource} free memory +- Elasticache ${var.resource} evictions is growing -Related documentation ---------------------- - -DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) - -AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) - -Inputs ------- +## Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| @@ -76,3 +62,20 @@ Inputs | swap_threshold_warning | Elasticache swap warning threshold in percentage | string | - | yes | | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | - | yes | | swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | + +## Outputs + +| Name | Description | 
+|------|-------------| +| elasticache_eviction_growing_id | id for monitor elasticache_eviction_growing | +| elasticache_eviction_id | id for monitor elasticache_eviction | +| elasticache_free_memory_id | id for monitor elasticache_free_memory | +| elasticache_max_connection_id | id for monitor elasticache_max_connection | +| elasticache_no_connection_id | id for monitor elasticache_no_connection | +| elasticache_swap_id | id for monitor elasticache_swap | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) + diff --git a/cloud/aws/elasticache/common/outputs.tf b/cloud/aws/elasticache/common/outputs.tf new file mode 100644 index 0000000..a1e295e --- /dev/null +++ b/cloud/aws/elasticache/common/outputs.tf @@ -0,0 +1,29 @@ +output "elasticache_eviction_id" { + description = "id for monitor elasticache_eviction" + value = "${datadog_monitor.elasticache_eviction.id}" +} + +output "elasticache_max_connection_id" { + description = "id for monitor elasticache_max_connection" + value = "${datadog_monitor.elasticache_max_connection.id}" +} + +output "elasticache_no_connection_id" { + description = "id for monitor elasticache_no_connection" + value = "${datadog_monitor.elasticache_no_connection.id}" +} + +output "elasticache_swap_id" { + description = "id for monitor elasticache_swap" + value = "${datadog_monitor.elasticache_swap.id}" +} + +output "elasticache_free_memory_id" { + description = "id for monitor elasticache_free_memory" + value = "${datadog_monitor.elasticache_free_memory.id}" +} + +output "elasticache_eviction_growing_id" { + description = "id for monitor elasticache_eviction_growing" + value = "${datadog_monitor.elasticache_eviction_growing.id}" +} diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 5b6bbec..2ec298e 100644 --- 
a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -1,97 +1,88 @@ -AWS ElastiCache Memcached Service DataDog monitors -================================================== - -How to use this module ----------------------- - -``` -module "datadog-monitors-aws-elasticache-redis" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/memcached?ref={revision}" - - message = "${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" -} - -``` - -Purpose -------- -Creates DataDog monitors with the following checks : - -Memcached specific: - -* Get Hit -* CPU High - -Elasticache common: - -* Eviction -* Eviction growing -* Swap -* Max connections -* No connection -* Free Memory - -Related documentation ---------------------- - -DataDog documentation: - -* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) -* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) - - -AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) - - -Inputs ------- - -| Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| cpu_high_message | Custom message for Elasticache memcached cpu high monitor | string | `` | no | -| cpu_high_silenced | Groups to mute for Elasticache memcached cpu high monitor | map | `` | no | -| cpu_high_threshold_critical | Elasticache memcached cpu high critical threshold in percentage | string | `90` | no | -| cpu_high_threshold_warning | Elasticache memcached cpu high warning threshold in 
percentage | string | `75` | no | -| cpu_high_time_aggregator | Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg] | string | `min` | no | -| cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | -| environment | Infrastructure Environment | string | - | yes | -| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | -| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | -| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | -| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | -| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | -| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | -| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | -| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | 
string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | -| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | -| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | -| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | -| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | -| get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | -| get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `10` | no | -| get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `20` | no | -| get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | -| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | -| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | -| max_connection_timeframe | Monitor timeframe for Elasticache max 
connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| message | Message sent when an alert is triggered | string | - | yes | -| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | -| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | -| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | -| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| swap_message | Custom message for Elasticache swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | -| swap_threshold_critical | Elasticache swap critical threshold in Bytes | string | `50000000` | no | -| swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | -| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +# CLOUD AWS ELASTICACHE MEMCACHED DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-cloud-aws-elasticache-memcached" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/memcached?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Elasticache memcached get hits +- Elasticache memcached CPU + +## Inputs + +| Name | Description | Type | 
Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cpu_high_message | Custom message for Elasticache memcached cpu high monitor | string | `` | no | +| cpu_high_silenced | Groups to mute for Elasticache memcached cpu high monitor | map | `` | no | +| cpu_high_threshold_critical | Elasticache memcached cpu high critical threshold in percentage | string | `90` | no | +| cpu_high_threshold_warning | Elasticache memcached cpu high warning threshold in percentage | string | `75` | no | +| cpu_high_time_aggregator | Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg] | string | `min` | no | +| cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Infrastructure Environment | string | - | yes | +| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | +| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | +| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | +| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | +| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | +| eviction_silenced | Groups to 
mute for Elasticache eviction monitor | map | `` | no | +| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | +| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | +| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | +| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | +| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | +| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | +| get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | +| get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `10` | no | +| get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `20` | no | +| get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), 
or `last_1d`] | string | `last_15m` | no | +| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | +| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | +| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | +| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | +| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | +| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | +| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| swap_message | Custom message for Elasticache swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | +| swap_threshold_critical | Elasticache swap critical threshold in Bytes | string | `50000000` | no | +| swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | +| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| memcached_cpu_high_id | id for monitor 
memcached_cpu_high | +| memcached_get_hits_id | id for monitor memcached_get_hits | + +Related documentation +--------------------- + +DataDog documentation: + +* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) + + diff --git a/cloud/aws/elasticache/memcached/outputs.tf b/cloud/aws/elasticache/memcached/outputs.tf new file mode 100644 index 0000000..d4ed36e --- /dev/null +++ b/cloud/aws/elasticache/memcached/outputs.tf @@ -0,0 +1,9 @@ +output "memcached_get_hits_id" { + description = "id for monitor memcached_get_hits" + value = "${datadog_monitor.memcached_get_hits.id}" +} + +output "memcached_cpu_high_id" { + description = "id for monitor memcached_cpu_high" + value = "${datadog_monitor.memcached_cpu_high.id}" +} diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index 82d1f4c..0e3a5cc 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -1,108 +1,98 @@ -AWS ElastiCache Redis Service DataDog monitors -============================================== - -How to use this module ----------------------- - -``` -module "datadog-monitors-aws-elasticache-redis" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/redis?ref={revision}" - - message = "${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" -} -``` - - -Purpose -------- -Creates DataDog monitors with the following checks: - -Redis specific: - -* Cache hits -* CPU high -* Commands received -* Replication lag - -Elasticache common: - -* Eviction -* Eviction growing -* Swap -* Max connections -* No connection -* Free Memory - -Related documentation ---------------------- - 
-DataDog documentation: - -* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) -* [https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/](https://www.datadoghq.com/dashboards/elasticache-dashboard-redis/) -* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) - -AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) - - -Inputs ------- - -| Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| cache_hits_message | Custom message for Elasticache redis cache hits monitor | string | `` | no | -| cache_hits_silenced | Groups to mute for Elasticache redis cache hits monitor | map | `` | no | -| cache_hits_threshold_critical | Elasticache redis cache hits critical threshold in percentage | string | `10` | no | -| cache_hits_threshold_warning | Elasticache redis cache hits warning threshold in percentage | string | `20` | no | -| cache_hits_timeframe | Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| commands_message | Custom message for Elasticache redis commands monitor | string | `` | no | -| commands_silenced | Groups to mute for Elasticache redis commands monitor | map | `` | no | -| commands_timeframe | Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| cpu_high_message | Custom message for Elasticache redis cpu high monitor | string | `` | no | -| 
cpu_high_silenced | Groups to mute for Elasticache redis cpu high monitor | map | `` | no | -| cpu_high_threshold_critical | Elasticache redis cpu high critical threshold in percentage | string | `90` | no | -| cpu_high_threshold_warning | Elasticache redis cpu high warning threshold in percentage | string | `75` | no | -| cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `avg` | no | -| cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | -| environment | Infrastructure Environment | string | - | yes | -| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | -| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | -| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | -| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | -| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | -| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | -| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | -| 
eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | -| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | -| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | -| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | -| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | -| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | -| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | -| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| message | Message sent when an alert is triggered | string | - | yes | -| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | -| no_connection_silenced | Groups to mute for Elasticache no 
connection monitor | map | `` | no | -| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | -| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | -| replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `` | no | -| replication_lag_threshold_critical | Elasticache redis replication lag critical threshold in seconds | string | `1` | no | -| replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `0` | no | -| replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no | -| replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| swap_message | Custom message for Elasticache swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | -| swap_threshold_critical | Elasticache swap critical threshold in Bytes | string | `50000000` | no | -| swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | -| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +# CLOUD AWS ELASTICACHE REDIS DataDog monitors + +## How to use this module + +``` +module 
"datadog-monitors-cloud-aws-elasticache-redis" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/redis?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Elasticache redis cache hits +- Elasticache redis CPU +- Elasticache redis replication lag +- Elasticache redis is receiving no commands + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cache_hits_message | Custom message for Elasticache redis cache hits monitor | string | `` | no | +| cache_hits_silenced | Groups to mute for Elasticache redis cache hits monitor | map | `` | no | +| cache_hits_threshold_critical | Elasticache redis cache hits critical threshold in percentage | string | `10` | no | +| cache_hits_threshold_warning | Elasticache redis cache hits warning threshold in percentage | string | `20` | no | +| cache_hits_timeframe | Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| commands_message | Custom message for Elasticache redis commands monitor | string | `` | no | +| commands_silenced | Groups to mute for Elasticache redis commands monitor | map | `` | no | +| commands_timeframe | Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| cpu_high_message | Custom message for Elasticache redis cpu high monitor | string | `` | no | +| cpu_high_silenced | Groups to mute for Elasticache redis cpu high monitor | map | `` | no | +| cpu_high_threshold_critical | Elasticache redis cpu high critical threshold in percentage | string | `90` | no | +| cpu_high_threshold_warning | Elasticache redis cpu high 
warning threshold in percentage | string | `75` | no | +| cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `avg` | no | +| cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Infrastructure Environment | string | - | yes | +| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | +| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | +| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | +| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | +| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | +| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | +| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | +| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults 
is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | +| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | +| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | +| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | +| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | +| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | +| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | +| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | +| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | +| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | +| no_connection_timeframe | Monitor timeframe for Elasticache no connection 
[available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | +| replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `` | no | +| replication_lag_threshold_critical | Elasticache redis replication lag critical threshold in seconds | string | `1` | no | +| replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `0` | no | +| replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no | +| replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| swap_message | Custom message for Elasticache swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | +| swap_threshold_critical | Elasticache swap critical threshold in Bytes | string | `50000000` | no | +| swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache swap [available values: min, max or avg] | string | `min` | no | +| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| redis_cache_hits_id | id for monitor redis_cache_hits | +| redis_commands_id | id for monitor redis_commands | +| redis_cpu_high_id | id for monitor redis_cpu_high | +| redis_replication_lag_id | id for monitor redis_replication_lag | + +## Related documentation + +* 
[https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) + + diff --git a/cloud/aws/elasticache/redis/outputs.tf b/cloud/aws/elasticache/redis/outputs.tf new file mode 100644 index 0000000..23bbe75 --- /dev/null +++ b/cloud/aws/elasticache/redis/outputs.tf @@ -0,0 +1,19 @@ +output "redis_cache_hits_id" { + description = "id for monitor redis_cache_hits" + value = "${datadog_monitor.redis_cache_hits.id}" +} + +output "redis_cpu_high_id" { + description = "id for monitor redis_cpu_high" + value = "${datadog_monitor.redis_cpu_high.id}" +} + +output "redis_replication_lag_id" { + description = "id for monitor redis_replication_lag" + value = "${datadog_monitor.redis_replication_lag.id}" +} + +output "redis_commands_id" { + description = "id for monitor redis_commands" + value = "${datadog_monitor.redis_commands.id}" +} From 5d494fc69c420206b05e5a7158f52b515d953dbb Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Fri, 27 Jul 2018 15:57:28 +0200 Subject: [PATCH 32/53] MON-32 - Monitors updated with MON-275 requirements --- .../common/monitors-elasticache.tf | 24 +++++++++++++++++++ .../memcached/monitors-memcached.tf | 8 +++++++ cloud/aws/elasticache/redis/monitors-redis.tf | 16 +++++++++++++ 3 files changed, 48 insertions(+) diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 6d53ade..c826d4c 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -10,6 +10,10 @@ resource "datadog_monitor" "elasticache_eviction" { ) > 0 EOF + lifecycle { + ignore_changes = ["type"] + } + notify_no_data = false evaluation_delay = "${var.delay}" renotify_interval = 0 @@ 
-37,6 +41,10 @@ resource "datadog_monitor" "elasticache_max_connection" { ) >= 65000 EOF + lifecycle { + ignore_changes = ["type"] + } + notify_no_data = true evaluation_delay = "${var.delay}" renotify_interval = 0 @@ -64,6 +72,10 @@ resource "datadog_monitor" "elasticache_no_connection" { ) <= 0 EOF + lifecycle { + ignore_changes = ["type"] + } + notify_no_data = true evaluation_delay = "${var.delay}" renotify_interval = 0 @@ -91,6 +103,10 @@ resource "datadog_monitor" "elasticache_swap" { ) > ${var.swap_threshold_critical} EOF + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.swap_threshold_warning}" critical = "${var.swap_threshold_critical}" @@ -124,6 +140,10 @@ resource "datadog_monitor" "elasticache_free_memory" { < ${var.free_memory_threshold_critical} EOF + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.free_memory_threshold_warning}" critical = "${var.free_memory_threshold_critical}" @@ -156,6 +176,10 @@ resource "datadog_monitor" "elasticache_eviction_growing" { > ${var.eviction_growing_threshold_critical} EOF + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.eviction_growing_threshold_warning}" critical = "${var.eviction_growing_threshold_critical}" diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 37d109b..b87854c 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -67,6 +67,10 @@ resource "datadog_monitor" "memcached_get_hits" { ) < ${var.get_hits_threshold_critical} EOF + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.get_hits_threshold_warning}" critical = "${var.get_hits_threshold_critical}" @@ -99,6 +103,10 @@ resource "datadog_monitor" "memcached_cpu_high" { ) > ${var.cpu_high_threshold_critical} EOF + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = 
"${var.cpu_high_threshold_warning}" critical = "${var.cpu_high_threshold_critical}" diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 08f35ef..2ecc561 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -67,6 +67,10 @@ resource "datadog_monitor" "redis_cache_hits" { ) * 100 < ${var.cache_hits_threshold_critical} EOF + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.cache_hits_threshold_warning}" critical = "${var.cache_hits_threshold_critical}" @@ -99,6 +103,10 @@ resource "datadog_monitor" "redis_cpu_high" { ) > ${var.cpu_high_threshold_critical} EOF + lifecycle { + ignore_changes = ["type"] + } + notify_no_data = true evaluation_delay = "${var.delay}" renotify_interval = 0 @@ -126,6 +134,10 @@ resource "datadog_monitor" "redis_replication_lag" { ) > ${var.replication_lag_threshold_critical} EOF + lifecycle { + ignore_changes = ["type"] + } + thresholds { warning = "${var.replication_lag_threshold_warning}" critical = "${var.replication_lag_threshold_critical}" @@ -159,6 +171,10 @@ resource "datadog_monitor" "redis_commands" { ) <= 0 EOF + lifecycle { + ignore_changes = ["type"] + } + notify_no_data = true evaluation_delay = "${var.delay}" renotify_interval = 0 From c1b5b0cbe234a160aa1e27370cf707779e3c5635 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 16:20:55 +0200 Subject: [PATCH 33/53] MON-32 remove ignore change --- .../common/monitors-elasticache.tf | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index c826d4c..6d53ade 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -10,10 +10,6 @@ resource "datadog_monitor" "elasticache_eviction" { ) > 0 EOF - lifecycle { - ignore_changes = 
["type"] - } - notify_no_data = false evaluation_delay = "${var.delay}" renotify_interval = 0 @@ -41,10 +37,6 @@ resource "datadog_monitor" "elasticache_max_connection" { ) >= 65000 EOF - lifecycle { - ignore_changes = ["type"] - } - notify_no_data = true evaluation_delay = "${var.delay}" renotify_interval = 0 @@ -72,10 +64,6 @@ resource "datadog_monitor" "elasticache_no_connection" { ) <= 0 EOF - lifecycle { - ignore_changes = ["type"] - } - notify_no_data = true evaluation_delay = "${var.delay}" renotify_interval = 0 @@ -103,10 +91,6 @@ resource "datadog_monitor" "elasticache_swap" { ) > ${var.swap_threshold_critical} EOF - lifecycle { - ignore_changes = ["type"] - } - thresholds { warning = "${var.swap_threshold_warning}" critical = "${var.swap_threshold_critical}" @@ -140,10 +124,6 @@ resource "datadog_monitor" "elasticache_free_memory" { < ${var.free_memory_threshold_critical} EOF - lifecycle { - ignore_changes = ["type"] - } - thresholds { warning = "${var.free_memory_threshold_warning}" critical = "${var.free_memory_threshold_critical}" @@ -176,10 +156,6 @@ resource "datadog_monitor" "elasticache_eviction_growing" { > ${var.eviction_growing_threshold_critical} EOF - lifecycle { - ignore_changes = ["type"] - } - thresholds { warning = "${var.eviction_growing_threshold_warning}" critical = "${var.eviction_growing_threshold_critical}" From 2fb01644b511a2597fe2e3baf288920b4bbbc2ef Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 16:25:22 +0200 Subject: [PATCH 34/53] MON-32 remove resource variable --- cloud/aws/elasticache/common/inputs.tf | 4 ---- cloud/aws/elasticache/common/monitors-elasticache.tf | 12 ++++++------ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index 6a686b2..078ded3 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -17,10 +17,6 @@ variable "filter_tags" { description = "Tags 
used for filtering" } -variable "resource" { - description = "Type of Elasticache used" -} - # Elasticache specific variable "eviction_silenced" { description = "Groups to mute for Elasticache eviction monitor" diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 6d53ade..fb33362 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -1,5 +1,5 @@ resource "datadog_monitor" "elasticache_eviction" { - name = "[${var.environment}] Elasticache ${var.resource} eviction {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}" + name = "[${var.environment}] Elasticache eviction {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}" message = "${coalesce(var.eviction_message, var.message)}" type = "metric alert" @@ -26,7 +26,7 @@ resource "datadog_monitor" "elasticache_eviction" { } resource "datadog_monitor" "elasticache_max_connection" { - name = "[${var.environment}] Elasticache ${var.resource} max connections reached {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" + name = "[${var.environment}] Elasticache max connections reached {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" message = "${coalesce(var.max_connection_message, var.message)}" type = "metric alert" @@ -53,7 +53,7 @@ resource "datadog_monitor" "elasticache_max_connection" { } resource "datadog_monitor" "elasticache_no_connection" { - name = "[${var.environment}] Elasticache ${var.resource} connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" + name = "[${var.environment}] Elasticache connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" message = "${coalesce(var.no_connection_message, var.message)}" type = "metric alert" @@ -80,7 +80,7 @@ resource "datadog_monitor" "elasticache_no_connection" { } resource "datadog_monitor" "elasticache_swap" { - name = "[${var.environment}] 
Elasticache ${var.resource} swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}" + name = "[${var.environment}] Elasticache swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}" message = "${coalesce(var.swap_message, var.message)}" type = "metric alert" @@ -113,7 +113,7 @@ resource "datadog_monitor" "elasticache_swap" { # POC - A approfondir resource "datadog_monitor" "elasticache_free_memory" { - name = "[${var.environment}] Elasticache ${var.resource} free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Elasticache free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.free_memory_message, var.message)}" type = "metric alert" @@ -145,7 +145,7 @@ resource "datadog_monitor" "elasticache_free_memory" { } resource "datadog_monitor" "elasticache_eviction_growing" { - name = "[${var.environment}] Elasticache ${var.resource} evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + name = "[${var.environment}] Elasticache evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" message = "${coalesce(var.eviction_growing_message, var.message)}" type = "metric alert" From a901658ab6d144c2c8148894888bac901efa023e Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 18:55:54 +0200 Subject: [PATCH 35/53] MON-32 split delay --- 
cloud/aws/elasticache/common/inputs.tf | 18 +++++++++++--- .../common/monitors-elasticache.tf | 24 +++++++++---------- cloud/aws/elasticache/memcached/inputs.tf | 7 +++++- .../memcached/monitors-memcached.tf | 10 ++++---- cloud/aws/elasticache/redis/inputs.tf | 7 +++++- cloud/aws/elasticache/redis/monitors-redis.tf | 18 +++++++------- 6 files changed, 53 insertions(+), 31 deletions(-) diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index 078ded3..b73bedb 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -5,16 +5,28 @@ variable "environment" { } # Global DataDog -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 } variable "message" { description = "Message sent when an alert is triggered" } -variable "filter_tags" { - description = "Tags used for filtering" +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" } # Elasticache specific diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index fb33362..0e85c7c 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -11,14 +11,14 @@ resource "datadog_monitor" "elasticache_eviction" { EOF notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = 
"${var.eviction_silenced}" @@ -38,14 +38,14 @@ resource "datadog_monitor" "elasticache_max_connection" { EOF notify_no_data = true - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.max_connection_silenced}" @@ -65,14 +65,14 @@ resource "datadog_monitor" "elasticache_no_connection" { EOF notify_no_data = true - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.no_connection_silenced}" @@ -97,14 +97,14 @@ resource "datadog_monitor" "elasticache_swap" { } notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.swap_silenced}" @@ -130,14 +130,14 @@ resource "datadog_monitor" "elasticache_free_memory" { } notify_no_data = true - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.free_memory_silenced}" @@ -162,14 +162,14 @@ resource "datadog_monitor" "elasticache_eviction_growing" { } notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false 
require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.eviction_growing_silenced}" diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index c0d9413..12d499f 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -5,11 +5,16 @@ variable "environment" { } # Global DataDog -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" default = 900 } +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + variable "message" { description = "Message sent when an alert is triggered" } diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index b87854c..d6c07bb 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -14,7 +14,7 @@ module "datadog-monitors-aws-elasticache-common" { filter_tags = "${data.template_file.filter.rendered}" resource = "memcached" - delay = "${var.delay}" + delay = "${var.evaluation_delay}" eviction_message = "${var.eviction_message}" eviction_silenced = "${var.eviction_silenced}" @@ -77,14 +77,14 @@ resource "datadog_monitor" "memcached_get_hits" { } notify_no_data = true - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.get_hits_silenced}" @@ -113,14 +113,14 @@ resource "datadog_monitor" "memcached_cpu_high" { } notify_no_data = true - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true 
locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.cpu_high_silenced}" diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index ed7dbaf..09b4bbf 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -5,11 +5,16 @@ variable "environment" { } # Global DataDog -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" default = 900 } +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + variable "message" { description = "Message sent when an alert is triggered" } diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 2ecc561..9db5631 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -14,7 +14,7 @@ module "datadog-monitors-aws-elasticache-common" { filter_tags = "${data.template_file.filter.rendered}" resource = "redis" - delay = "${var.delay}" + delay = "${var.evaluation_delay}" eviction_message = "${var.eviction_message}" eviction_silenced = "${var.eviction_silenced}" @@ -77,14 +77,14 @@ resource "datadog_monitor" "redis_cache_hits" { } notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.cache_hits_silenced}" @@ -108,14 +108,14 @@ resource "datadog_monitor" "redis_cpu_high" { } notify_no_data = true - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - 
new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.cpu_high_silenced}" @@ -144,14 +144,14 @@ resource "datadog_monitor" "redis_replication_lag" { } notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.replication_lag_silenced}" @@ -176,14 +176,14 @@ resource "datadog_monitor" "redis_commands" { } notify_no_data = true - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.commands_silenced}" From a32095864b7ba9c16730c7af23bca24e9ae0c7e8 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 18:57:09 +0200 Subject: [PATCH 36/53] MON-32 remove ignore change for redis and memcached --- .../elasticache/memcached/monitors-memcached.tf | 8 -------- cloud/aws/elasticache/redis/monitors-redis.tf | 16 ---------------- 2 files changed, 24 deletions(-) diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index d6c07bb..4ab3914 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -67,10 +67,6 @@ resource "datadog_monitor" "memcached_get_hits" { ) < ${var.get_hits_threshold_critical} EOF - lifecycle { - ignore_changes = ["type"] - } - thresholds { warning = "${var.get_hits_threshold_warning}" critical = "${var.get_hits_threshold_critical}" @@ -103,10 +99,6 @@ resource "datadog_monitor" "memcached_cpu_high" { ) > ${var.cpu_high_threshold_critical} EOF - lifecycle { - 
ignore_changes = ["type"] - } - thresholds { warning = "${var.cpu_high_threshold_warning}" critical = "${var.cpu_high_threshold_critical}" diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 9db5631..ed61970 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -67,10 +67,6 @@ resource "datadog_monitor" "redis_cache_hits" { ) * 100 < ${var.cache_hits_threshold_critical} EOF - lifecycle { - ignore_changes = ["type"] - } - thresholds { warning = "${var.cache_hits_threshold_warning}" critical = "${var.cache_hits_threshold_critical}" @@ -103,10 +99,6 @@ resource "datadog_monitor" "redis_cpu_high" { ) > ${var.cpu_high_threshold_critical} EOF - lifecycle { - ignore_changes = ["type"] - } - notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 @@ -134,10 +126,6 @@ resource "datadog_monitor" "redis_replication_lag" { ) > ${var.replication_lag_threshold_critical} EOF - lifecycle { - ignore_changes = ["type"] - } - thresholds { warning = "${var.replication_lag_threshold_warning}" critical = "${var.replication_lag_threshold_critical}" @@ -171,10 +159,6 @@ resource "datadog_monitor" "redis_commands" { ) <= 0 EOF - lifecycle { - ignore_changes = ["type"] - } - notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 From 84af5591b984192019b79e3c55fe8d3fc4cc065d Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 19:05:19 +0200 Subject: [PATCH 37/53] MON-32 use filter tags module --- cloud/aws/elasticache/common/modules.tf | 8 ++++++++ .../aws/elasticache/common/monitors-elasticache.tf | 12 ++++++------ cloud/aws/elasticache/memcached/modules.tf | 8 ++++++++ .../elasticache/memcached/monitors-memcached.tf | 8 ++++---- cloud/aws/elasticache/redis/modules.tf | 8 ++++++++ cloud/aws/elasticache/redis/monitors-redis.tf | 14 +++++++------- 6 files changed, 41 insertions(+), 17 
deletions(-) create mode 100644 cloud/aws/elasticache/common/modules.tf create mode 100644 cloud/aws/elasticache/memcached/modules.tf create mode 100644 cloud/aws/elasticache/redis/modules.tf diff --git a/cloud/aws/elasticache/common/modules.tf b/cloud/aws/elasticache/common/modules.tf new file mode 100644 index 0000000..987c046 --- /dev/null +++ b/cloud/aws/elasticache/common/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../../../common/filter-tags" + + environment = "${var.environment}" + resource = "aws_elasticache" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 0e85c7c..4c28e08 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -6,7 +6,7 @@ resource "datadog_monitor" "elasticache_eviction" { query = < 0 EOF @@ -33,7 +33,7 @@ resource "datadog_monitor" "elasticache_max_connection" { query = <= 65000 EOF @@ -60,7 +60,7 @@ resource "datadog_monitor" "elasticache_no_connection" { query = < ${var.swap_threshold_critical} EOF @@ -120,7 +120,7 @@ resource "datadog_monitor" "elasticache_free_memory" { query = < ${var.eviction_growing_threshold_critical} EOF diff --git a/cloud/aws/elasticache/memcached/modules.tf b/cloud/aws/elasticache/memcached/modules.tf new file mode 100644 index 0000000..987c046 --- /dev/null +++ b/cloud/aws/elasticache/memcached/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../../../common/filter-tags" + + environment = "${var.environment}" + resource = "aws_elasticache" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 4ab3914..58484e1 100644 --- 
a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -61,9 +61,9 @@ resource "datadog_monitor" "memcached_get_hits" { query = < ${var.cpu_high_threshold_critical} EOF diff --git a/cloud/aws/elasticache/redis/modules.tf b/cloud/aws/elasticache/redis/modules.tf new file mode 100644 index 0000000..987c046 --- /dev/null +++ b/cloud/aws/elasticache/redis/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../../../common/filter-tags" + + environment = "${var.environment}" + resource = "aws_elasticache" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index ed61970..e37f163 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -61,9 +61,9 @@ resource "datadog_monitor" "redis_cache_hits" { query = < ${var.cpu_high_threshold_critical} EOF @@ -122,7 +122,7 @@ resource "datadog_monitor" "redis_replication_lag" { query = < ${var.replication_lag_threshold_critical} EOF @@ -154,8 +154,8 @@ resource "datadog_monitor" "redis_commands" { query = < Date: Wed, 19 Sep 2018 19:24:32 +0200 Subject: [PATCH 38/53] MON-32 update tag convention and add extra tags --- cloud/aws/elasticache/common/inputs.tf | 36 +++++++++++++++++++ .../common/monitors-elasticache.tf | 12 +++---- cloud/aws/elasticache/memcached/inputs.tf | 12 +++++++ .../memcached/monitors-memcached.tf | 4 +-- cloud/aws/elasticache/redis/inputs.tf | 24 +++++++++++++ cloud/aws/elasticache/redis/monitors-redis.tf | 8 ++--- 6 files changed, 84 insertions(+), 12 deletions(-) diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index b73bedb..df0e2f3 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -35,6 +35,12 @@ variable 
"eviction_silenced" { type = "map" } +variable "eviction_extra_tags" { + description = "Extra tags for Elasticache eviction monitor" + type = "list" + default = [] +} + variable "eviction_message" { description = "Custom message for Elasticache eviction monitor" type = "string" @@ -54,6 +60,12 @@ variable "max_connection_silenced" { type = "map" } +variable "max_connection_extra_tags" { + description = "Extra tags for Elasticache max connection monitor" + type = "list" + default = [] +} + variable "max_connection_message" { description = "Custom message for Elasticache max connection monitor" type = "string" @@ -73,6 +85,12 @@ variable "no_connection_silenced" { type = "map" } +variable "no_connection_extra_tags" { + description = "Extra tags for Elasticache no connection monitor" + type = "list" + default = [] +} + variable "no_connection_message" { description = "Custom message for Elasticache no connection monitor" type = "string" @@ -92,6 +110,12 @@ variable "swap_silenced" { type = "map" } +variable "swap_extra_tags" { + description = "Extra tags for Elasticache swap monitor" + type = "list" + default = [] +} + variable "swap_message" { description = "Custom message for Elasticache swap monitor" type = "string" @@ -119,6 +143,12 @@ variable "free_memory_silenced" { type = "map" } +variable "free_memory_extra_tags" { + description = "Extra tags for Elasticache free memory monitor" + type = "list" + default = [] +} + variable "free_memory_message" { description = "Custom message for Elasticache free memory monitor" type = "string" @@ -145,6 +175,12 @@ variable "eviction_growing_silenced" { type = "map" } +variable "eviction_growing_extra_tags" { + description = "Extra tags for Elasticache eviction growing monitor" + type = "list" + default = [] +} + variable "eviction_growing_message" { description = "Custom message for Elasticache eviction growing monitor" type = "string" diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf 
b/cloud/aws/elasticache/common/monitors-elasticache.tf index 4c28e08..b315e81 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -22,7 +22,7 @@ resource "datadog_monitor" "elasticache_eviction" { silenced = "${var.eviction_silenced}" - tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.eviction_extra_tags}"] } resource "datadog_monitor" "elasticache_max_connection" { @@ -49,7 +49,7 @@ resource "datadog_monitor" "elasticache_max_connection" { silenced = "${var.max_connection_silenced}" - tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.max_connection_extra_tags}"] } resource "datadog_monitor" "elasticache_no_connection" { @@ -76,7 +76,7 @@ resource "datadog_monitor" "elasticache_no_connection" { silenced = "${var.no_connection_silenced}" - tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.no_connection_extra_tags}"] } resource "datadog_monitor" "elasticache_swap" { @@ -108,7 +108,7 @@ resource "datadog_monitor" "elasticache_swap" { silenced = "${var.swap_silenced}" - tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.swap_extra_tags}"] } # POC - A approfondir @@ -141,7 +141,7 @@ resource "datadog_monitor" "elasticache_free_memory" { silenced = "${var.free_memory_silenced}" - tags = 
["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.free_memory_extra_tags}"] } resource "datadog_monitor" "elasticache_eviction_growing" { @@ -173,5 +173,5 @@ resource "datadog_monitor" "elasticache_eviction_growing" { silenced = "${var.eviction_growing_silenced}" - tags = ["env:${var.environment}", "engine:${var.resource}", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.eviction_growing_extra_tags}"] } diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 12d499f..728c7b3 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -36,6 +36,12 @@ variable "get_hits_silenced" { default = {} } +variable "get_hits_extra_tags" { + description = "Extra tags for Elasticache memcached get hits monitor" + type = "list" + default = [] +} + variable "get_hits_message" { description = "Custom message for Elasticache memcached get hits monitor" type = "string" @@ -63,6 +69,12 @@ variable "cpu_high_silenced" { default = {} } +variable "cpu_high_extra_tags" { + description = "Extra tags for Elasticache memcached cpu high monitor" + type = "list" + default = [] +} + variable "cpu_high_message" { description = "Custom message for Elasticache memcached cpu high monitor" type = "string" diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 58484e1..74ecedf 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -84,7 +84,7 @@ resource "datadog_monitor" "memcached_get_hits" { silenced = "${var.get_hits_silenced}" - tags = ["env:${var.environment}", 
"engine:memcached", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "${var.get_hits_extra_tags}"] } resource "datadog_monitor" "memcached_cpu_high" { @@ -116,5 +116,5 @@ resource "datadog_monitor" "memcached_cpu_high" { silenced = "${var.cpu_high_silenced}" - tags = ["env:${var.environment}", "engine:memcached", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "${var.cpu_high_extra_tags}"] } diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 09b4bbf..262d39f 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -36,6 +36,12 @@ variable "cache_hits_silenced" { default = {} } +variable "cache_hits_extra_tags" { + description = "Extra tags for Elasticache redis cache hits monitor" + type = "list" + default = [] +} + variable "cache_hits_message" { description = "Custom message for Elasticache redis cache hits monitor" type = "string" @@ -63,6 +69,12 @@ variable "cpu_high_silenced" { default = {} } +variable "cpu_high_extra_tags" { + description = "Extra tags for Elasticache redis cpu high monitor" + type = "list" + default = [] +} + variable "cpu_high_message" { description = "Custom message for Elasticache redis cpu high monitor" type = "string" @@ -96,6 +108,12 @@ variable "replication_lag_silenced" { default = {} } +variable "replication_lag_extra_tags" { + description = "Extra tags for Elasticache redis replication lag monitor" + type = "list" + default = [] +} + variable "replication_lag_message" { description = "Custom message for Elasticache redis replication lag monitor" type = "string" @@ -129,6 +147,12 @@ variable "commands_silenced" { default = {} } +variable "commands_extra_tags" { + description = "Extra tags for Elasticache redis 
commands monitor" + type = "list" + default = [] +} + variable "commands_message" { description = "Custom message for Elasticache redis commands monitor" type = "string" diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index e37f163..bb4991a 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -84,7 +84,7 @@ resource "datadog_monitor" "redis_cache_hits" { silenced = "${var.cache_hits_silenced}" - tags = ["env:${var.environment}", "engine:redis", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "${var.cache_hits_extra_tags}"] } resource "datadog_monitor" "redis_cpu_high" { @@ -111,7 +111,7 @@ resource "datadog_monitor" "redis_cpu_high" { silenced = "${var.cpu_high_silenced}" - tags = ["env:${var.environment}", "engine:redis", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "${var.cpu_high_extra_tags}"] } resource "datadog_monitor" "redis_replication_lag" { @@ -143,7 +143,7 @@ resource "datadog_monitor" "redis_replication_lag" { silenced = "${var.replication_lag_silenced}" - tags = ["env:${var.environment}", "engine:redis", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "${var.replication_lag_extra_tags}"] } resource "datadog_monitor" "redis_commands" { @@ -171,5 +171,5 @@ resource "datadog_monitor" "redis_commands" { silenced = "${var.commands_silenced}" - tags = ["env:${var.environment}", "engine:redis", "team:aws", "provider:aws"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "${var.commands_extra_tags}"] } From 
2a51f920c46e1d140e5058e9464e5a79ef403714 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 19:26:57 +0200 Subject: [PATCH 39/53] MON-32 add enabled feature --- cloud/aws/elasticache/common/inputs.tf | 30 +++++++++++ .../common/monitors-elasticache.tf | 6 +++ cloud/aws/elasticache/memcached/inputs.tf | 40 +++++++++++++++ .../memcached/monitors-memcached.tf | 2 + cloud/aws/elasticache/redis/inputs.tf | 50 +++++++++++++++++++ cloud/aws/elasticache/redis/monitors-redis.tf | 4 ++ 6 files changed, 132 insertions(+) diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index df0e2f3..12354d7 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -35,6 +35,11 @@ variable "eviction_silenced" { type = "map" } +variable "eviction_enabled" { + description = "Flag to enable Elasticache eviction monitor" + type = "string" + default = "true" +} variable "eviction_extra_tags" { description = "Extra tags for Elasticache eviction monitor" type = "list" @@ -60,6 +65,11 @@ variable "max_connection_silenced" { type = "map" } +variable "max_connection_enabled" { + description = "Flag to enable Elasticache max connection monitor" + type = "string" + default = "true" +} variable "max_connection_extra_tags" { description = "Extra tags for Elasticache max connection monitor" type = "list" @@ -85,6 +95,11 @@ variable "no_connection_silenced" { type = "map" } +variable "no_connection_enabled" { + description = "Flag to enable Elasticache no connection monitor" + type = "string" + default = "true" +} variable "no_connection_extra_tags" { description = "Extra tags for Elasticache no connection monitor" type = "list" @@ -110,6 +125,11 @@ variable "swap_silenced" { type = "map" } +variable "swap_enabled" { + description = "Flag to enable Elasticache swap monitor" + type = "string" + default = "true" +} variable "swap_extra_tags" { description = "Extra tags for Elasticache swap monitor" type = 
"list" @@ -143,6 +163,11 @@ variable "free_memory_silenced" { type = "map" } +variable "free_memory_enabled" { + description = "Flag to enable Elasticache free memory monitor" + type = "string" + default = "true" +} variable "free_memory_extra_tags" { description = "Extra tags for Elasticache free memory monitor" type = "list" @@ -175,6 +200,11 @@ variable "eviction_growing_silenced" { type = "map" } +variable "eviction_growing_enabled" { + description = "Flag to enable Elasticache eviction growing monitor" + type = "string" + default = "true" +} variable "eviction_growing_extra_tags" { description = "Extra tags for Elasticache eviction growing monitor" type = "list" diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index b315e81..f0191b8 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -1,4 +1,5 @@ resource "datadog_monitor" "elasticache_eviction" { +count = "${var.eviction_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache eviction {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}" message = "${coalesce(var.eviction_message, var.message)}" @@ -26,6 +27,7 @@ resource "datadog_monitor" "elasticache_eviction" { } resource "datadog_monitor" "elasticache_max_connection" { +count = "${var.max_connection_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache max connections reached {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" message = "${coalesce(var.max_connection_message, var.message)}" @@ -53,6 +55,7 @@ resource "datadog_monitor" "elasticache_max_connection" { } resource "datadog_monitor" "elasticache_no_connection" { +count = "${var.no_connection_enabled ? 
1 : 0}" name = "[${var.environment}] Elasticache connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" message = "${coalesce(var.no_connection_message, var.message)}" @@ -80,6 +83,7 @@ resource "datadog_monitor" "elasticache_no_connection" { } resource "datadog_monitor" "elasticache_swap" { +count = "${var.swap_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}" message = "${coalesce(var.swap_message, var.message)}" @@ -113,6 +117,7 @@ resource "datadog_monitor" "elasticache_swap" { # POC - A approfondir resource "datadog_monitor" "elasticache_free_memory" { +count = "${var.free_memory_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.free_memory_message, var.message)}" @@ -145,6 +150,7 @@ resource "datadog_monitor" "elasticache_free_memory" { } resource "datadog_monitor" "elasticache_eviction_growing" { +count = "${var.eviction_growing_enabled ? 
1 : 0}" name = "[${var.environment}] Elasticache evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" message = "${coalesce(var.eviction_growing_message, var.message)}" diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 728c7b3..be38c1e 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -35,6 +35,11 @@ variable "get_hits_silenced" { type = "map" default = {} } +variable "get_hits_enabled" { + description = "Flag to enable Elasticache memcached get hits monitor" + type = "string" + default = "true" +} variable "get_hits_extra_tags" { description = "Extra tags for Elasticache memcached get hits monitor" @@ -68,6 +73,11 @@ variable "cpu_high_silenced" { type = "map" default = {} } +variable "cpu_high_enabled" { + description = "Flag to enable Elasticache memcached cpu high monitor" + type = "string" + default = "true" +} variable "cpu_high_extra_tags" { description = "Extra tags for Elasticache memcached cpu high monitor" @@ -108,6 +118,11 @@ variable "eviction_silenced" { type = "map" default = {} } +variable "eviction_enabled" { + description = "Flag to enable Elasticache eviction monitor" + type = "string" + default = "true" +} variable "eviction_message" { description = "Custom message for Elasticache eviction monitor" @@ -131,6 +146,11 @@ variable "max_connection_silenced" { type = "map" default = {} } +variable "max_connection_enabled" { + description = "Flag to enable Elasticache max connection monitor" + type = "string" + default = "true" +} variable "max_connection_message" { description = "Custom message for Elasticache max connection monitor" @@ -154,6 +174,11 @@ variable "no_connection_silenced" { type = "map" default = {} } +variable "no_connection_enabled" { + description = "Flag to enable Elasticache no connection monitor" + type = 
"string" + default = "true" +} variable "no_connection_message" { description = "Custom message for Elasticache no connection monitor" @@ -177,6 +202,11 @@ variable "swap_silenced" { type = "map" default = {} } +variable "swap_enabled" { + description = "Flag to enable Elasticache swap monitor" + type = "string" + default = "true" +} variable "swap_message" { description = "Custom message for Elasticache swap monitor" @@ -210,6 +240,11 @@ variable "free_memory_silenced" { type = "map" default = {} } +variable "free_memory_enabled" { + description = "Flag to enable Elasticache free memory monitor" + type = "string" + default = "true" +} variable "free_memory_message" { description = "Custom message for Elasticache free memory monitor" @@ -242,6 +277,11 @@ variable "eviction_growing_silenced" { type = "map" default = {} } +variable "eviction_growing_enabled" { + description = "Flag to enable Elasticache eviction growing monitor" + type = "string" + default = "true" +} variable "eviction_growing_message" { description = "Custom message for Elasticache eviction growing monitor" diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 74ecedf..e274c90 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -54,6 +54,7 @@ module "datadog-monitors-aws-elasticache-common" { } resource "datadog_monitor" "memcached_get_hits" { +count = "${var.get_hits_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache memcached get hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.get_hits_message, var.message)}" @@ -88,6 +89,7 @@ resource "datadog_monitor" "memcached_get_hits" { } resource "datadog_monitor" "memcached_cpu_high" { +count = "${var.cpu_high_enabled ? 
1 : 0}" name = "[${var.environment}] Elasticache memcached CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_high_message, var.message)}" diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 262d39f..de2f1b5 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -35,6 +35,11 @@ variable "cache_hits_silenced" { type = "map" default = {} } +variable "cache_hits_enabled" { + description = "Flag to enable Elasticache redis cache hits monitor" + type = "string" + default = "true" +} variable "cache_hits_extra_tags" { description = "Extra tags for Elasticache redis cache hits monitor" @@ -68,6 +73,11 @@ variable "cpu_high_silenced" { type = "map" default = {} } +variable "cpu_high_enabled" { + description = "Flag to enable Elasticache redis cpu high monitor" + type = "string" + default = "true" +} variable "cpu_high_extra_tags" { description = "Extra tags for Elasticache redis cpu high monitor" @@ -107,6 +117,11 @@ variable "replication_lag_silenced" { type = "map" default = {} } +variable "replication_lag_enabled" { + description = "Flag to enable Elasticache redis replication lag monitor" + type = "string" + default = "true" +} variable "replication_lag_extra_tags" { description = "Extra tags for Elasticache redis replication lag monitor" @@ -146,6 +161,11 @@ variable "commands_silenced" { type = "map" default = {} } +variable "commands_enabled" { + description = "Flag to enable Elasticache redis commands monitor" + type = "string" + default = "true" +} variable "commands_extra_tags" { description = "Extra tags for Elasticache redis commands monitor" @@ -170,6 +190,11 @@ variable "eviction_silenced" { type = "map" default = {} } +variable "eviction_enabled" { + description = "Flag to enable Elasticache eviction monitor" + type = "string" + default = 
"true" +} variable "eviction_message" { description = "Custom message for Elasticache eviction monitor" @@ -193,6 +218,11 @@ variable "max_connection_silenced" { type = "map" default = {} } +variable "max_connection_enabled" { + description = "Flag to enable Elasticache max connection monitor" + type = "string" + default = "true" +} variable "max_connection_message" { description = "Custom message for Elasticache max connection monitor" @@ -216,6 +246,11 @@ variable "no_connection_silenced" { type = "map" default = {} } +variable "no_connection_enabled" { + description = "Flag to enable Elasticache no connection monitor" + type = "string" + default = "true" +} variable "no_connection_message" { description = "Custom message for Elasticache no connection monitor" @@ -239,6 +274,11 @@ variable "swap_silenced" { type = "map" default = {} } +variable "swap_enabled" { + description = "Flag to enable Elasticache swap monitor" + type = "string" + default = "true" +} variable "swap_message" { description = "Custom message for Elasticache swap monitor" @@ -272,6 +312,11 @@ variable "free_memory_silenced" { type = "map" default = {} } +variable "free_memory_enabled" { + description = "Flag to enable Elasticache free memory monitor" + type = "string" + default = "true" +} variable "free_memory_message" { description = "Custom message for Elasticache free memory monitor" @@ -304,6 +349,11 @@ variable "eviction_growing_silenced" { type = "map" default = {} } +variable "eviction_growing_enabled" { + description = "Flag to enable Elasticache eviction growing monitor" + type = "string" + default = "true" +} variable "eviction_growing_message" { description = "Custom message for Elasticache eviction growing monitor" diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index bb4991a..e23d178 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -54,6 +54,7 @@ module 
"datadog-monitors-aws-elasticache-common" { } resource "datadog_monitor" "redis_cache_hits" { +count = "${var.cache_hits_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache redis cache hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cache_hits_message, var.message)}" @@ -88,6 +89,7 @@ resource "datadog_monitor" "redis_cache_hits" { } resource "datadog_monitor" "redis_cpu_high" { +count = "${var.cpu_high_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_high_message, var.message)}" @@ -115,6 +117,7 @@ resource "datadog_monitor" "redis_cpu_high" { } resource "datadog_monitor" "redis_replication_lag" { +count = "${var.replication_lag_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache redis replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" message = "${coalesce(var.replication_lag_message, var.message)}" @@ -147,6 +150,7 @@ resource "datadog_monitor" "redis_replication_lag" { } resource "datadog_monitor" "redis_commands" { +count = "${var.commands_enabled ? 
1 : 0}" name = "[${var.environment}] Elasticache redis is receiving no commands" message = "${coalesce(var.commands_message, var.message)}" From 25fc6076eebecc9c9b7a447f0da7359764f9e1ca Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 19:30:27 +0200 Subject: [PATCH 40/53] MON-32 delete template filter --- cloud/aws/elasticache/memcached/monitors-memcached.tf | 8 -------- cloud/aws/elasticache/redis/monitors-redis.tf | 8 -------- 2 files changed, 16 deletions(-) diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index e274c90..f7fba2e 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -1,11 +1,3 @@ -data "template_file" "filter" { - template = "$${filter}" - - vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_elasticache_memcached:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" - } -} - module "datadog-monitors-aws-elasticache-common" { source = "../common" diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index e23d178..8fd9b20 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -1,11 +1,3 @@ -data "template_file" "filter" { - template = "$${filter}" - - vars { - filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_aws_elasticache_redis:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" - } -} - module "datadog-monitors-aws-elasticache-common" { source = "../common" From 644b292adde207219c295a0d89d99401e3fbe92b Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 19:32:57 +0200 Subject: [PATCH 41/53] MON-32 remove common module use in redis and memcached --- cloud/aws/elasticache/memcached/inputs.tf | 197 ------------------ .../memcached/monitors-memcached.tf | 47 ----- cloud/aws/elasticache/redis/inputs.tf | 197 ------------------ cloud/aws/elasticache/redis/monitors-redis.tf | 47 ----- 4 files changed, 488 deletions(-) diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index be38c1e..3c0868f 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -111,200 +111,3 @@ variable "cpu_high_threshold_critical" { description = "Elasticache memcached cpu high critical threshold in percentage" default = 90 } - -# Elasticache specific -variable "eviction_silenced" { - description = "Groups to mute for Elasticache eviction monitor" - type = "map" - default = {} -} -variable "eviction_enabled" { - description = "Flag to enable Elasticache eviction monitor" - type = "string" - default = "true" -} - -variable "eviction_message" { - description = "Custom message for Elasticache eviction monitor" - type = "string" - default = "" -} - -variable "eviction_time_aggregator" { - description = "Monitor aggregator for Elasticache eviction [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "eviction_timeframe" { - description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "max_connection_silenced" { - description = "Groups to mute for Elasticache max connection monitor" - type 
= "map" - default = {} -} -variable "max_connection_enabled" { - description = "Flag to enable Elasticache max connection monitor" - type = "string" - default = "true" -} - -variable "max_connection_message" { - description = "Custom message for Elasticache max connection monitor" - type = "string" - default = "" -} - -variable "max_connection_time_aggregator" { - description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]" - type = "string" - default = "max" -} - -variable "max_connection_timeframe" { - description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "no_connection_silenced" { - description = "Groups to mute for Elasticache no connection monitor" - type = "map" - default = {} -} -variable "no_connection_enabled" { - description = "Flag to enable Elasticache no connection monitor" - type = "string" - default = "true" -} - -variable "no_connection_message" { - description = "Custom message for Elasticache no connection monitor" - type = "string" - default = "" -} - -variable "no_connection_time_aggregator" { - description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "no_connection_timeframe" { - description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "swap_silenced" { - description = "Groups to mute for Elasticache swap monitor" - type = "map" - default = {} -} -variable "swap_enabled" { - description = "Flag to enable Elasticache swap monitor" - type = "string" - default = "true" -} - -variable "swap_message" { - description = "Custom message for Elasticache swap monitor" - type = "string" - default = "" -} - -variable "swap_time_aggregator" { - description 
= "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "swap_timeframe" { - description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "swap_threshold_warning" { - description = "Elasticache swap warning threshold in Bytes" - default = 40000000 -} - -variable "swap_threshold_critical" { - description = "Elasticache swap critical threshold in Bytes" - default = 50000000 -} - -variable "free_memory_silenced" { - description = "Groups to mute for Elasticache free memory monitor" - type = "map" - default = {} -} -variable "free_memory_enabled" { - description = "Flag to enable Elasticache free memory monitor" - type = "string" - default = "true" -} - -variable "free_memory_message" { - description = "Custom message for Elasticache free memory monitor" - type = "string" - default = "" -} - -variable "free_memory_condition_timeframe" { - description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" -} - -variable "free_memory_timeframe" { - description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" -} - -variable "free_memory_threshold_warning" { - description = "Elasticache free memory warning threshold in percentage" - default = -50 -} - -variable "free_memory_threshold_critical" { - description = "Elasticache free memory critical threshold in percentage" - default = -70 -} - -variable "eviction_growing_silenced" { - description = "Groups to mute for Elasticache eviction growing monitor" - type = "map" - default = {} -} -variable "eviction_growing_enabled" { - description = "Flag to enable Elasticache eviction growing monitor" - 
type = "string" - default = "true" -} - -variable "eviction_growing_message" { - description = "Custom message for Elasticache eviction growing monitor" - type = "string" - default = "" -} - -variable "eviction_growing_condition_timeframe" { - description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "eviction_growing_timeframe" { - description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "eviction_growing_threshold_warning" { - description = "Elasticache eviction growing warning threshold in percentage" - default = 10 -} - -variable "eviction_growing_threshold_critical" { - description = "Elasticache eviction growing critical threshold in percentage" - default = 30 -} diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index f7fba2e..29382bb 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -1,50 +1,3 @@ -module "datadog-monitors-aws-elasticache-common" { - source = "../common" - - message = "${var.message}" - environment = "${var.environment}" - filter_tags = "${data.template_file.filter.rendered}" - resource = "memcached" - - delay = "${var.evaluation_delay}" - - eviction_message = "${var.eviction_message}" - eviction_silenced = "${var.eviction_silenced}" - eviction_time_aggregator = "${var.eviction_time_aggregator}" - eviction_timeframe = "${var.eviction_timeframe}" - - eviction_growing_condition_timeframe = "${var.eviction_growing_condition_timeframe}" - eviction_growing_timeframe = "${var.eviction_growing_timeframe}" - eviction_growing_message = "${var.eviction_growing_message}" - eviction_growing_silenced = 
"${var.eviction_growing_silenced}" - eviction_growing_threshold_warning = "${var.eviction_growing_threshold_warning}" - eviction_growing_threshold_critical = "${var.eviction_growing_threshold_critical}" - - free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}" - free_memory_timeframe = "${var.free_memory_timeframe}" - free_memory_message = "${var.free_memory_message}" - free_memory_silenced = "${var.free_memory_silenced}" - free_memory_threshold_critical = "${var.free_memory_threshold_critical}" - free_memory_threshold_warning = "${var.free_memory_threshold_warning}" - - max_connection_message = "${var.max_connection_message}" - max_connection_silenced = "${var.max_connection_silenced}" - max_connection_time_aggregator = "${var.max_connection_time_aggregator}" - max_connection_timeframe = "${var.max_connection_timeframe}" - - no_connection_message = "${var.no_connection_message}" - no_connection_silenced = "${var.no_connection_silenced}" - no_connection_time_aggregator = "${var.no_connection_time_aggregator}" - no_connection_timeframe = "${var.no_connection_timeframe}" - - swap_message = "${var.swap_message}" - swap_silenced = "${var.swap_silenced}" - swap_threshold_critical = "${var.swap_threshold_critical}" - swap_threshold_warning = "${var.swap_threshold_warning}" - swap_time_aggregator = "${var.swap_time_aggregator}" - swap_timeframe = "${var.swap_timeframe}" -} - resource "datadog_monitor" "memcached_get_hits" { count = "${var.get_hits_enabled ? 
1 : 0}" name = "[${var.environment}] Elasticache memcached get hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index de2f1b5..185f960 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -183,200 +183,3 @@ variable "commands_timeframe" { description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" default = "last_5m" } - -# Elasticache specific -variable "eviction_silenced" { - description = "Groups to mute for Elasticache eviction monitor" - type = "map" - default = {} -} -variable "eviction_enabled" { - description = "Flag to enable Elasticache eviction monitor" - type = "string" - default = "true" -} - -variable "eviction_message" { - description = "Custom message for Elasticache eviction monitor" - type = "string" - default = "" -} - -variable "eviction_time_aggregator" { - description = "Monitor aggregator for Elasticache eviction [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "eviction_timeframe" { - description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "max_connection_silenced" { - description = "Groups to mute for Elasticache max connection monitor" - type = "map" - default = {} -} -variable "max_connection_enabled" { - description = "Flag to enable Elasticache max connection monitor" - type = "string" - default = "true" -} - -variable "max_connection_message" { - description = "Custom message for Elasticache max connection monitor" - type = "string" - default = "" -} - -variable "max_connection_time_aggregator" { - description = "Monitor aggregator for 
Elasticache max connection [available values: min, max or avg]" - type = "string" - default = "max" -} - -variable "max_connection_timeframe" { - description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "no_connection_silenced" { - description = "Groups to mute for Elasticache no connection monitor" - type = "map" - default = {} -} -variable "no_connection_enabled" { - description = "Flag to enable Elasticache no connection monitor" - type = "string" - default = "true" -} - -variable "no_connection_message" { - description = "Custom message for Elasticache no connection monitor" - type = "string" - default = "" -} - -variable "no_connection_time_aggregator" { - description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "no_connection_timeframe" { - description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "swap_silenced" { - description = "Groups to mute for Elasticache swap monitor" - type = "map" - default = {} -} -variable "swap_enabled" { - description = "Flag to enable Elasticache swap monitor" - type = "string" - default = "true" -} - -variable "swap_message" { - description = "Custom message for Elasticache swap monitor" - type = "string" - default = "" -} - -variable "swap_time_aggregator" { - description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" - type = "string" - default = "min" -} - -variable "swap_timeframe" { - description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "swap_threshold_warning" { - description = "Elasticache swap 
warning threshold in Bytes" - default = 40000000 -} - -variable "swap_threshold_critical" { - description = "Elasticache swap critical threshold in Bytes" - default = 50000000 -} - -variable "free_memory_silenced" { - description = "Groups to mute for Elasticache free memory monitor" - type = "map" - default = {} -} -variable "free_memory_enabled" { - description = "Flag to enable Elasticache free memory monitor" - type = "string" - default = "true" -} - -variable "free_memory_message" { - description = "Custom message for Elasticache free memory monitor" - type = "string" - default = "" -} - -variable "free_memory_condition_timeframe" { - description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" -} - -variable "free_memory_timeframe" { - description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" -} - -variable "free_memory_threshold_warning" { - description = "Elasticache free memory warning threshold in percentage" - default = -50 -} - -variable "free_memory_threshold_critical" { - description = "Elasticache free memory critical threshold in percentage" - default = -70 -} - -variable "eviction_growing_silenced" { - description = "Groups to mute for Elasticache eviction growing monitor" - type = "map" - default = {} -} -variable "eviction_growing_enabled" { - description = "Flag to enable Elasticache eviction growing monitor" - type = "string" - default = "true" -} - -variable "eviction_growing_message" { - description = "Custom message for Elasticache eviction growing monitor" - type = "string" - default = "" -} - -variable "eviction_growing_condition_timeframe" { - description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or 
`last_1d`]" - default = "last_5m" -} - -variable "eviction_growing_timeframe" { - description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_5m" -} - -variable "eviction_growing_threshold_warning" { - description = "Elasticache eviction growing warning threshold in percentage" - default = 10 -} - -variable "eviction_growing_threshold_critical" { - description = "Elasticache eviction growing critical threshold in percentage" - default = 30 -} diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 8fd9b20..08d67d5 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -1,50 +1,3 @@ -module "datadog-monitors-aws-elasticache-common" { - source = "../common" - - message = "${var.message}" - environment = "${var.environment}" - filter_tags = "${data.template_file.filter.rendered}" - resource = "redis" - - delay = "${var.evaluation_delay}" - - eviction_message = "${var.eviction_message}" - eviction_silenced = "${var.eviction_silenced}" - eviction_time_aggregator = "${var.eviction_time_aggregator}" - eviction_timeframe = "${var.eviction_timeframe}" - - eviction_growing_condition_timeframe = "${var.eviction_growing_condition_timeframe}" - eviction_growing_timeframe = "${var.eviction_growing_timeframe}" - eviction_growing_message = "${var.eviction_growing_message}" - eviction_growing_silenced = "${var.eviction_growing_silenced}" - eviction_growing_threshold_warning = "${var.eviction_growing_threshold_warning}" - eviction_growing_threshold_critical = "${var.eviction_growing_threshold_critical}" - - free_memory_condition_timeframe = "${var.free_memory_condition_timeframe}" - free_memory_timeframe = "${var.free_memory_timeframe}" - free_memory_message = "${var.free_memory_message}" - free_memory_silenced = "${var.free_memory_silenced}" - 
free_memory_threshold_critical = "${var.free_memory_threshold_critical}" - free_memory_threshold_warning = "${var.free_memory_threshold_warning}" - - max_connection_message = "${var.max_connection_message}" - max_connection_silenced = "${var.max_connection_silenced}" - max_connection_time_aggregator = "${var.max_connection_time_aggregator}" - max_connection_timeframe = "${var.max_connection_timeframe}" - - no_connection_message = "${var.no_connection_message}" - no_connection_silenced = "${var.no_connection_silenced}" - no_connection_time_aggregator = "${var.no_connection_time_aggregator}" - no_connection_timeframe = "${var.no_connection_timeframe}" - - swap_message = "${var.swap_message}" - swap_silenced = "${var.swap_silenced}" - swap_threshold_critical = "${var.swap_threshold_critical}" - swap_threshold_warning = "${var.swap_threshold_warning}" - swap_time_aggregator = "${var.swap_time_aggregator}" - swap_timeframe = "${var.swap_timeframe}" -} - resource "datadog_monitor" "redis_cache_hits" { count = "${var.cache_hits_enabled ? 
1 : 0}" name = "[${var.environment}] Elasticache redis cache hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" From d7e778a7261cf9e19c0985a30c78bddaa3e131c0 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 19:34:05 +0200 Subject: [PATCH 42/53] MON-32 auto update --- cloud/aws/elasticache/common/README.md | 31 +++++++++---- cloud/aws/elasticache/common/inputs.tf | 6 +++ .../common/monitors-elasticache.tf | 12 +++--- cloud/aws/elasticache/common/outputs.tf | 12 +++--- cloud/aws/elasticache/memcached/README.md | 39 +++-------------- cloud/aws/elasticache/memcached/inputs.tf | 2 + .../memcached/monitors-memcached.tf | 4 +- cloud/aws/elasticache/memcached/outputs.tf | 4 +- cloud/aws/elasticache/redis/README.md | 43 +++++-------------- cloud/aws/elasticache/redis/inputs.tf | 4 ++ cloud/aws/elasticache/redis/monitors-redis.tf | 8 ++-- cloud/aws/elasticache/redis/outputs.tf | 8 ++-- 12 files changed, 76 insertions(+), 97 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index f9d2d6e..6d9263c 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -16,20 +16,24 @@ module "datadog-monitors-cloud-aws-elasticache-common" { Creates DataDog monitors with the following checks: -- Elasticache ${var.resource} eviction -- Elasticache ${var.resource} max connections reached -- Elasticache ${var.resource} connections -- Elasticache ${var.resource} swap -- Elasticache ${var.resource} free memory -- Elasticache ${var.resource} evictions is growing +- Elasticache connections +- Elasticache eviction +- Elasticache evictions is growing +- Elasticache free memory +- Elasticache max connections reached +- Elasticache swap ## Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| delay | Delay in seconds for the 
metric evaluation | string | - | yes | | environment | Infrastructure Environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | +| eviction_enabled | Flag to enable Elasticache eviction monitor | string | `true` | no | +| eviction_extra_tags | Extra tags for Elasticache eviction monitor | list | `` | no | | eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| eviction_growing_enabled | Flag to enable Elasticache eviction growing monitor | string | `true` | no | +| eviction_growing_extra_tags | Extra tags for Elasticache eviction growing monitor | list | `` | no | | eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | - | yes | | eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | - | yes | | eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | - | yes | @@ -39,23 +43,32 @@ Creates DataDog monitors with the following checks: | eviction_silenced | Groups to mute for Elasticache eviction monitor | map | - | yes | | eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | - | yes | | eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | -| filter_tags | Tags used for filtering | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, 
or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| free_memory_enabled | Flag to enable Elasticache free memory monitor | string | `true` | no | +| free_memory_extra_tags | Extra tags for Elasticache free memory monitor | list | `` | no | | free_memory_message | Custom message for Elasticache free memory monitor | string | - | yes | | free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | - | yes | | free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | - | yes | | free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | - | yes | | free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| max_connection_enabled | Flag to enable Elasticache max connection monitor | string | `true` | no | +| max_connection_extra_tags | Extra tags for Elasticache max connection monitor | list | `` | no | | max_connection_message | Custom message for Elasticache max connection monitor | string | - | yes | | max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | - | yes | | max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | - | yes | | max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | | message | Message sent when an alert is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| no_connection_enabled | Flag to enable Elasticache no connection monitor | string | `true` | no | +| no_connection_extra_tags | Extra tags for Elasticache no connection monitor | list | `` | no | | no_connection_message | Custom message for 
Elasticache no connection monitor | string | - | yes | | no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | - | yes | | no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | - | yes | | no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | -| resource | Type of Elasticache used | string | - | yes | +| swap_enabled | Flag to enable Elasticache swap monitor | string | `true` | no | +| swap_extra_tags | Extra tags for Elasticache swap monitor | list | `` | no | | swap_message | Custom message for Elasticache swap monitor | string | - | yes | | swap_silenced | Groups to mute for Elasticache swap monitor | map | - | yes | | swap_threshold_critical | Elasticache swap critical threshold in percentage | string | - | yes | diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index 12354d7..1a35318 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -40,6 +40,7 @@ variable "eviction_enabled" { type = "string" default = "true" } + variable "eviction_extra_tags" { description = "Extra tags for Elasticache eviction monitor" type = "list" @@ -70,6 +71,7 @@ variable "max_connection_enabled" { type = "string" default = "true" } + variable "max_connection_extra_tags" { description = "Extra tags for Elasticache max connection monitor" type = "list" @@ -100,6 +102,7 @@ variable "no_connection_enabled" { type = "string" default = "true" } + variable "no_connection_extra_tags" { description = "Extra tags for Elasticache no connection monitor" type = "list" @@ -130,6 +133,7 @@ variable "swap_enabled" { type = "string" default = "true" } + variable "swap_extra_tags" { description = "Extra tags for Elasticache swap monitor" type = "list" @@ -168,6 +172,7 @@ variable 
"free_memory_enabled" { type = "string" default = "true" } + variable "free_memory_extra_tags" { description = "Extra tags for Elasticache free memory monitor" type = "list" @@ -205,6 +210,7 @@ variable "eviction_growing_enabled" { type = "string" default = "true" } + variable "eviction_growing_extra_tags" { description = "Extra tags for Elasticache eviction growing monitor" type = "list" diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index f0191b8..d2b635d 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -1,5 +1,5 @@ resource "datadog_monitor" "elasticache_eviction" { -count = "${var.eviction_enabled ? 1 : 0}" + count = "${var.eviction_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache eviction {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}" message = "${coalesce(var.eviction_message, var.message)}" @@ -27,7 +27,7 @@ count = "${var.eviction_enabled ? 1 : 0}" } resource "datadog_monitor" "elasticache_max_connection" { -count = "${var.max_connection_enabled ? 1 : 0}" + count = "${var.max_connection_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache max connections reached {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" message = "${coalesce(var.max_connection_message, var.message)}" @@ -55,7 +55,7 @@ count = "${var.max_connection_enabled ? 1 : 0}" } resource "datadog_monitor" "elasticache_no_connection" { -count = "${var.no_connection_enabled ? 1 : 0}" + count = "${var.no_connection_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" message = "${coalesce(var.no_connection_message, var.message)}" @@ -83,7 +83,7 @@ count = "${var.no_connection_enabled ? 1 : 0}" } resource "datadog_monitor" "elasticache_swap" { -count = "${var.swap_enabled ? 1 : 0}" + count = "${var.swap_enabled ? 
1 : 0}" name = "[${var.environment}] Elasticache swap {{#is_alert}}{{{comparator}}} {{threshold}}MB ({{value}}MB){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}MB ({{value}}MB){{/is_warning}}" message = "${coalesce(var.swap_message, var.message)}" @@ -117,7 +117,7 @@ count = "${var.swap_enabled ? 1 : 0}" # POC - A approfondir resource "datadog_monitor" "elasticache_free_memory" { -count = "${var.free_memory_enabled ? 1 : 0}" + count = "${var.free_memory_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.free_memory_message, var.message)}" @@ -150,7 +150,7 @@ count = "${var.free_memory_enabled ? 1 : 0}" } resource "datadog_monitor" "elasticache_eviction_growing" { -count = "${var.eviction_growing_enabled ? 1 : 0}" + count = "${var.eviction_growing_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" message = "${coalesce(var.eviction_growing_message, var.message)}" diff --git a/cloud/aws/elasticache/common/outputs.tf b/cloud/aws/elasticache/common/outputs.tf index a1e295e..5268c4d 100644 --- a/cloud/aws/elasticache/common/outputs.tf +++ b/cloud/aws/elasticache/common/outputs.tf @@ -1,29 +1,29 @@ output "elasticache_eviction_id" { description = "id for monitor elasticache_eviction" - value = "${datadog_monitor.elasticache_eviction.id}" + value = "${datadog_monitor.elasticache_eviction.*.id}" } output "elasticache_max_connection_id" { description = "id for monitor elasticache_max_connection" - value = "${datadog_monitor.elasticache_max_connection.id}" + value = "${datadog_monitor.elasticache_max_connection.*.id}" } output "elasticache_no_connection_id" { description = "id for monitor 
elasticache_no_connection" - value = "${datadog_monitor.elasticache_no_connection.id}" + value = "${datadog_monitor.elasticache_no_connection.*.id}" } output "elasticache_swap_id" { description = "id for monitor elasticache_swap" - value = "${datadog_monitor.elasticache_swap.id}" + value = "${datadog_monitor.elasticache_swap.*.id}" } output "elasticache_free_memory_id" { description = "id for monitor elasticache_free_memory" - value = "${datadog_monitor.elasticache_free_memory.id}" + value = "${datadog_monitor.elasticache_free_memory.*.id}" } output "elasticache_eviction_growing_id" { description = "id for monitor elasticache_eviction_growing" - value = "${datadog_monitor.elasticache_eviction_growing.id}" + value = "${datadog_monitor.elasticache_eviction_growing.*.id}" } diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index 2ec298e..b697225 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -16,59 +16,34 @@ module "datadog-monitors-cloud-aws-elasticache-memcached" { Creates DataDog monitors with the following checks: -- Elasticache memcached get hits - Elasticache memcached CPU +- Elasticache memcached get hits ## Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| +| cpu_high_enabled | Flag to enable Elasticache memcached cpu high monitor | string | `true` | no | +| cpu_high_extra_tags | Extra tags for Elasticache memcached cpu high monitor | list | `` | no | | cpu_high_message | Custom message for Elasticache memcached cpu high monitor | string | `` | no | | cpu_high_silenced | Groups to mute for Elasticache memcached cpu high monitor | map | `` | no | | cpu_high_threshold_critical | Elasticache memcached cpu high critical threshold in percentage | string | `90` | no | | cpu_high_threshold_warning | Elasticache memcached cpu high warning threshold in percentage | string | `75` | no | | 
cpu_high_time_aggregator | Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg] | string | `min` | no | | cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Infrastructure Environment | string | - | yes | -| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | -| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | -| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | -| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | -| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | -| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | -| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | -| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | filter_tags_custom | Tags used for custom 
filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | -| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | -| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | -| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | -| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| get_hits_enabled | Flag to enable Elasticache memcached get hits monitor | string | `true` | no | +| get_hits_extra_tags | Extra tags for Elasticache memcached get hits monitor | list | `` | no | | get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | | get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | | get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `10` | no | | get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `20` | no | | get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | -| max_connection_silenced | Groups to mute for Elasticache max connection 
monitor | map | `` | no | -| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | -| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | -| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | -| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | -| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | -| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| swap_message | Custom message for Elasticache swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | -| swap_threshold_critical | Elasticache swap critical threshold in Bytes | string | `50000000` | no | -| swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | -| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | ## Outputs diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 3c0868f..fd8330c 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -35,6 
+35,7 @@ variable "get_hits_silenced" { type = "map" default = {} } + variable "get_hits_enabled" { description = "Flag to enable Elasticache memcached get hits monitor" type = "string" @@ -73,6 +74,7 @@ variable "cpu_high_silenced" { type = "map" default = {} } + variable "cpu_high_enabled" { description = "Flag to enable Elasticache memcached cpu high monitor" type = "string" diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 29382bb..b9cb556 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -1,5 +1,5 @@ resource "datadog_monitor" "memcached_get_hits" { -count = "${var.get_hits_enabled ? 1 : 0}" + count = "${var.get_hits_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache memcached get hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.get_hits_message, var.message)}" @@ -34,7 +34,7 @@ count = "${var.get_hits_enabled ? 1 : 0}" } resource "datadog_monitor" "memcached_cpu_high" { -count = "${var.cpu_high_enabled ? 1 : 0}" + count = "${var.cpu_high_enabled ? 
1 : 0}" name = "[${var.environment}] Elasticache memcached CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_high_message, var.message)}" diff --git a/cloud/aws/elasticache/memcached/outputs.tf b/cloud/aws/elasticache/memcached/outputs.tf index d4ed36e..7beb0d5 100644 --- a/cloud/aws/elasticache/memcached/outputs.tf +++ b/cloud/aws/elasticache/memcached/outputs.tf @@ -1,9 +1,9 @@ output "memcached_get_hits_id" { description = "id for monitor memcached_get_hits" - value = "${datadog_monitor.memcached_get_hits.id}" + value = "${datadog_monitor.memcached_get_hits.*.id}" } output "memcached_cpu_high_id" { description = "id for monitor memcached_cpu_high" - value = "${datadog_monitor.memcached_cpu_high.id}" + value = "${datadog_monitor.memcached_cpu_high.*.id}" } diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index 0e3a5cc..77c13d1 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -18,68 +18,47 @@ Creates DataDog monitors with the following checks: - Elasticache redis cache hits - Elasticache redis CPU -- Elasticache redis replication lag - Elasticache redis is receiving no commands +- Elasticache redis replication lag ## Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| +| cache_hits_enabled | Flag to enable Elasticache redis cache hits monitor | string | `true` | no | +| cache_hits_extra_tags | Extra tags for Elasticache redis cache hits monitor | list | `` | no | | cache_hits_message | Custom message for Elasticache redis cache hits monitor | string | `` | no | | cache_hits_silenced | Groups to mute for Elasticache redis cache hits monitor | map | `` | no | | cache_hits_threshold_critical | Elasticache redis cache hits critical threshold in percentage | string | `10` | no | | 
cache_hits_threshold_warning | Elasticache redis cache hits warning threshold in percentage | string | `20` | no | | cache_hits_timeframe | Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| commands_enabled | Flag to enable Elasticache redis commands monitor | string | `true` | no | +| commands_extra_tags | Extra tags for Elasticache redis commands monitor | list | `` | no | | commands_message | Custom message for Elasticache redis commands monitor | string | `` | no | | commands_silenced | Groups to mute for Elasticache redis commands monitor | map | `` | no | | commands_timeframe | Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| cpu_high_enabled | Flag to enable Elasticache redis cpu high monitor | string | `true` | no | +| cpu_high_extra_tags | Extra tags for Elasticache redis cpu high monitor | list | `` | no | | cpu_high_message | Custom message for Elasticache redis cpu high monitor | string | `` | no | | cpu_high_silenced | Groups to mute for Elasticache redis cpu high monitor | map | `` | no | | cpu_high_threshold_critical | Elasticache redis cpu high critical threshold in percentage | string | `90` | no | | cpu_high_threshold_warning | Elasticache redis cpu high warning threshold in percentage | string | `75` | no | | cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `avg` | no | | cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Infrastructure Environment | string | - | yes | -| 
eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | -| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | -| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | -| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | -| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | -| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | -| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | `min` | no | -| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | -| 
free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | -| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | -| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | -| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | -| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | -| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | -| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | -| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | -| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | -| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | -| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | -| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| replication_lag_enabled | Flag to enable Elasticache redis replication lag monitor | string | `true` | no | +| replication_lag_extra_tags | Extra tags for Elasticache redis replication lag monitor | list | `` | no | | 
replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `` | no | | replication_lag_threshold_critical | Elasticache redis replication lag critical threshold in seconds | string | `1` | no | | replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `0` | no | | replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no | | replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| swap_message | Custom message for Elasticache swap monitor | string | `` | no | -| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | -| swap_threshold_critical | Elasticache swap critical threshold in Bytes | string | `50000000` | no | -| swap_threshold_warning | Elasticache swap warning threshold in Bytes | string | `40000000` | no | -| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | -| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | ## Outputs diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 185f960..4573abf 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -35,6 +35,7 @@ variable "cache_hits_silenced" { type = "map" default = {} } + variable "cache_hits_enabled" { description = "Flag to enable Elasticache redis cache hits monitor" type = "string" @@ -73,6 +74,7 @@ variable "cpu_high_silenced" { type = "map" default = {} } + 
variable "cpu_high_enabled" { description = "Flag to enable Elasticache redis cpu high monitor" type = "string" @@ -117,6 +119,7 @@ variable "replication_lag_silenced" { type = "map" default = {} } + variable "replication_lag_enabled" { description = "Flag to enable Elasticache redis replication lag monitor" type = "string" @@ -161,6 +164,7 @@ variable "commands_silenced" { type = "map" default = {} } + variable "commands_enabled" { description = "Flag to enable Elasticache redis commands monitor" type = "string" diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 08d67d5..04e89fc 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -1,5 +1,5 @@ resource "datadog_monitor" "redis_cache_hits" { -count = "${var.cache_hits_enabled ? 1 : 0}" + count = "${var.cache_hits_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache redis cache hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cache_hits_message, var.message)}" @@ -34,7 +34,7 @@ count = "${var.cache_hits_enabled ? 1 : 0}" } resource "datadog_monitor" "redis_cpu_high" { -count = "${var.cpu_high_enabled ? 1 : 0}" + count = "${var.cpu_high_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_high_message, var.message)}" @@ -62,7 +62,7 @@ count = "${var.cpu_high_enabled ? 1 : 0}" } resource "datadog_monitor" "redis_replication_lag" { -count = "${var.replication_lag_enabled ? 1 : 0}" + count = "${var.replication_lag_enabled ? 
1 : 0}" name = "[${var.environment}] Elasticache redis replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" message = "${coalesce(var.replication_lag_message, var.message)}" @@ -95,7 +95,7 @@ count = "${var.replication_lag_enabled ? 1 : 0}" } resource "datadog_monitor" "redis_commands" { -count = "${var.commands_enabled ? 1 : 0}" + count = "${var.commands_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache redis is receiving no commands" message = "${coalesce(var.commands_message, var.message)}" diff --git a/cloud/aws/elasticache/redis/outputs.tf b/cloud/aws/elasticache/redis/outputs.tf index 23bbe75..77bbcfa 100644 --- a/cloud/aws/elasticache/redis/outputs.tf +++ b/cloud/aws/elasticache/redis/outputs.tf @@ -1,19 +1,19 @@ output "redis_cache_hits_id" { description = "id for monitor redis_cache_hits" - value = "${datadog_monitor.redis_cache_hits.id}" + value = "${datadog_monitor.redis_cache_hits.*.id}" } output "redis_cpu_high_id" { description = "id for monitor redis_cpu_high" - value = "${datadog_monitor.redis_cpu_high.id}" + value = "${datadog_monitor.redis_cpu_high.*.id}" } output "redis_replication_lag_id" { description = "id for monitor redis_replication_lag" - value = "${datadog_monitor.redis_replication_lag.id}" + value = "${datadog_monitor.redis_replication_lag.*.id}" } output "redis_commands_id" { description = "id for monitor redis_commands" - value = "${datadog_monitor.redis_commands.id}" + value = "${datadog_monitor.redis_commands.*.id}" } From 2ce0bbe8500addc07614c2e68b6111a1c438a545 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Wed, 19 Sep 2018 19:38:38 +0200 Subject: [PATCH 43/53] MON-32 only one notify no data to avoid noise --- cloud/aws/elasticache/common/monitors-elasticache.tf | 4 ++-- cloud/aws/elasticache/memcached/monitors-memcached.tf | 2 +- cloud/aws/elasticache/redis/monitors-redis.tf | 2 +- 3 files changed, 4 
insertions(+), 4 deletions(-) diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index d2b635d..2296b4c 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -67,7 +67,7 @@ resource "datadog_monitor" "elasticache_no_connection" { ) <= 0 EOF - notify_no_data = true + notify_no_data = false evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -134,7 +134,7 @@ resource "datadog_monitor" "elasticache_free_memory" { critical = "${var.free_memory_threshold_critical}" } - notify_no_data = true + notify_no_data = false evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index b9cb556..6ba5153 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -18,7 +18,7 @@ resource "datadog_monitor" "memcached_get_hits" { critical = "${var.get_hits_threshold_critical}" } - notify_no_data = true + notify_no_data = false evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 04e89fc..36734a5 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -108,7 +108,7 @@ resource "datadog_monitor" "redis_commands" { ) <= 0 EOF - notify_no_data = true + notify_no_data = false evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false From 61c7c7ee51cdfa950e5603a2897e616c2b33ad75 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Thu, 20 Sep 2018 10:26:46 +0200 Subject: [PATCH 44/53] MON-32 - common swap_threshold updated --- 
cloud/aws/elasticache/common/README.md | 4 ++-- cloud/aws/elasticache/common/inputs.tf | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 6d9263c..5ae024c 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -71,8 +71,8 @@ Creates DataDog monitors with the following checks: | swap_extra_tags | Extra tags for Elasticache swap monitor | list | `` | no | | swap_message | Custom message for Elasticache swap monitor | string | - | yes | | swap_silenced | Groups to mute for Elasticache swap monitor | map | - | yes | -| swap_threshold_critical | Elasticache swap critical threshold in percentage | string | - | yes | -| swap_threshold_warning | Elasticache swap warning threshold in percentage | string | - | yes | +| swap_threshold_critical | Elasticache swap critical threshold in bytes | string | - | yes | +| swap_threshold_warning | Elasticache swap warning threshold in bytes | string | - | yes | | swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | - | yes | | swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index 1a35318..1a48f4e 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -155,11 +155,11 @@ variable "swap_timeframe" { } variable "swap_threshold_warning" { - description = "Elasticache swap warning threshold in percentage" + description = "Elasticache swap warning threshold in bytes" } variable "swap_threshold_critical" { - description = "Elasticache swap critical threshold in percentage" + description = "Elasticache swap critical threshold in bytes" } variable "free_memory_silenced" { From 
24f2a3d326039e04df519e6ac3b94abe4c46bde8 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 20 Sep 2018 13:43:05 +0200 Subject: [PATCH 45/53] MON-32 add engine to tags --- cloud/aws/elasticache/memcached/monitors-memcached.tf | 4 ++-- cloud/aws/elasticache/redis/monitors-redis.tf | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 6ba5153..6c86693 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -30,7 +30,7 @@ resource "datadog_monitor" "memcached_get_hits" { silenced = "${var.get_hits_silenced}" - tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "${var.get_hits_extra_tags}"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "engine:memcached", "${var.get_hits_extra_tags}"] } resource "datadog_monitor" "memcached_cpu_high" { @@ -63,5 +63,5 @@ resource "datadog_monitor" "memcached_cpu_high" { silenced = "${var.cpu_high_silenced}" - tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "${var.cpu_high_extra_tags}"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "engine:memcached", "${var.cpu_high_extra_tags}"] } diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 36734a5..bcab961 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -30,7 +30,7 @@ resource "datadog_monitor" "redis_cache_hits" { silenced = "${var.cache_hits_silenced}" - tags = ["env:${var.environment}", 
"type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "${var.cache_hits_extra_tags}"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.cache_hits_extra_tags}"] } resource "datadog_monitor" "redis_cpu_high" { @@ -58,7 +58,7 @@ resource "datadog_monitor" "redis_cpu_high" { silenced = "${var.cpu_high_silenced}" - tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "${var.cpu_high_extra_tags}"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.cpu_high_extra_tags}"] } resource "datadog_monitor" "redis_replication_lag" { @@ -91,7 +91,7 @@ resource "datadog_monitor" "redis_replication_lag" { silenced = "${var.replication_lag_silenced}" - tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "${var.replication_lag_extra_tags}"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.replication_lag_extra_tags}"] } resource "datadog_monitor" "redis_commands" { @@ -120,5 +120,5 @@ resource "datadog_monitor" "redis_commands" { silenced = "${var.commands_silenced}" - tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "${var.commands_extra_tags}"] + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.commands_extra_tags}"] } From d8bfbe086dfd6da0635983497f586c75995d9568 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 20 Sep 2018 14:30:57 +0200 Subject: 
[PATCH 46/53] MON-32 add missing default --- cloud/aws/elasticache/common/inputs.tf | 44 +++++++++++++++++++++++ cloud/aws/elasticache/memcached/inputs.tf | 6 ++++ cloud/aws/elasticache/redis/inputs.tf | 10 ++++++ 3 files changed, 60 insertions(+) diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index 1a48f4e..9c9600b 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -33,6 +33,7 @@ variable "filter_tags_custom" { variable "eviction_silenced" { description = "Groups to mute for Elasticache eviction monitor" type = "map" + default = {} } variable "eviction_enabled" { @@ -50,20 +51,25 @@ variable "eviction_extra_tags" { variable "eviction_message" { description = "Custom message for Elasticache eviction monitor" type = "string" + default = "" } variable "eviction_time_aggregator" { description = "Monitor aggregator for Elasticache eviction [available values: min, max or avg]" type = "string" + default = "min" } variable "eviction_timeframe" { description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" } variable "max_connection_silenced" { description = "Groups to mute for Elasticache max connection monitor" type = "map" + default = {} } variable "max_connection_enabled" { @@ -81,20 +87,25 @@ variable "max_connection_extra_tags" { variable "max_connection_message" { description = "Custom message for Elasticache max connection monitor" type = "string" + default = "" } variable "max_connection_time_aggregator" { description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]" type = "string" + default = "max" } variable "max_connection_timeframe" { description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = 
"string" + default = "last_5m" } variable "no_connection_silenced" { description = "Groups to mute for Elasticache no connection monitor" type = "map" + default = {} } variable "no_connection_enabled" { @@ -112,20 +123,25 @@ variable "no_connection_extra_tags" { variable "no_connection_message" { description = "Custom message for Elasticache no connection monitor" type = "string" + default = "" } variable "no_connection_time_aggregator" { description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]" type = "string" + default = "min" } variable "no_connection_timeframe" { description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" } variable "swap_silenced" { description = "Groups to mute for Elasticache swap monitor" type = "map" + default = {} } variable "swap_enabled" { @@ -143,28 +159,37 @@ variable "swap_extra_tags" { variable "swap_message" { description = "Custom message for Elasticache swap monitor" type = "string" + default = "" } variable "swap_time_aggregator" { description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" type = "string" + default = "min" } variable "swap_timeframe" { description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = = "string" + default = "last_5m" } variable "swap_threshold_warning" { description = "Elasticache swap warning threshold in bytes" + type = = "string" + default = 0 } variable "swap_threshold_critical" { description = "Elasticache swap critical threshold in bytes" + type = = "string" + default = 50000000 } variable "free_memory_silenced" { description = "Groups to mute for Elasticache free memory monitor" type = "map" + default = {} } variable "free_memory_enabled" { @@ -182,27 +207,37 @@ variable 
"free_memory_extra_tags" { variable "free_memory_message" { description = "Custom message for Elasticache free memory monitor" type = "string" + default = "" } variable "free_memory_condition_timeframe" { description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" } variable "free_memory_timeframe" { description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" } variable "free_memory_threshold_warning" { description = "Elasticache free memory warning threshold in percentage" + type = "string" + default = -50 } variable "free_memory_threshold_critical" { description = "Elasticache free memory critical threshold in percentage" + type = "string" + default = -70 } variable "eviction_growing_silenced" { description = "Groups to mute for Elasticache eviction growing monitor" type = "map" + default = {} } variable "eviction_growing_enabled" { @@ -220,20 +255,29 @@ variable "eviction_growing_extra_tags" { variable "eviction_growing_message" { description = "Custom message for Elasticache eviction growing monitor" type = "string" + default = "" } variable "eviction_growing_condition_timeframe" { description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" } variable "eviction_growing_timeframe" { description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" } variable "eviction_growing_threshold_warning" { description = "Elasticache eviction growing warning threshold in percentage" + type = "string" + default = 10 } variable 
"eviction_growing_threshold_critical" { description = "Elasticache eviction growing critical threshold in percentage" + type = "string" + default = 30 } diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index fd8330c..4f23bca 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -56,16 +56,19 @@ variable "get_hits_message" { variable "get_hits_timeframe" { description = "Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" default = "last_15m" } variable "get_hits_threshold_warning" { description = "Elasticache memcached get hits warning threshold in percentage" + type = "string" default = 20 } variable "get_hits_threshold_critical" { description = "Elasticache memcached get hits critical threshold in percentage" + type = "string" default = 10 } @@ -101,15 +104,18 @@ variable "cpu_high_time_aggregator" { variable "cpu_high_timeframe" { description = "Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" default = "last_15m" } variable "cpu_high_threshold_warning" { description = "Elasticache memcached cpu high warning threshold in percentage" + type = "string" default = 75 } variable "cpu_high_threshold_critical" { description = "Elasticache memcached cpu high critical threshold in percentage" + type = "string" default = 90 } diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 4573abf..edfc238 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -56,16 +56,19 @@ variable "cache_hits_message" { variable "cache_hits_timeframe" { description = "Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 
4), or `last_1d`]" + type = "string" default = "last_15m" } variable "cache_hits_threshold_warning" { description = "Elasticache redis cache hits warning threshold in percentage" + type = "string" default = 20 } variable "cache_hits_threshold_critical" { description = "Elasticache redis cache hits critical threshold in percentage" + type = "string" default = 10 } @@ -101,16 +104,19 @@ variable "cpu_high_time_aggregator" { variable "cpu_high_timeframe" { description = "Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" default = "last_15m" } variable "cpu_high_threshold_warning" { description = "Elasticache redis cpu high warning threshold in percentage" + type = "string" default = 75 } variable "cpu_high_threshold_critical" { description = "Elasticache redis cpu high critical threshold in percentage" + type = "string" default = 90 } @@ -146,16 +152,19 @@ variable "replication_lag_time_aggregator" { variable "replication_lag_timeframe" { description = "Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" default = "last_5m" } variable "replication_lag_threshold_warning" { description = "Elasticache redis replication lag warning threshold in seconds" + type = "string" default = 0 } variable "replication_lag_threshold_critical" { description = "Elasticache redis replication lag critical threshold in seconds" + type = "string" default = 1 } @@ -185,5 +194,6 @@ variable "commands_message" { variable "commands_timeframe" { description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" default = "last_5m" } From 5357ef68e0f99b47849d833880d7cae0d1745126 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 20 Sep 2018 15:53:28 +0200 Subject: 
[PATCH 47/53] MON-32 add variables for eviction --- cloud/aws/elasticache/common/inputs.tf | 20 ++++++++++++------- .../common/monitors-elasticache.tf | 9 +++++++-- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index 9c9600b..84aaac0 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -54,16 +54,22 @@ variable "eviction_message" { default = "" } -variable "eviction_time_aggregator" { - description = "Monitor aggregator for Elasticache eviction [available values: min, max or avg]" - type = "string" - default = "min" -} - variable "eviction_timeframe" { description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_5m" + default = "last_15m" +} + +variable "eviction_threshold_warning" { + description = "Elasticache free memory warning threshold in percentage" + type = "string" + default = 0 +} + +variable "eviction_threshold_critical" { + description = "Elasticache free memory critical threshold in percentage" + type = "string" + default = 30 } variable "max_connection_silenced" { diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 2296b4c..2a908e5 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -6,11 +6,16 @@ resource "datadog_monitor" "elasticache_eviction" { type = "metric alert" query = < 0 + ) > ${var.eviction_threshold_critical} EOF + thresholds { + warning = "${var.eviction_threshold_warning}" + critical = "${var.eviction_threshold_critical}" + } + notify_no_data = false evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 From 329097c626dc7d0b9cc11cba5db1f920e555b05d Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 20 Sep 
2018 15:56:22 +0200 Subject: [PATCH 48/53] MON-32 add cachenodeid to group by --- cloud/aws/elasticache/common/monitors-elasticache.tf | 10 +++++----- cloud/aws/elasticache/memcached/monitors-memcached.tf | 6 +++--- cloud/aws/elasticache/redis/monitors-redis.tf | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index 2a908e5..efe4dfd 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -7,7 +7,7 @@ resource "datadog_monitor" "elasticache_eviction" { query = < ${var.eviction_threshold_critical} EOF @@ -40,7 +40,7 @@ resource "datadog_monitor" "elasticache_max_connection" { query = <= 65000 EOF @@ -68,7 +68,7 @@ resource "datadog_monitor" "elasticache_no_connection" { query = < ${var.swap_threshold_critical} EOF @@ -163,7 +163,7 @@ resource "datadog_monitor" "elasticache_eviction_growing" { query = < ${var.eviction_growing_threshold_critical} EOF diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 6c86693..5a8369c 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -7,9 +7,9 @@ resource "datadog_monitor" "memcached_get_hits" { query = < Date: Thu, 20 Sep 2018 15:57:23 +0200 Subject: [PATCH 49/53] MON-32 remove comment --- cloud/aws/elasticache/common/monitors-elasticache.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf index efe4dfd..e7c76c3 100644 --- a/cloud/aws/elasticache/common/monitors-elasticache.tf +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -120,7 +120,6 @@ resource "datadog_monitor" "elasticache_swap" { tags = ["env:${var.environment}", "type:cloud", "provider:aws", 
"resource:elasticache", "team:claranet", "created-by:terraform", "${var.swap_extra_tags}"] } -# POC - A approfondir resource "datadog_monitor" "elasticache_free_memory" { count = "${var.free_memory_enabled ? 1 : 0}" name = "[${var.environment}] Elasticache free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" From 08b5cb16d13b77347bca4b71b00de52e966687d8 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 20 Sep 2018 16:00:47 +0200 Subject: [PATCH 50/53] MON-32 increase replication lag timeframe and thresholds --- cloud/aws/elasticache/redis/inputs.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index edfc238..4e7a5d0 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -153,19 +153,19 @@ variable "replication_lag_time_aggregator" { variable "replication_lag_timeframe" { description = "Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_5m" + default = "last_10m" } variable "replication_lag_threshold_warning" { description = "Elasticache redis replication lag warning threshold in seconds" type = "string" - default = 0 + default = 90 } variable "replication_lag_threshold_critical" { description = "Elasticache redis replication lag critical threshold in seconds" type = "string" - default = 1 + default = 180 } variable "commands_silenced" { From 0462972da6f80b0487968b86292d8ad3f84a11bf Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 20 Sep 2018 16:03:32 +0200 Subject: [PATCH 51/53] MON-32 decrease threasholds for cache hit ratio --- cloud/aws/elasticache/memcached/inputs.tf | 4 ++-- cloud/aws/elasticache/memcached/monitors-memcached.tf | 2 +- 
cloud/aws/elasticache/redis/inputs.tf | 4 ++-- cloud/aws/elasticache/redis/monitors-redis.tf | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf index 4f23bca..a07c61a 100644 --- a/cloud/aws/elasticache/memcached/inputs.tf +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -63,13 +63,13 @@ variable "get_hits_timeframe" { variable "get_hits_threshold_warning" { description = "Elasticache memcached get hits warning threshold in percentage" type = "string" - default = 20 + default = 80 } variable "get_hits_threshold_critical" { description = "Elasticache memcached get hits critical threshold in percentage" type = "string" - default = 10 + default = 60 } variable "cpu_high_silenced" { diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf index 5a8369c..a79d226 100644 --- a/cloud/aws/elasticache/memcached/monitors-memcached.tf +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -1,6 +1,6 @@ resource "datadog_monitor" "memcached_get_hits" { count = "${var.get_hits_enabled ? 
1 : 0}" - name = "[${var.environment}] Elasticache memcached get hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Elasticache memcached get hit ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.get_hits_message, var.message)}" type = "metric alert" diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf index 4e7a5d0..69e2024 100644 --- a/cloud/aws/elasticache/redis/inputs.tf +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -63,13 +63,13 @@ variable "cache_hits_timeframe" { variable "cache_hits_threshold_warning" { description = "Elasticache redis cache hits warning threshold in percentage" type = "string" - default = 20 + default = 80 } variable "cache_hits_threshold_critical" { description = "Elasticache redis cache hits critical threshold in percentage" type = "string" - default = 10 + default = 60 } variable "cpu_high_silenced" { diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf index 0a52bf5..92a3dcf 100644 --- a/cloud/aws/elasticache/redis/monitors-redis.tf +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -1,6 +1,6 @@ resource "datadog_monitor" "redis_cache_hits" { count = "${var.cache_hits_enabled ? 
1 : 0}" - name = "[${var.environment}] Elasticache redis cache hits {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Elasticache redis cache hit ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cache_hits_message, var.message)}" type = "metric alert" From d902149c8a7f5c382fd1fa1e6708db1d2da32f07 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 20 Sep 2018 16:16:08 +0200 Subject: [PATCH 52/53] MON-32 fix typo --- cloud/aws/elasticache/common/inputs.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf index 84aaac0..626c639 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -176,19 +176,19 @@ variable "swap_time_aggregator" { variable "swap_timeframe" { description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - type = = "string" + type = "string" default = "last_5m" } variable "swap_threshold_warning" { description = "Elasticache swap warning threshold in bytes" - type = = "string" + type = "string" default = 0 } variable "swap_threshold_critical" { description = "Elasticache swap critical threshold in bytes" - type = = "string" + type = "string" default = 50000000 } From b5a63fcb2adbd600b40882babf1d212eee622132 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 20 Sep 2018 16:16:56 +0200 Subject: [PATCH 53/53] MON-32 auto update --- cloud/aws/elasticache/common/README.md | 61 ++++++++++++----------- cloud/aws/elasticache/common/inputs.tf | 2 +- cloud/aws/elasticache/memcached/README.md | 6 +-- cloud/aws/elasticache/redis/README.md | 12 ++--- 4 files 
changed, 41 insertions(+), 40 deletions(-) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md index 5ae024c..a899a64 100644 --- a/cloud/aws/elasticache/common/README.md +++ b/cloud/aws/elasticache/common/README.md @@ -31,50 +31,51 @@ Creates DataDog monitors with the following checks: | evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | eviction_enabled | Flag to enable Elasticache eviction monitor | string | `true` | no | | eviction_extra_tags | Extra tags for Elasticache eviction monitor | list | `` | no | -| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | eviction_growing_enabled | Flag to enable Elasticache eviction growing monitor | string | `true` | no | | eviction_growing_extra_tags | Extra tags for Elasticache eviction growing monitor | list | `` | no | -| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | - | yes | -| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | - | yes | -| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | - | yes | -| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | - | yes | -| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | -| eviction_message | Custom message for Elasticache eviction monitor | string | - | yes | -| 
eviction_silenced | Groups to mute for Elasticache eviction monitor | map | - | yes | -| eviction_time_aggregator | Monitor aggregator for Elasticache eviction [available values: min, max or avg] | string | - | yes | -| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | +| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | +| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | +| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | +| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | +| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | +| eviction_threshold_critical | Elasticache free memory critical threshold in percentage | string | `30` | no | +| eviction_threshold_warning | Elasticache free memory warning threshold in percentage | string | `0` | no | +| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), 
`last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | free_memory_enabled | Flag to enable Elasticache free memory monitor | string | `true` | no | | free_memory_extra_tags | Extra tags for Elasticache free memory monitor | list | `` | no | -| free_memory_message | Custom message for Elasticache free memory monitor | string | - | yes | -| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | - | yes | -| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | - | yes | -| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | - | yes | -| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | +| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | +| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | +| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | +| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | max_connection_enabled | Flag to enable Elasticache max connection monitor | string | `true` | no | | max_connection_extra_tags | Extra tags for Elasticache max connection monitor | list | `` | no | -| max_connection_message | Custom message for Elasticache max connection monitor | string | - | yes | -| 
max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | - | yes | -| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | - | yes | -| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | +| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | +| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | +| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | | no_connection_enabled | Flag to enable Elasticache no connection monitor | string | `true` | no | | no_connection_extra_tags | Extra tags for Elasticache no connection monitor | list | `` | no | -| no_connection_message | Custom message for Elasticache no connection monitor | string | - | yes | -| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | - | yes | -| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | - | yes | -| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | +| no_connection_silenced | 
Groups to mute for Elasticache no connection monitor | map | `` | no | +| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | +| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | swap_enabled | Flag to enable Elasticache swap monitor | string | `true` | no | | swap_extra_tags | Extra tags for Elasticache swap monitor | list | `` | no | -| swap_message | Custom message for Elasticache swap monitor | string | - | yes | -| swap_silenced | Groups to mute for Elasticache swap monitor | map | - | yes | -| swap_threshold_critical | Elasticache swap critical threshold in bytes | string | - | yes | -| swap_threshold_warning | Elasticache swap warning threshold in bytes | string | - | yes | -| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | - | yes | -| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | - | yes | +| swap_message | Custom message for Elasticache swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | +| swap_threshold_critical | Elasticache swap critical threshold in bytes | string | `50000000` | no | +| swap_threshold_warning | Elasticache swap warning threshold in bytes | string | `0` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache memcached swap [available values: min, max or avg] | string | `min` | no | +| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | ## Outputs diff --git a/cloud/aws/elasticache/common/inputs.tf 
b/cloud/aws/elasticache/common/inputs.tf index 626c639..9575148 100644 --- a/cloud/aws/elasticache/common/inputs.tf +++ b/cloud/aws/elasticache/common/inputs.tf @@ -177,7 +177,7 @@ variable "swap_time_aggregator" { variable "swap_timeframe" { description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_5m" + default = "last_5m" } variable "swap_threshold_warning" { diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md index b697225..6b5d05a 100644 --- a/cloud/aws/elasticache/memcached/README.md +++ b/cloud/aws/elasticache/memcached/README.md @@ -17,7 +17,7 @@ module "datadog-monitors-cloud-aws-elasticache-memcached" { Creates DataDog monitors with the following checks: - Elasticache memcached CPU -- Elasticache memcached get hits +- Elasticache memcached get hit ratio ## Inputs @@ -39,8 +39,8 @@ Creates DataDog monitors with the following checks: | get_hits_extra_tags | Extra tags for Elasticache memcached get hits monitor | list | `` | no | | get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | | get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | -| get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `10` | no | -| get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `20` | no | +| get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `60` | no | +| get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `80` | no | | get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | 
Message sent when an alert is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md index 77c13d1..f1bfe94 100644 --- a/cloud/aws/elasticache/redis/README.md +++ b/cloud/aws/elasticache/redis/README.md @@ -16,7 +16,7 @@ module "datadog-monitors-cloud-aws-elasticache-redis" { Creates DataDog monitors with the following checks: -- Elasticache redis cache hits +- Elasticache redis cache hit ratio - Elasticache redis CPU - Elasticache redis is receiving no commands - Elasticache redis replication lag @@ -29,8 +29,8 @@ Creates DataDog monitors with the following checks: | cache_hits_extra_tags | Extra tags for Elasticache redis cache hits monitor | list | `` | no | | cache_hits_message | Custom message for Elasticache redis cache hits monitor | string | `` | no | | cache_hits_silenced | Groups to mute for Elasticache redis cache hits monitor | map | `` | no | -| cache_hits_threshold_critical | Elasticache redis cache hits critical threshold in percentage | string | `10` | no | -| cache_hits_threshold_warning | Elasticache redis cache hits warning threshold in percentage | string | `20` | no | +| cache_hits_threshold_critical | Elasticache redis cache hits critical threshold in percentage | string | `60` | no | +| cache_hits_threshold_warning | Elasticache redis cache hits warning threshold in percentage | string | `80` | no | | cache_hits_timeframe | Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | commands_enabled | Flag to enable Elasticache redis commands monitor | string | `true` | no | | commands_extra_tags | Extra tags for Elasticache redis commands monitor | list | `` | no | @@ -55,10 +55,10 @@ Creates DataDog monitors with the following checks: | replication_lag_extra_tags | Extra tags 
for Elasticache redis replication lag monitor | list | `` | no | | replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `` | no | -| replication_lag_threshold_critical | Elasticache redis replication lag critical threshold in seconds | string | `1` | no | -| replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `0` | no | +| replication_lag_threshold_critical | Elasticache redis replication lag critical threshold in seconds | string | `180` | no | +| replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `90` | no | | replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no | -| replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_10m` | no | ## Outputs