diff --git a/README.md b/README.md index ed6b246..5cadf39 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,10 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/) - [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/) - [apigateway](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/apigateway/) + - [elasticache](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/) + - [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/common/) + - [memcached](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/memcached/) + - [redis](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/redis/) - [elasticsearch](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticsearch/) - [elb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elb/) - [kinesis-firehose](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/kinesis-firehose/) diff --git a/cloud/aws/elasticache/common/README.md b/cloud/aws/elasticache/common/README.md new file mode 100644 index 0000000..a899a64 --- /dev/null +++ b/cloud/aws/elasticache/common/README.md @@ -0,0 +1,95 @@ +# CLOUD AWS ELASTICACHE COMMON DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-cloud-aws-elasticache-common" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/common?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Elasticache connections +- Elasticache eviction +- Elasticache evictions is growing +- Elasticache free 
memory +- Elasticache max connections reached +- Elasticache swap + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| environment | Infrastructure Environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | +| eviction_enabled | Flag to enable Elasticache eviction monitor | string | `true` | no | +| eviction_extra_tags | Extra tags for Elasticache eviction monitor | list | `` | no | +| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_growing_enabled | Flag to enable Elasticache eviction growing monitor | string | `true` | no | +| eviction_growing_extra_tags | Extra tags for Elasticache eviction growing monitor | list | `` | no | +| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no | +| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `` | no | +| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no | +| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no | +| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no | +| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `` | no | +| eviction_threshold_critical | Elasticache eviction critical threshold in percentage | string | `30` | no | +| eviction_threshold_warning | Elasticache eviction warning threshold in percentage | string | 
`0` | no | +| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| free_memory_enabled | Flag to enable Elasticache free memory monitor | string | `true` | no | +| free_memory_extra_tags | Extra tags for Elasticache free memory monitor | list | `` | no | +| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no | +| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `` | no | +| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no | +| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no | +| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| max_connection_enabled | Flag to enable Elasticache max connection monitor | string | `true` | no | +| max_connection_extra_tags | Extra tags for Elasticache max connection monitor | list | `` | no | +| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no | +| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `` | no | +| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no | +| 
max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| no_connection_enabled | Flag to enable Elasticache no connection monitor | string | `true` | no | +| no_connection_extra_tags | Extra tags for Elasticache no connection monitor | list | `` | no | +| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no | +| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `` | no | +| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no | +| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| swap_enabled | Flag to enable Elasticache swap monitor | string | `true` | no | +| swap_extra_tags | Extra tags for Elasticache swap monitor | list | `` | no | +| swap_message | Custom message for Elasticache swap monitor | string | `` | no | +| swap_silenced | Groups to mute for Elasticache swap monitor | map | `` | no | +| swap_threshold_critical | Elasticache swap critical threshold in bytes | string | `50000000` | no | +| swap_threshold_warning | Elasticache swap warning threshold in bytes | string | `0` | no | +| swap_time_aggregator | Monitor aggregator for Elasticache swap [available values: min, max or avg] | string | `min` | no | +| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| 
elasticache_eviction_growing_id | id for monitor elasticache_eviction_growing | +| elasticache_eviction_id | id for monitor elasticache_eviction | +| elasticache_free_memory_id | id for monitor elasticache_free_memory | +| elasticache_max_connection_id | id for monitor elasticache_max_connection | +| elasticache_no_connection_id | id for monitor elasticache_no_connection | +| elasticache_swap_id | id for monitor elasticache_swap | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) + diff --git a/cloud/aws/elasticache/common/inputs.tf b/cloud/aws/elasticache/common/inputs.tf new file mode 100644 index 0000000..9575148 --- /dev/null +++ b/cloud/aws/elasticache/common/inputs.tf @@ -0,0 +1,289 @@ +# Global Terraform +variable "environment" { + description = "Infrastructure Environment" + type = "string" +} + +# Global DataDog +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# Elasticache specific +variable "eviction_silenced" { + description = "Groups to mute for Elasticache eviction monitor" + type = "map" + default = {} +} + +variable "eviction_enabled" { + description = "Flag to enable Elasticache eviction monitor" + type = "string" + default = "true" +} + +variable "eviction_extra_tags" { + description = "Extra tags for Elasticache eviction monitor" + type = "list" + default = [] +} + +variable 
"eviction_message" { + description = "Custom message for Elasticache eviction monitor" + type = "string" + default = "" +} + +variable "eviction_timeframe" { + description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + +variable "eviction_threshold_warning" { + description = "Elasticache free memory warning threshold in percentage" + type = "string" + default = 0 +} + +variable "eviction_threshold_critical" { + description = "Elasticache free memory critical threshold in percentage" + type = "string" + default = 30 +} + +variable "max_connection_silenced" { + description = "Groups to mute for Elasticache max connection monitor" + type = "map" + default = {} +} + +variable "max_connection_enabled" { + description = "Flag to enable Elasticache max connection monitor" + type = "string" + default = "true" +} + +variable "max_connection_extra_tags" { + description = "Extra tags for Elasticache max connection monitor" + type = "list" + default = [] +} + +variable "max_connection_message" { + description = "Custom message for Elasticache max connection monitor" + type = "string" + default = "" +} + +variable "max_connection_time_aggregator" { + description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "max_connection_timeframe" { + description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "no_connection_silenced" { + description = "Groups to mute for Elasticache no connection monitor" + type = "map" + default = {} +} + +variable "no_connection_enabled" { + description = "Flag to enable Elasticache no connection monitor" + type = "string" + default = "true" +} + +variable 
"no_connection_extra_tags" { + description = "Extra tags for Elasticache no connection monitor" + type = "list" + default = [] +} + +variable "no_connection_message" { + description = "Custom message for Elasticache no connection monitor" + type = "string" + default = "" +} + +variable "no_connection_time_aggregator" { + description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "no_connection_timeframe" { + description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "swap_silenced" { + description = "Groups to mute for Elasticache swap monitor" + type = "map" + default = {} +} + +variable "swap_enabled" { + description = "Flag to enable Elasticache swap monitor" + type = "string" + default = "true" +} + +variable "swap_extra_tags" { + description = "Extra tags for Elasticache swap monitor" + type = "list" + default = [] +} + +variable "swap_message" { + description = "Custom message for Elasticache swap monitor" + type = "string" + default = "" +} + +variable "swap_time_aggregator" { + description = "Monitor aggregator for Elasticache memcached swap [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "swap_timeframe" { + description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "swap_threshold_warning" { + description = "Elasticache swap warning threshold in bytes" + type = "string" + default = 0 +} + +variable "swap_threshold_critical" { + description = "Elasticache swap critical threshold in bytes" + type = "string" + default = 50000000 +} + +variable "free_memory_silenced" { + description = "Groups to mute for Elasticache free memory monitor" + type 
= "map" + default = {} +} + +variable "free_memory_enabled" { + description = "Flag to enable Elasticache free memory monitor" + type = "string" + default = "true" +} + +variable "free_memory_extra_tags" { + description = "Extra tags for Elasticache free memory monitor" + type = "list" + default = [] +} + +variable "free_memory_message" { + description = "Custom message for Elasticache free memory monitor" + type = "string" + default = "" +} + +variable "free_memory_condition_timeframe" { + description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + +variable "free_memory_timeframe" { + description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + +variable "free_memory_threshold_warning" { + description = "Elasticache free memory warning threshold in percentage" + type = "string" + default = -50 +} + +variable "free_memory_threshold_critical" { + description = "Elasticache free memory critical threshold in percentage" + type = "string" + default = -70 +} + +variable "eviction_growing_silenced" { + description = "Groups to mute for Elasticache eviction growing monitor" + type = "map" + default = {} +} + +variable "eviction_growing_enabled" { + description = "Flag to enable Elasticache eviction growing monitor" + type = "string" + default = "true" +} + +variable "eviction_growing_extra_tags" { + description = "Extra tags for Elasticache eviction growing monitor" + type = "list" + default = [] +} + +variable "eviction_growing_message" { + description = "Custom message for Elasticache eviction growing monitor" + type = "string" + default = "" +} + +variable "eviction_growing_condition_timeframe" { + description = "Monitor condition timeframe for Elasticache eviction growing [available 
values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "eviction_growing_timeframe" { + description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "eviction_growing_threshold_warning" { + description = "Elasticache eviction growing warning threshold in percentage" + type = "string" + default = 10 +} + +variable "eviction_growing_threshold_critical" { + description = "Elasticache eviction growing critical threshold in percentage" + type = "string" + default = 30 +} diff --git a/cloud/aws/elasticache/common/modules.tf b/cloud/aws/elasticache/common/modules.tf new file mode 100644 index 0000000..987c046 --- /dev/null +++ b/cloud/aws/elasticache/common/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../../../common/filter-tags" + + environment = "${var.environment}" + resource = "aws_elasticache" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/cloud/aws/elasticache/common/monitors-elasticache.tf b/cloud/aws/elasticache/common/monitors-elasticache.tf new file mode 100644 index 0000000..e7c76c3 --- /dev/null +++ b/cloud/aws/elasticache/common/monitors-elasticache.tf @@ -0,0 +1,187 @@ +resource "datadog_monitor" "elasticache_eviction" { + count = "${var.eviction_enabled ? 
1 : 0}" + name = "[${var.environment}] Elasticache eviction {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}" + message = "${coalesce(var.eviction_message, var.message)}" + + type = "metric alert" + + query = < ${var.eviction_threshold_critical} + EOF + + thresholds { + warning = "${var.eviction_threshold_warning}" + critical = "${var.eviction_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + silenced = "${var.eviction_silenced}" + + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.eviction_extra_tags}"] +} + +resource "datadog_monitor" "elasticache_max_connection" { + count = "${var.max_connection_enabled ? 1 : 0}" + name = "[${var.environment}] Elasticache max connections reached {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" + message = "${coalesce(var.max_connection_message, var.message)}" + + type = "metric alert" + + query = <= 65000 + EOF + + notify_no_data = true + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + silenced = "${var.max_connection_silenced}" + + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.max_connection_extra_tags}"] +} + +resource "datadog_monitor" "elasticache_no_connection" { + count = "${var.no_connection_enabled ? 
1 : 0}" + name = "[${var.environment}] Elasticache connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}" + message = "${coalesce(var.no_connection_message, var.message)}" + + type = "metric alert" + + query = < ${var.swap_threshold_critical} + EOF + + thresholds { + warning = "${var.swap_threshold_warning}" + critical = "${var.swap_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + silenced = "${var.swap_silenced}" + + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.swap_extra_tags}"] +} + +resource "datadog_monitor" "elasticache_free_memory" { + count = "${var.free_memory_enabled ? 1 : 0}" + name = "[${var.environment}] Elasticache free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.free_memory_message, var.message)}" + + type = "metric alert" + + query = < ${var.eviction_growing_threshold_critical} + EOF + + thresholds { + warning = "${var.eviction_growing_threshold_warning}" + critical = "${var.eviction_growing_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + silenced = "${var.eviction_growing_silenced}" + + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.eviction_growing_extra_tags}"] +} diff --git a/cloud/aws/elasticache/common/outputs.tf b/cloud/aws/elasticache/common/outputs.tf new 
file mode 100644 index 0000000..5268c4d --- /dev/null +++ b/cloud/aws/elasticache/common/outputs.tf @@ -0,0 +1,29 @@ +output "elasticache_eviction_id" { + description = "id for monitor elasticache_eviction" + value = "${datadog_monitor.elasticache_eviction.*.id}" +} + +output "elasticache_max_connection_id" { + description = "id for monitor elasticache_max_connection" + value = "${datadog_monitor.elasticache_max_connection.*.id}" +} + +output "elasticache_no_connection_id" { + description = "id for monitor elasticache_no_connection" + value = "${datadog_monitor.elasticache_no_connection.*.id}" +} + +output "elasticache_swap_id" { + description = "id for monitor elasticache_swap" + value = "${datadog_monitor.elasticache_swap.*.id}" +} + +output "elasticache_free_memory_id" { + description = "id for monitor elasticache_free_memory" + value = "${datadog_monitor.elasticache_free_memory.*.id}" +} + +output "elasticache_eviction_growing_id" { + description = "id for monitor elasticache_eviction_growing" + value = "${datadog_monitor.elasticache_eviction_growing.*.id}" +} diff --git a/cloud/aws/elasticache/memcached/README.md b/cloud/aws/elasticache/memcached/README.md new file mode 100644 index 0000000..6b5d05a --- /dev/null +++ b/cloud/aws/elasticache/memcached/README.md @@ -0,0 +1,63 @@ +# CLOUD AWS ELASTICACHE MEMCACHED DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-cloud-aws-elasticache-memcached" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/memcached?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Elasticache memcached CPU +- Elasticache memcached get hit ratio + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cpu_high_enabled | Flag to enable Elasticache 
memcached cpu high monitor | string | `true` | no | +| cpu_high_extra_tags | Extra tags for Elasticache memcached cpu high monitor | list | `` | no | +| cpu_high_message | Custom message for Elasticache memcached cpu high monitor | string | `` | no | +| cpu_high_silenced | Groups to mute for Elasticache memcached cpu high monitor | map | `` | no | +| cpu_high_threshold_critical | Elasticache memcached cpu high critical threshold in percentage | string | `90` | no | +| cpu_high_threshold_warning | Elasticache memcached cpu high warning threshold in percentage | string | `75` | no | +| cpu_high_time_aggregator | Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg] | string | `min` | no | +| cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| environment | Infrastructure Environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| get_hits_enabled | Flag to enable Elasticache memcached get hits monitor | string | `true` | no | +| get_hits_extra_tags | Extra tags for Elasticache memcached get hits monitor | list | `` | no | +| get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no | +| get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `` | no | +| get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `60` | no | +| get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `80` | no | +| get_hits_timeframe | Monitor timeframe for Elasticache memcached get 
hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| memcached_cpu_high_id | id for monitor memcached_cpu_high | +| memcached_get_hits_id | id for monitor memcached_get_hits | + +Related documentation +--------------------- + +DataDog documentation: + +* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) + + diff --git a/cloud/aws/elasticache/memcached/inputs.tf b/cloud/aws/elasticache/memcached/inputs.tf new file mode 100644 index 0000000..a07c61a --- /dev/null +++ b/cloud/aws/elasticache/memcached/inputs.tf @@ -0,0 +1,121 @@ +# Global Terraform +variable "environment" { + description = "Infrastructure Environment" + type = "string" +} + +# Global DataDog +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# Memcached specific +variable "get_hits_silenced" { + description = "Groups to mute for Elasticache memcached get hits monitor" + type = "map" + default = {} +} + +variable 
"get_hits_enabled" { + description = "Flag to enable Elasticache memcached get hits monitor" + type = "string" + default = "true" +} + +variable "get_hits_extra_tags" { + description = "Extra tags for Elasticache memcached get hits monitor" + type = "list" + default = [] +} + +variable "get_hits_message" { + description = "Custom message for Elasticache memcached get hits monitor" + type = "string" + default = "" +} + +variable "get_hits_timeframe" { + description = "Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + +variable "get_hits_threshold_warning" { + description = "Elasticache memcached get hits warning threshold in percentage" + type = "string" + default = 80 +} + +variable "get_hits_threshold_critical" { + description = "Elasticache memcached get hits critical threshold in percentage" + type = "string" + default = 60 +} + +variable "cpu_high_silenced" { + description = "Groups to mute for Elasticache memcached cpu high monitor" + type = "map" + default = {} +} + +variable "cpu_high_enabled" { + description = "Flag to enable Elasticache memcached cpu high monitor" + type = "string" + default = "true" +} + +variable "cpu_high_extra_tags" { + description = "Extra tags for Elasticache memcached cpu high monitor" + type = "list" + default = [] +} + +variable "cpu_high_message" { + description = "Custom message for Elasticache memcached cpu high monitor" + type = "string" + default = "" +} + +variable "cpu_high_time_aggregator" { + description = "Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "cpu_high_timeframe" { + description = "Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + +variable 
"cpu_high_threshold_warning" { + description = "Elasticache memcached cpu high warning threshold in percentage" + type = "string" + default = 75 +} + +variable "cpu_high_threshold_critical" { + description = "Elasticache memcached cpu high critical threshold in percentage" + type = "string" + default = 90 +} diff --git a/cloud/aws/elasticache/memcached/modules.tf b/cloud/aws/elasticache/memcached/modules.tf new file mode 100644 index 0000000..987c046 --- /dev/null +++ b/cloud/aws/elasticache/memcached/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../../../common/filter-tags" + + environment = "${var.environment}" + resource = "aws_elasticache" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/cloud/aws/elasticache/memcached/monitors-memcached.tf b/cloud/aws/elasticache/memcached/monitors-memcached.tf new file mode 100644 index 0000000..a79d226 --- /dev/null +++ b/cloud/aws/elasticache/memcached/monitors-memcached.tf @@ -0,0 +1,67 @@ +resource "datadog_monitor" "memcached_get_hits" { + count = "${var.get_hits_enabled ? 
1 : 0}" + name = "[${var.environment}] Elasticache memcached get hit ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.get_hits_message, var.message)}" + + type = "metric alert" + + query = < ${var.cpu_high_threshold_critical} + EOF + + thresholds { + warning = "${var.cpu_high_threshold_warning}" + critical = "${var.cpu_high_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + silenced = "${var.cpu_high_silenced}" + + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "engine:memcached", "${var.cpu_high_extra_tags}"] +} diff --git a/cloud/aws/elasticache/memcached/outputs.tf b/cloud/aws/elasticache/memcached/outputs.tf new file mode 100644 index 0000000..7beb0d5 --- /dev/null +++ b/cloud/aws/elasticache/memcached/outputs.tf @@ -0,0 +1,9 @@ +output "memcached_get_hits_id" { + description = "id for monitor memcached_get_hits" + value = "${datadog_monitor.memcached_get_hits.*.id}" +} + +output "memcached_cpu_high_id" { + description = "id for monitor memcached_cpu_high" + value = "${datadog_monitor.memcached_cpu_high.*.id}" +} diff --git a/cloud/aws/elasticache/redis/README.md b/cloud/aws/elasticache/redis/README.md new file mode 100644 index 0000000..f1bfe94 --- /dev/null +++ b/cloud/aws/elasticache/redis/README.md @@ -0,0 +1,77 @@ +# CLOUD AWS ELASTICACHE REDIS DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-cloud-aws-elasticache-redis" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/redis?ref={revision}" + + environment = "${var.environment}" 
+ message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Elasticache redis cache hit ratio +- Elasticache redis CPU +- Elasticache redis is receiving no commands +- Elasticache redis replication lag + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cache_hits_enabled | Flag to enable Elasticache redis cache hits monitor | string | `true` | no | +| cache_hits_extra_tags | Extra tags for Elasticache redis cache hits monitor | list | `` | no | +| cache_hits_message | Custom message for Elasticache redis cache hits monitor | string | `` | no | +| cache_hits_silenced | Groups to mute for Elasticache redis cache hits monitor | map | `` | no | +| cache_hits_threshold_critical | Elasticache redis cache hits critical threshold in percentage | string | `60` | no | +| cache_hits_threshold_warning | Elasticache redis cache hits warning threshold in percentage | string | `80` | no | +| cache_hits_timeframe | Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| commands_enabled | Flag to enable Elasticache redis commands monitor | string | `true` | no | +| commands_extra_tags | Extra tags for Elasticache redis commands monitor | list | `` | no | +| commands_message | Custom message for Elasticache redis commands monitor | string | `` | no | +| commands_silenced | Groups to mute for Elasticache redis commands monitor | map | `` | no | +| commands_timeframe | Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| cpu_high_enabled | Flag to enable Elasticache redis cpu high monitor | string | `true` | no | +| cpu_high_extra_tags | Extra tags for Elasticache redis cpu high monitor | list | `` | 
no | +| cpu_high_message | Custom message for Elasticache redis cpu high monitor | string | `` | no | +| cpu_high_silenced | Groups to mute for Elasticache redis cpu high monitor | map | `` | no | +| cpu_high_threshold_critical | Elasticache redis cpu high critical threshold in percentage | string | `90` | no | +| cpu_high_threshold_warning | Elasticache redis cpu high warning threshold in percentage | string | `75` | no | +| cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `avg` | no | +| cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| environment | Infrastructure Environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| replication_lag_enabled | Flag to enable Elasticache redis replication lag monitor | string | `true` | no | +| replication_lag_extra_tags | Extra tags for Elasticache redis replication lag monitor | list | `` | no | +| replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no | +| replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `` | no | +| replication_lag_threshold_critical | Elasticache redis replication lag critical threshold in seconds | string | `180` | no | +| replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `90` | no | +| 
replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no | +| replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_10m` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| redis_cache_hits_id | id for monitor redis_cache_hits | +| redis_commands_id | id for monitor redis_commands | +| redis_cpu_high_id | id for monitor redis_cpu_high | +| redis_replication_lag_id | id for monitor redis_replication_lag | + +## Related documentation + +* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) +* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/) + + diff --git a/cloud/aws/elasticache/redis/inputs.tf b/cloud/aws/elasticache/redis/inputs.tf new file mode 100644 index 0000000..69e2024 --- /dev/null +++ b/cloud/aws/elasticache/redis/inputs.tf @@ -0,0 +1,199 @@ +# Global Terraform +variable "environment" { + description = "Infrastructure Environment" + type = "string" +} + +# Global DataDog +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# redis specific +variable "cache_hits_silenced" { + 
description = "Groups to mute for Elasticache redis cache hits monitor" + type = "map" + default = {} +} + +variable "cache_hits_enabled" { + description = "Flag to enable Elasticache redis cache hits monitor" + type = "string" + default = "true" +} + +variable "cache_hits_extra_tags" { + description = "Extra tags for Elasticache redis cache hits monitor" + type = "list" + default = [] +} + +variable "cache_hits_message" { + description = "Custom message for Elasticache redis cache hits monitor" + type = "string" + default = "" +} + +variable "cache_hits_timeframe" { + description = "Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + +variable "cache_hits_threshold_warning" { + description = "Elasticache redis cache hits warning threshold in percentage" + type = "string" + default = 80 +} + +variable "cache_hits_threshold_critical" { + description = "Elasticache redis cache hits critical threshold in percentage" + type = "string" + default = 60 +} + +variable "cpu_high_silenced" { + description = "Groups to mute for Elasticache redis cpu high monitor" + type = "map" + default = {} +} + +variable "cpu_high_enabled" { + description = "Flag to enable Elasticache redis cpu high monitor" + type = "string" + default = "true" +} + +variable "cpu_high_extra_tags" { + description = "Extra tags for Elasticache redis cpu high monitor" + type = "list" + default = [] +} + +variable "cpu_high_message" { + description = "Custom message for Elasticache redis cpu high monitor" + type = "string" + default = "" +} + +variable "cpu_high_time_aggregator" { + description = "Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg]" + type = "string" + default = "avg" +} + +variable "cpu_high_timeframe" { + description = "Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), 
`last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + +variable "cpu_high_threshold_warning" { + description = "Elasticache redis cpu high warning threshold in percentage" + type = "string" + default = 75 +} + +variable "cpu_high_threshold_critical" { + description = "Elasticache redis cpu high critical threshold in percentage" + type = "string" + default = 90 +} + +variable "replication_lag_silenced" { + description = "Groups to mute for Elasticache redis replication lag monitor" + type = "map" + default = {} +} + +variable "replication_lag_enabled" { + description = "Flag to enable Elasticache redis replication lag monitor" + type = "string" + default = "true" +} + +variable "replication_lag_extra_tags" { + description = "Extra tags for Elasticache redis replication lag monitor" + type = "list" + default = [] +} + +variable "replication_lag_message" { + description = "Custom message for Elasticache redis replication lag monitor" + type = "string" + default = "" +} + +variable "replication_lag_time_aggregator" { + description = "Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "replication_lag_timeframe" { + description = "Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_10m" +} + +variable "replication_lag_threshold_warning" { + description = "Elasticache redis replication lag warning threshold in seconds" + type = "string" + default = 90 +} + +variable "replication_lag_threshold_critical" { + description = "Elasticache redis replication lag critical threshold in seconds" + type = "string" + default = 180 +} + +variable "commands_silenced" { + description = "Groups to mute for Elasticache redis commands monitor" + type = "map" + default = {} +} + +variable "commands_enabled" { + description = "Flag to 
enable Elasticache redis commands monitor" + type = "string" + default = "true" +} + +variable "commands_extra_tags" { + description = "Extra tags for Elasticache redis commands monitor" + type = "list" + default = [] +} + +variable "commands_message" { + description = "Custom message for Elasticache redis commands monitor" + type = "string" + default = "" +} + +variable "commands_timeframe" { + description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} diff --git a/cloud/aws/elasticache/redis/modules.tf b/cloud/aws/elasticache/redis/modules.tf new file mode 100644 index 0000000..987c046 --- /dev/null +++ b/cloud/aws/elasticache/redis/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../../../common/filter-tags" + + environment = "${var.environment}" + resource = "aws_elasticache" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/cloud/aws/elasticache/redis/monitors-redis.tf b/cloud/aws/elasticache/redis/monitors-redis.tf new file mode 100644 index 0000000..92a3dcf --- /dev/null +++ b/cloud/aws/elasticache/redis/monitors-redis.tf @@ -0,0 +1,124 @@ +resource "datadog_monitor" "redis_cache_hits" { + count = "${var.cache_hits_enabled ? 
1 : 0}" + name = "[${var.environment}] Elasticache redis cache hit ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.cache_hits_message, var.message)}" + + type = "metric alert" + + query = < ${var.cpu_high_threshold_critical} + EOF + + # Register both levels with Datadog; without this block cpu_high_threshold_warning is never used + thresholds { + warning = "${var.cpu_high_threshold_warning}" + critical = "${var.cpu_high_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + silenced = "${var.cpu_high_silenced}" + + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.cpu_high_extra_tags}"] +} + +resource "datadog_monitor" "redis_replication_lag" { + count = "${var.replication_lag_enabled ? 1 : 0}" + name = "[${var.environment}] Elasticache redis replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" + message = "${coalesce(var.replication_lag_message, var.message)}" + + type = "metric alert" + + query = < ${var.replication_lag_threshold_critical} + EOF + + thresholds { + warning = "${var.replication_lag_threshold_warning}" + critical = "${var.replication_lag_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + silenced = "${var.replication_lag_silenced}" + + tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.replication_lag_extra_tags}"] +} + +resource "datadog_monitor" "redis_commands" { 
+ count = "${var.commands_enabled ? 1 : 0}" + name = "[${var.environment}] Elasticache redis is receiving no commands" + message = "${coalesce(var.commands_message, var.message)}" + + type = "metric alert" + + query = <