Merged in MON-32-specific-cache-in-general-directory (pull request #94)
MON-32 Add Elasticache Monitors Approved-by: Alexandre Gaillet <alexandre.gaillet@fr.clara.net> Approved-by: Quentin Manfroi <quentin.manfroi@yahoo.fr>
This commit is contained in:
commit
fc5e42f0e3
@ -80,6 +80,10 @@ The `//` is very important, it's a terraform specific syntax used to separate gi
|
||||
- [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/)
|
||||
- [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/)
|
||||
- [apigateway](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/apigateway/)
|
||||
- [elasticache](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/)
|
||||
- [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/common/)
|
||||
- [memcached](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/memcached/)
|
||||
- [redis](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticache/redis/)
|
||||
- [elasticsearch](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticsearch/)
|
||||
- [elb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elb/)
|
||||
- [kinesis-firehose](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/kinesis-firehose/)
|
||||
|
||||
95
cloud/aws/elasticache/common/README.md
Normal file
95
cloud/aws/elasticache/common/README.md
Normal file
@ -0,0 +1,95 @@
|
||||
# CLOUD AWS ELASTICACHE COMMON DataDog monitors
|
||||
|
||||
## How to use this module
|
||||
|
||||
```
|
||||
module "datadog-monitors-cloud-aws-elasticache-common" {
|
||||
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/common?ref={revision}"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${module.datadog-message-alerting.alerting-message}"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
## Purpose
|
||||
|
||||
Creates DataDog monitors with the following checks:
|
||||
|
||||
- Elasticache connections
|
||||
- Elasticache eviction
|
||||
- Elasticache evictions is growing
|
||||
- Elasticache free memory
|
||||
- Elasticache max connections reached
|
||||
- Elasticache swap
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| environment | Infrastructure Environment | string | - | yes |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| eviction_enabled | Flag to enable Elasticache eviction monitor | string | `true` | no |
|
||||
| eviction_extra_tags | Extra tags for Elasticache eviction monitor | list | `<list>` | no |
|
||||
| eviction_growing_condition_timeframe | Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| eviction_growing_enabled | Flag to enable Elasticache eviction growing monitor | string | `true` | no |
|
||||
| eviction_growing_extra_tags | Extra tags for Elasticache eviction growing monitor | list | `<list>` | no |
|
||||
| eviction_growing_message | Custom message for Elasticache eviction growing monitor | string | `` | no |
|
||||
| eviction_growing_silenced | Groups to mute for Elasticache eviction growing monitor | map | `<map>` | no |
|
||||
| eviction_growing_threshold_critical | Elasticache eviction growing critical threshold in percentage | string | `30` | no |
|
||||
| eviction_growing_threshold_warning | Elasticache eviction growing warning threshold in percentage | string | `10` | no |
|
||||
| eviction_growing_timeframe | Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| eviction_message | Custom message for Elasticache eviction monitor | string | `` | no |
|
||||
| eviction_silenced | Groups to mute for Elasticache eviction monitor | map | `<map>` | no |
|
||||
| eviction_threshold_critical | Elasticache eviction critical threshold (evictions summed over the timeframe) | string | `30` | no |
|
||||
| eviction_threshold_warning | Elasticache eviction warning threshold (evictions summed over the timeframe) | string | `0` | no |
|
||||
| eviction_timeframe | Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| free_memory_condition_timeframe | Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| free_memory_enabled | Flag to enable Elasticache free memory monitor | string | `true` | no |
|
||||
| free_memory_extra_tags | Extra tags for Elasticache free memory monitor | list | `<list>` | no |
|
||||
| free_memory_message | Custom message for Elasticache free memory monitor | string | `` | no |
|
||||
| free_memory_silenced | Groups to mute for Elasticache free memory monitor | map | `<map>` | no |
|
||||
| free_memory_threshold_critical | Elasticache free memory critical threshold in percentage | string | `-70` | no |
|
||||
| free_memory_threshold_warning | Elasticache free memory warning threshold in percentage | string | `-50` | no |
|
||||
| free_memory_timeframe | Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| max_connection_enabled | Flag to enable Elasticache max connection monitor | string | `true` | no |
|
||||
| max_connection_extra_tags | Extra tags for Elasticache max connection monitor | list | `<list>` | no |
|
||||
| max_connection_message | Custom message for Elasticache max connection monitor | string | `` | no |
|
||||
| max_connection_silenced | Groups to mute for Elasticache max connection monitor | map | `<map>` | no |
|
||||
| max_connection_time_aggregator | Monitor aggregator for Elasticache max connection [available values: min, max or avg] | string | `max` | no |
|
||||
| max_connection_timeframe | Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| message | Message sent when an alert is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
|
||||
| no_connection_enabled | Flag to enable Elasticache no connection monitor | string | `true` | no |
|
||||
| no_connection_extra_tags | Extra tags for Elasticache no connection monitor | list | `<list>` | no |
|
||||
| no_connection_message | Custom message for Elasticache no connection monitor | string | `` | no |
|
||||
| no_connection_silenced | Groups to mute for Elasticache no connection monitor | map | `<map>` | no |
|
||||
| no_connection_time_aggregator | Monitor aggregator for Elasticache no connection [available values: min, max or avg] | string | `min` | no |
|
||||
| no_connection_timeframe | Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| swap_enabled | Flag to enable Elasticache swap monitor | string | `true` | no |
|
||||
| swap_extra_tags | Extra tags for Elasticache swap monitor | list | `<list>` | no |
|
||||
| swap_message | Custom message for Elasticache swap monitor | string | `` | no |
|
||||
| swap_silenced | Groups to mute for Elasticache swap monitor | map | `<map>` | no |
|
||||
| swap_threshold_critical | Elasticache swap critical threshold in bytes | string | `50000000` | no |
|
||||
| swap_threshold_warning | Elasticache swap warning threshold in bytes | string | `0` | no |
|
||||
| swap_time_aggregator | Monitor aggregator for Elasticache swap [available values: min, max or avg] | string | `min` | no |
|
||||
| swap_timeframe | Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| elasticache_eviction_growing_id | id for monitor elasticache_eviction_growing |
|
||||
| elasticache_eviction_id | id for monitor elasticache_eviction |
|
||||
| elasticache_free_memory_id | id for monitor elasticache_free_memory |
|
||||
| elasticache_max_connection_id | id for monitor elasticache_max_connection |
|
||||
| elasticache_no_connection_id | id for monitor elasticache_no_connection |
|
||||
| elasticache_swap_id | id for monitor elasticache_swap |
|
||||
|
||||
Related documentation
|
||||
---------------------
|
||||
|
||||
DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/)
|
||||
|
||||
289
cloud/aws/elasticache/common/inputs.tf
Normal file
289
cloud/aws/elasticache/common/inputs.tf
Normal file
@ -0,0 +1,289 @@
|
||||
# Global Terraform
|
||||
variable "environment" {
|
||||
description = "Infrastructure Environment"
|
||||
type = "string"
|
||||
}
|
||||
|
||||
# Global DataDog
|
||||
variable "evaluation_delay" {
|
||||
description = "Delay in seconds for the metric evaluation"
|
||||
default = 900
|
||||
}
|
||||
|
||||
variable "new_host_delay" {
|
||||
description = "Delay in seconds before monitor new resource"
|
||||
default = 300
|
||||
}
|
||||
|
||||
variable "message" {
|
||||
description = "Message sent when an alert is triggered"
|
||||
}
|
||||
|
||||
variable "filter_tags_use_defaults" {
|
||||
description = "Use default filter tags convention"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "filter_tags_custom" {
|
||||
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
|
||||
default = "*"
|
||||
}
|
||||
|
||||
# Elasticache specific
|
||||
variable "eviction_silenced" {
|
||||
description = "Groups to mute for Elasticache eviction monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "eviction_enabled" {
|
||||
description = "Flag to enable Elasticache eviction monitor"
|
||||
type = "string"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "eviction_extra_tags" {
|
||||
description = "Extra tags for Elasticache eviction monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "eviction_message" {
|
||||
description = "Custom message for Elasticache eviction monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "eviction_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache eviction [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
# Warning threshold for the eviction monitor. The monitor query sums the
# evictions metric over eviction_timeframe, so this is an eviction count,
# not a free-memory percentage.
variable "eviction_threshold_warning" {
  description = "Elasticache eviction warning threshold (evictions summed over the timeframe)"
  type        = "string"
  default     = 0
}
|
||||
|
||||
# Critical threshold for the eviction monitor. The monitor query sums the
# evictions metric over eviction_timeframe and alerts when the sum exceeds
# this value, so this is an eviction count, not a free-memory percentage.
variable "eviction_threshold_critical" {
  description = "Elasticache eviction critical threshold (evictions summed over the timeframe)"
  type        = "string"
  default     = 30
}
|
||||
|
||||
variable "max_connection_silenced" {
|
||||
description = "Groups to mute for Elasticache max connection monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "max_connection_enabled" {
|
||||
description = "Flag to enable Elasticache max connection monitor"
|
||||
type = "string"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "max_connection_extra_tags" {
|
||||
description = "Extra tags for Elasticache max connection monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "max_connection_message" {
|
||||
description = "Custom message for Elasticache max connection monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "max_connection_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "max_connection_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache max connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "no_connection_silenced" {
|
||||
description = "Groups to mute for Elasticache no connection monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "no_connection_enabled" {
|
||||
description = "Flag to enable Elasticache no connection monitor"
|
||||
type = "string"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "no_connection_extra_tags" {
|
||||
description = "Extra tags for Elasticache no connection monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "no_connection_message" {
|
||||
description = "Custom message for Elasticache no connection monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "no_connection_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache no connection [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "no_connection_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache no connection [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "swap_silenced" {
|
||||
description = "Groups to mute for Elasticache swap monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "swap_enabled" {
|
||||
description = "Flag to enable Elasticache swap monitor"
|
||||
type = "string"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "swap_extra_tags" {
|
||||
description = "Extra tags for Elasticache swap monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "swap_message" {
|
||||
description = "Custom message for Elasticache swap monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
# Time aggregator applied to the swap usage series in the swap monitor query.
# This module is the engine-agnostic "common" one, so the description does
# not mention memcached.
variable "swap_time_aggregator" {
  description = "Monitor aggregator for Elasticache swap [available values: min, max or avg]"
  type        = "string"
  default     = "min"
}
|
||||
|
||||
variable "swap_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache swap [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "swap_threshold_warning" {
|
||||
description = "Elasticache swap warning threshold in bytes"
|
||||
type = "string"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "swap_threshold_critical" {
|
||||
description = "Elasticache swap critical threshold in bytes"
|
||||
type = "string"
|
||||
default = 50000000
|
||||
}
|
||||
|
||||
variable "free_memory_silenced" {
|
||||
description = "Groups to mute for Elasticache free memory monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "free_memory_enabled" {
|
||||
description = "Flag to enable Elasticache free memory monitor"
|
||||
type = "string"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "free_memory_extra_tags" {
|
||||
description = "Extra tags for Elasticache free memory monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "free_memory_message" {
|
||||
description = "Custom message for Elasticache free memory monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "free_memory_condition_timeframe" {
|
||||
description = "Monitor condition timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "free_memory_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "free_memory_threshold_warning" {
|
||||
description = "Elasticache free memory warning threshold in percentage"
|
||||
type = "string"
|
||||
default = -50
|
||||
}
|
||||
|
||||
variable "free_memory_threshold_critical" {
|
||||
description = "Elasticache free memory critical threshold in percentage"
|
||||
type = "string"
|
||||
default = -70
|
||||
}
|
||||
|
||||
variable "eviction_growing_silenced" {
|
||||
description = "Groups to mute for Elasticache eviction growing monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "eviction_growing_enabled" {
|
||||
description = "Flag to enable Elasticache eviction growing monitor"
|
||||
type = "string"
|
||||
default = "true"
|
||||
}
|
||||
|
||||
variable "eviction_growing_extra_tags" {
|
||||
description = "Extra tags for Elasticache eviction growing monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "eviction_growing_message" {
|
||||
description = "Custom message for Elasticache eviction growing monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "eviction_growing_condition_timeframe" {
|
||||
description = "Monitor condition timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "eviction_growing_timeframe" {
|
||||
description = "Monitor timeframe for Elasticache eviction growing [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "eviction_growing_threshold_warning" {
|
||||
description = "Elasticache eviction growing warning threshold in percentage"
|
||||
type = "string"
|
||||
default = 10
|
||||
}
|
||||
|
||||
variable "eviction_growing_threshold_critical" {
|
||||
description = "Elasticache eviction growing critical threshold in percentage"
|
||||
type = "string"
|
||||
default = 30
|
||||
}
|
||||
8
cloud/aws/elasticache/common/modules.tf
Normal file
8
cloud/aws/elasticache/common/modules.tf
Normal file
@ -0,0 +1,8 @@
|
||||
# Instantiates the shared filter-tags module (relative path inside this repository).
# The monitors in this module append its `query_alert` output to their metric
# names to scope queries; the filter is driven by the environment, the fixed
# resource identifier "aws_elasticache", and the two filter_tags_* inputs.
module "filter-tags" {
|
||||
source = "../../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
resource = "aws_elasticache"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
}
|
||||
187
cloud/aws/elasticache/common/monitors-elasticache.tf
Normal file
187
cloud/aws/elasticache/common/monitors-elasticache.tf
Normal file
@ -0,0 +1,187 @@
|
||||
# Eviction monitor: alerts when the evictions metric, summed over
# var.eviction_timeframe, is greater than var.eviction_threshold_critical.
# The query is grouped by region, cacheclusterid and cachenodeid.
# The monitor is only created when var.eviction_enabled is true (count 1 or 0).
resource "datadog_monitor" "elasticache_eviction" {
|
||||
count = "${var.eviction_enabled ? 1 : 0}"
|
||||
name = "[${var.environment}] Elasticache eviction {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}"
|
||||
# Monitor-specific message wins; the module-wide message is the fallback when it is empty.
message = "${coalesce(var.eviction_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
# The comparison value in the query is the critical threshold; it must stay in
# sync with thresholds.critical below (both read the same variable).
query = <<EOF
|
||||
sum(${var.eviction_timeframe}): (
|
||||
avg:aws.elasticache.evictions${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) > ${var.eviction_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
warning = "${var.eviction_threshold_warning}"
|
||||
critical = "${var.eviction_threshold_critical}"
|
||||
}
|
||||
|
||||
# Missing data does not trigger a notification for this monitor.
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.eviction_silenced}"
|
||||
|
||||
# Fixed tag set followed by the caller-supplied extra tags list.
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.eviction_extra_tags}"]
|
||||
}
|
||||
|
||||
# Max-connections monitor: alerts when curr_connections, aggregated with
# var.max_connection_time_aggregator over var.max_connection_timeframe,
# is >= 65000. The query is grouped by region, cacheclusterid and cachenodeid.
# The 65000 limit is hard-coded in the query and there is no thresholds block,
# so this monitor has no warning level and the limit is not configurable.
# NOTE(review): 65000 presumably mirrors the ElastiCache per-node connection limit — confirm.
# The monitor is only created when var.max_connection_enabled is true.
resource "datadog_monitor" "elasticache_max_connection" {
|
||||
count = "${var.max_connection_enabled ? 1 : 0}"
|
||||
name = "[${var.environment}] Elasticache max connections reached {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}"
|
||||
# Monitor-specific message wins; the module-wide message is the fallback when it is empty.
message = "${coalesce(var.max_connection_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
${var.max_connection_time_aggregator}(${var.max_connection_timeframe}): (
|
||||
avg:aws.elasticache.curr_connections${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) >= 65000
|
||||
EOF
|
||||
|
||||
# This monitor notifies when the metric stops reporting data.
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.max_connection_silenced}"
|
||||
|
||||
# Fixed tag set followed by the caller-supplied extra tags list.
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.max_connection_extra_tags}"]
|
||||
}
|
||||
|
||||
# No-connection monitor: alerts when curr_connections, aggregated with
# var.no_connection_time_aggregator over var.no_connection_timeframe, is <= 0.
# The query is grouped by region, cacheclusterid and cachenodeid.
# The comparison value 0 is hard-coded and there is no thresholds block,
# so this monitor has no warning level.
# The monitor is only created when var.no_connection_enabled is true.
resource "datadog_monitor" "elasticache_no_connection" {
|
||||
count = "${var.no_connection_enabled ? 1 : 0}"
|
||||
name = "[${var.environment}] Elasticache connections {{#is_alert}}{{{comparator}}} {{threshold}} {{/is_alert}}"
|
||||
# Monitor-specific message wins; the module-wide message is the fallback when it is empty.
message = "${coalesce(var.no_connection_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
${var.no_connection_time_aggregator}(${var.no_connection_timeframe}): (
|
||||
avg:aws.elasticache.curr_connections${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
) <= 0
|
||||
EOF
|
||||
|
||||
# Missing data does not trigger a notification for this monitor.
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.no_connection_silenced}"
|
||||
|
||||
# Fixed tag set followed by the caller-supplied extra tags list.
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.no_connection_extra_tags}"]
|
||||
}
|
||||
|
||||
# Swap monitor: alerts when swap_usage, aggregated with
# var.swap_time_aggregator over var.swap_timeframe, is greater than
# var.swap_threshold_critical. The query is grouped by region,
# cacheclusterid and cachenodeid.
# The monitor is only created when var.swap_enabled is true (count 1 or 0).
#
# The thresholds are expressed in bytes (see the swap_threshold_* inputs) and
# are compared directly against the metric, so the title labels the threshold
# and the value with "B" rather than "MB".
resource "datadog_monitor" "elasticache_swap" {
  count = "${var.swap_enabled ? 1 : 0}"
  name  = "[${var.environment}] Elasticache swap {{#is_alert}}{{{comparator}}} {{threshold}}B ({{value}}B){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}B ({{value}}B){{/is_warning}}"

  # Monitor-specific message wins; the module-wide message is the fallback when it is empty.
  message = "${coalesce(var.swap_message, var.message)}"

  type = "metric alert"

  # The comparison value in the query is the critical threshold; it reads the
  # same variable as thresholds.critical below. The heredoc body and its
  # terminator are kept at column 0 so the query text is unchanged.
  query = <<EOF
${var.swap_time_aggregator}(${var.swap_timeframe}): (
avg:aws.elasticache.swap_usage${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
) > ${var.swap_threshold_critical}
EOF

  thresholds {
    warning  = "${var.swap_threshold_warning}"
    critical = "${var.swap_threshold_critical}"
  }

  # Missing data does not trigger a notification for this monitor.
  notify_no_data      = false
  evaluation_delay    = "${var.evaluation_delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = false
  new_host_delay      = "${var.new_host_delay}"

  silenced = "${var.swap_silenced}"

  # Fixed tag set followed by the caller-supplied extra tags list.
  tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.swap_extra_tags}"]
}
|
||||
|
||||
# Free-memory monitor: alerts when the percentage change (pct_change) of
# freeable_memory is lower than var.free_memory_threshold_critical.
# The query is grouped by region, cacheclusterid and cachenodeid.
# The comparison is "<", so this monitor triggers on a drop in freeable memory.
# The monitor is only created when var.free_memory_enabled is true.
# NOTE(review): pct_change receives avg(free_memory_timeframe) as first argument and
# free_memory_condition_timeframe as second — confirm the argument roles against
# the Datadog change-alert query syntax.
resource "datadog_monitor" "elasticache_free_memory" {
|
||||
count = "${var.free_memory_enabled ? 1 : 0}"
|
||||
name = "[${var.environment}] Elasticache free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
# Monitor-specific message wins; the module-wide message is the fallback when it is empty.
message = "${coalesce(var.free_memory_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
pct_change(avg(${var.free_memory_timeframe}),${var.free_memory_condition_timeframe}):
|
||||
avg:aws.elasticache.freeable_memory${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
< ${var.free_memory_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
warning = "${var.free_memory_threshold_warning}"
|
||||
critical = "${var.free_memory_threshold_critical}"
|
||||
}
|
||||
|
||||
# Missing data does not trigger a notification for this monitor.
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.free_memory_silenced}"
|
||||
|
||||
# Fixed tag set followed by the caller-supplied extra tags list.
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.free_memory_extra_tags}"]
|
||||
}
|
||||
|
||||
# Eviction-growth monitor: alerts when the percentage change (pct_change) of
# the evictions metric is greater than var.eviction_growing_threshold_critical.
# The query is grouped by region, cacheclusterid and cachenodeid.
# The monitor is only created when var.eviction_growing_enabled is true.
# NOTE(review): pct_change receives avg(eviction_growing_timeframe) as first argument and
# eviction_growing_condition_timeframe as second — confirm the argument roles against
# the Datadog change-alert query syntax.
resource "datadog_monitor" "elasticache_eviction_growing" {
|
||||
count = "${var.eviction_growing_enabled ? 1 : 0}"
|
||||
name = "[${var.environment}] Elasticache evictions is growing {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}"
|
||||
# Monitor-specific message wins; the module-wide message is the fallback when it is empty.
message = "${coalesce(var.eviction_growing_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
pct_change(avg(${var.eviction_growing_timeframe}),${var.eviction_growing_condition_timeframe}):
|
||||
avg:aws.elasticache.evictions${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
|
||||
> ${var.eviction_growing_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
warning = "${var.eviction_growing_threshold_warning}"
|
||||
critical = "${var.eviction_growing_threshold_critical}"
|
||||
}
|
||||
|
||||
# Missing data does not trigger a notification for this monitor.
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.eviction_growing_silenced}"
|
||||
|
||||
# Fixed tag set followed by the caller-supplied extra tags list.
tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache", "team:claranet", "created-by:terraform", "${var.eviction_growing_extra_tags}"]
|
||||
}
|
||||
29
cloud/aws/elasticache/common/outputs.tf
Normal file
29
cloud/aws/elasticache/common/outputs.tf
Normal file
@ -0,0 +1,29 @@
|
||||
# Exposes the id of the eviction monitor. The splat form (*.id) is used
# because the monitor resource is declared with count.
output "elasticache_eviction_id" {
|
||||
description = "id for monitor elasticache_eviction"
|
||||
value = "${datadog_monitor.elasticache_eviction.*.id}"
|
||||
}
|
||||
|
||||
# Exposes the id of the max-connection monitor. The splat form (*.id) is used
# because the monitor resource is declared with count.
output "elasticache_max_connection_id" {
|
||||
description = "id for monitor elasticache_max_connection"
|
||||
value = "${datadog_monitor.elasticache_max_connection.*.id}"
|
||||
}
|
||||
|
||||
# Exposes the id of the no-connection monitor. The splat form (*.id) is used
# because the monitor resource is declared with count.
output "elasticache_no_connection_id" {
|
||||
description = "id for monitor elasticache_no_connection"
|
||||
value = "${datadog_monitor.elasticache_no_connection.*.id}"
|
||||
}
|
||||
|
||||
# Exposes the id of the swap monitor. The splat form (*.id) is used
# because the monitor resource is declared with count.
output "elasticache_swap_id" {
|
||||
description = "id for monitor elasticache_swap"
|
||||
value = "${datadog_monitor.elasticache_swap.*.id}"
|
||||
}
|
||||
|
||||
# Exposes the id of the free-memory monitor. The splat form (*.id) is used
# because the monitor resource is declared with count.
output "elasticache_free_memory_id" {
|
||||
description = "id for monitor elasticache_free_memory"
|
||||
value = "${datadog_monitor.elasticache_free_memory.*.id}"
|
||||
}
|
||||
|
||||
# Exposes the id of the eviction-growth monitor. The splat form (*.id) is used
# because the monitor resource is declared with count.
output "elasticache_eviction_growing_id" {
|
||||
description = "id for monitor elasticache_eviction_growing"
|
||||
value = "${datadog_monitor.elasticache_eviction_growing.*.id}"
|
||||
}
|
||||
63
cloud/aws/elasticache/memcached/README.md
Normal file
63
cloud/aws/elasticache/memcached/README.md
Normal file
@ -0,0 +1,63 @@
|
||||
# CLOUD AWS ELASTICACHE MEMCACHED DataDog monitors
|
||||
|
||||
## How to use this module
|
||||
|
||||
```
|
||||
module "datadog-monitors-cloud-aws-elasticache-memcached" {
|
||||
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/memcached?ref={revision}"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${module.datadog-message-alerting.alerting-message}"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
## Purpose
|
||||
|
||||
Creates DataDog monitors with the following checks:
|
||||
|
||||
- Elasticache memcached CPU
|
||||
- Elasticache memcached get hit ratio
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| cpu_high_enabled | Flag to enable Elasticache memcached cpu high monitor | string | `true` | no |
|
||||
| cpu_high_extra_tags | Extra tags for Elasticache memcached cpu high monitor | list | `<list>` | no |
|
||||
| cpu_high_message | Custom message for Elasticache memcached cpu high monitor | string | `` | no |
|
||||
| cpu_high_silenced | Groups to mute for Elasticache memcached cpu high monitor | map | `<map>` | no |
|
||||
| cpu_high_threshold_critical | Elasticache memcached cpu high critical threshold in percentage | string | `90` | no |
|
||||
| cpu_high_threshold_warning | Elasticache memcached cpu high warning threshold in percentage | string | `75` | no |
|
||||
| cpu_high_time_aggregator | Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg] | string | `min` | no |
|
||||
| cpu_high_timeframe | Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| environment | Infrastructure Environment | string | - | yes |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| get_hits_enabled | Flag to enable Elasticache memcached get hits monitor | string | `true` | no |
|
||||
| get_hits_extra_tags | Extra tags for Elasticache memcached get hits monitor | list | `<list>` | no |
|
||||
| get_hits_message | Custom message for Elasticache memcached get hits monitor | string | `` | no |
|
||||
| get_hits_silenced | Groups to mute for Elasticache memcached get hits monitor | map | `<map>` | no |
|
||||
| get_hits_threshold_critical | Elasticache memcached get hits critical threshold in percentage | string | `60` | no |
|
||||
| get_hits_threshold_warning | Elasticache memcached get hits warning threshold in percentage | string | `80` | no |
|
||||
| get_hits_timeframe | Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| message | Message sent when an alert is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| memcached_cpu_high_id | id for monitor memcached_cpu_high |
|
||||
| memcached_get_hits_id | id for monitor memcached_get_hits |
|
||||
|
||||
Related documentation
|
||||
---------------------
|
||||
|
||||
DataDog documentation:
|
||||
|
||||
* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/)
|
||||
* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/)
|
||||
|
||||
|
||||
121
cloud/aws/elasticache/memcached/inputs.tf
Normal file
121
cloud/aws/elasticache/memcached/inputs.tf
Normal file
@ -0,0 +1,121 @@
|
||||
# Global Terraform

# Environment name; used in monitor names/tags and forwarded to the
# filter-tags module.
variable "environment" {
  description = "Infrastructure Environment"
  type        = "string"
}

# Global DataDog
# The variables below are shared by every monitor of this module. Their type
# is declared explicitly, like all the monitor-specific variables of this file,
# instead of being inferred from the default value.

# Forwarded to the `evaluation_delay` argument of each datadog_monitor.
variable "evaluation_delay" {
  description = "Delay in seconds for the metric evaluation"
  type        = "string"
  default     = 900
}

# Forwarded to the `new_host_delay` argument of each datadog_monitor.
variable "new_host_delay" {
  description = "Delay in seconds before monitor new resource"
  type        = "string"
  default     = 300
}

# Module-wide notification message; a monitor falls back to it when its own
# `*_message` variable is left empty.
variable "message" {
  description = "Message sent when an alert is triggered"
  type        = "string"
}

# Both filter_tags_* variables are handed over as-is to the filter-tags module.
variable "filter_tags_use_defaults" {
  description = "Use default filter tags convention"
  type        = "string"
  default     = "true"
}

variable "filter_tags_custom" {
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
  type        = "string"
  default     = "*"
}
|
||||
|
||||
# Memcached specific
#
# Settings of the "get hit ratio" monitor (datadog_monitor.memcached_get_hits).

# Switch: the monitor resource is created only when this evaluates to true.
variable "get_hits_enabled" {
  type        = "string"
  description = "Flag to enable Elasticache memcached get hits monitor"
  default     = "true"
}

# Per-monitor message; when empty the module-wide `message` is used instead.
variable "get_hits_message" {
  type        = "string"
  description = "Custom message for Elasticache memcached get hits monitor"
  default     = ""
}

# Passed unchanged to the monitor's `silenced` argument.
variable "get_hits_silenced" {
  type        = "map"
  description = "Groups to mute for Elasticache memcached get hits monitor"
  default     = {}
}

# Added to the fixed tag list of the monitor.
variable "get_hits_extra_tags" {
  type        = "list"
  description = "Extra tags for Elasticache memcached get hits monitor"
  default     = []
}

# Evaluation window placed inside `sum(...)` at the start of the query.
variable "get_hits_timeframe" {
  type        = "string"
  description = "Monitor timeframe for Elasticache memcached get hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  default     = "last_15m"
}

# The query uses `<`, so the monitor fires when the ratio drops BELOW a
# threshold: warning (80) is therefore higher than critical (60).
variable "get_hits_threshold_warning" {
  type        = "string"
  description = "Elasticache memcached get hits warning threshold in percentage"
  default     = 80
}

variable "get_hits_threshold_critical" {
  type        = "string"
  description = "Elasticache memcached get hits critical threshold in percentage"
  default     = 60
}
|
||||
|
||||
# Settings of the "CPU high" monitor (datadog_monitor.memcached_cpu_high).

# Switch: the monitor resource is created only when this evaluates to true.
variable "cpu_high_enabled" {
  type        = "string"
  description = "Flag to enable Elasticache memcached cpu high monitor"
  default     = "true"
}

# Per-monitor message; when empty the module-wide `message` is used instead.
variable "cpu_high_message" {
  type        = "string"
  description = "Custom message for Elasticache memcached cpu high monitor"
  default     = ""
}

# Passed unchanged to the monitor's `silenced` argument.
variable "cpu_high_silenced" {
  type        = "map"
  description = "Groups to mute for Elasticache memcached cpu high monitor"
  default     = {}
}

# Added to the fixed tag list of the monitor.
variable "cpu_high_extra_tags" {
  type        = "list"
  description = "Extra tags for Elasticache memcached cpu high monitor"
  default     = []
}

# Aggregation function and window that open the monitor query:
# `<time_aggregator>(<timeframe>): ( ... )`.
variable "cpu_high_time_aggregator" {
  type        = "string"
  description = "Monitor aggregator for Elasticache memcached cpu high [available values: min, max or avg]"
  default     = "min"
}

variable "cpu_high_timeframe" {
  type        = "string"
  description = "Monitor timeframe for Elasticache memcached cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  default     = "last_15m"
}

# The query uses `>`, so the monitor fires when CPU rises ABOVE a threshold.
variable "cpu_high_threshold_warning" {
  type        = "string"
  description = "Elasticache memcached cpu high warning threshold in percentage"
  default     = 75
}

variable "cpu_high_threshold_critical" {
  type        = "string"
  description = "Elasticache memcached cpu high critical threshold in percentage"
  default     = 90
}
|
||||
8
cloud/aws/elasticache/memcached/modules.tf
Normal file
8
cloud/aws/elasticache/memcached/modules.tf
Normal file
@ -0,0 +1,8 @@
|
||||
# Shared helper module; its `query_alert` output is appended to every metric
# name in the monitor queries of this module.
module "filter-tags" {
  source = "../../../../common/filter-tags"

  resource    = "aws_elasticache"
  environment = "${var.environment}"

  # Filtering behaviour is entirely driven by the caller-supplied variables.
  filter_tags_custom       = "${var.filter_tags_custom}"
  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
}
|
||||
67
cloud/aws/elasticache/memcached/monitors-memcached.tf
Normal file
67
cloud/aws/elasticache/memcached/monitors-memcached.tf
Normal file
@ -0,0 +1,67 @@
|
||||
# Memcached get hit ratio monitor.
#
# Computes get_hits / (get_hits + get_misses) per region / cache cluster /
# cache node and alerts when that ratio, expressed as a percentage, drops
# below the configured thresholds.
resource "datadog_monitor" "memcached_get_hits" {
  # Created only when the monitor is enabled.
  count = "${var.get_hits_enabled ? 1 : 0}"
  name  = "[${var.environment}] Elasticache memcached get hit ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # Monitor-specific message when set, module-wide message otherwise.
  message = "${coalesce(var.get_hits_message, var.message)}"

  type = "metric alert"

  # The ratio is multiplied by 100 so that it is on the same 0-100 scale as
  # the percentage thresholds (defaults 80 / 60) and as the "%" rendered in
  # the monitor name. Without the scaling the raw ratio (at most 1) is always
  # below the thresholds and the monitor is permanently in alert. This matches
  # the redis cache hit ratio monitor.
  query = <<EOF
sum(${var.get_hits_timeframe}): (
avg:aws.elasticache.get_hits${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count() /
(avg:aws.elasticache.get_hits${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count() +
avg:aws.elasticache.get_misses${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count())
) * 100 < ${var.get_hits_threshold_critical}
EOF

  thresholds {
    warning  = "${var.get_hits_threshold_warning}"
    critical = "${var.get_hits_threshold_critical}"
  }

  notify_no_data      = false
  evaluation_delay    = "${var.evaluation_delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = false
  new_host_delay      = "${var.new_host_delay}"

  silenced = "${var.get_hits_silenced}"

  # Fixed tags first; the caller-supplied extra tags list comes last.
  tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "engine:memcached", "${var.get_hits_extra_tags}"]
}
|
||||
|
||||
# Memcached CPU monitor: alerts when aws.elasticache.cpuutilization, grouped
# by region / cache cluster / cache node, goes above the configured thresholds.
resource "datadog_monitor" "memcached_cpu_high" {
  # Created only when the monitor is enabled.
  count = "${var.cpu_high_enabled ? 1 : 0}"

  type = "metric alert"
  name = "[${var.environment}] Elasticache memcached CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # Monitor-specific message when set, module-wide message otherwise.
  message = "${coalesce(var.cpu_high_message, var.message)}"

  # Aggregator and timeframe are both configurable; the comparison value must
  # stay equal to the critical threshold declared below.
  query = <<EOF
${var.cpu_high_time_aggregator}(${var.cpu_high_timeframe}): (
avg:aws.elasticache.cpuutilization${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
) > ${var.cpu_high_threshold_critical}
EOF

  thresholds {
    critical = "${var.cpu_high_threshold_critical}"
    warning  = "${var.cpu_high_threshold_warning}"
  }

  # Timing options shared by all monitors of the module.
  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  # Notification behaviour; unlike the hit ratio monitor, missing data is
  # reported here.
  notify_no_data      = true
  notify_audit        = false
  renotify_interval   = 0
  timeout_h           = 0
  require_full_window = false
  include_tags        = true
  locked              = false

  silenced = "${var.cpu_high_silenced}"

  # Fixed tags first; the caller-supplied extra tags list comes last.
  tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-memcached", "team:claranet", "created-by:terraform", "engine:memcached", "${var.cpu_high_extra_tags}"]
}
|
||||
9
cloud/aws/elasticache/memcached/outputs.tf
Normal file
9
cloud/aws/elasticache/memcached/outputs.tf
Normal file
@ -0,0 +1,9 @@
|
||||
# Monitor ids exposed to the caller. Each monitor is declared with `count`,
# hence the splat (`.*.id`) reference.

output "memcached_get_hits_id" {
  value       = "${datadog_monitor.memcached_get_hits.*.id}"
  description = "id for monitor memcached_get_hits"
}

output "memcached_cpu_high_id" {
  value       = "${datadog_monitor.memcached_cpu_high.*.id}"
  description = "id for monitor memcached_cpu_high"
}
|
||||
77
cloud/aws/elasticache/redis/README.md
Normal file
77
cloud/aws/elasticache/redis/README.md
Normal file
@ -0,0 +1,77 @@
|
||||
# CLOUD AWS ELASTICACHE REDIS DataDog monitors
|
||||
|
||||
## How to use this module
|
||||
|
||||
```
|
||||
module "datadog-monitors-cloud-aws-elasticache-redis" {
|
||||
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache/redis?ref={revision}"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${module.datadog-message-alerting.alerting-message}"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
## Purpose
|
||||
|
||||
Creates DataDog monitors with the following checks:
|
||||
|
||||
- Elasticache redis cache hit ratio
|
||||
- Elasticache redis CPU
|
||||
- Elasticache redis is receiving no commands
|
||||
- Elasticache redis replication lag
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| cache_hits_enabled | Flag to enable Elasticache redis cache hits monitor | string | `true` | no |
|
||||
| cache_hits_extra_tags | Extra tags for Elasticache redis cache hits monitor | list | `<list>` | no |
|
||||
| cache_hits_message | Custom message for Elasticache redis cache hits monitor | string | `` | no |
|
||||
| cache_hits_silenced | Groups to mute for Elasticache redis cache hits monitor | map | `<map>` | no |
|
||||
| cache_hits_threshold_critical | Elasticache redis cache hits critical threshold in percentage | string | `60` | no |
|
||||
| cache_hits_threshold_warning | Elasticache redis cache hits warning threshold in percentage | string | `80` | no |
|
||||
| cache_hits_timeframe | Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| commands_enabled | Flag to enable Elasticache redis commands monitor | string | `true` | no |
|
||||
| commands_extra_tags | Extra tags for Elasticache redis commands monitor | list | `<list>` | no |
|
||||
| commands_message | Custom message for Elasticache redis commands monitor | string | `` | no |
|
||||
| commands_silenced | Groups to mute for Elasticache redis commands monitor | map | `<map>` | no |
|
||||
| commands_timeframe | Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| cpu_high_enabled | Flag to enable Elasticache redis cpu high monitor | string | `true` | no |
|
||||
| cpu_high_extra_tags | Extra tags for Elasticache redis cpu high monitor | list | `<list>` | no |
|
||||
| cpu_high_message | Custom message for Elasticache redis cpu high monitor | string | `` | no |
|
||||
| cpu_high_silenced | Groups to mute for Elasticache redis cpu high monitor | map | `<map>` | no |
|
||||
| cpu_high_threshold_critical | Elasticache redis cpu high critical threshold in percentage | string | `90` | no |
|
||||
| cpu_high_threshold_warning | Elasticache redis cpu high warning threshold in percentage | string | `75` | no |
|
||||
| cpu_high_time_aggregator | Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg] | string | `avg` | no |
|
||||
| cpu_high_timeframe | Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| environment | Infrastructure Environment | string | - | yes |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| message | Message sent when an alert is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
|
||||
| replication_lag_enabled | Flag to enable Elasticache redis replication lag monitor | string | `true` | no |
|
||||
| replication_lag_extra_tags | Extra tags for Elasticache redis replication lag monitor | list | `<list>` | no |
|
||||
| replication_lag_message | Custom message for Elasticache redis replication lag monitor | string | `` | no |
|
||||
| replication_lag_silenced | Groups to mute for Elasticache redis replication lag monitor | map | `<map>` | no |
|
||||
| replication_lag_threshold_critical | Elasticache redis replication lag critical threshold in seconds | string | `180` | no |
|
||||
| replication_lag_threshold_warning | Elasticache redis replication lag warning threshold in seconds | string | `90` | no |
|
||||
| replication_lag_time_aggregator | Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg] | string | `min` | no |
|
||||
| replication_lag_timeframe | Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_10m` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| redis_cache_hits_id | id for monitor redis_cache_hits |
|
||||
| redis_commands_id | id for monitor redis_commands |
|
||||
| redis_cpu_high_id | id for monitor redis_cpu_high |
|
||||
| redis_replication_lag_id | id for monitor redis_replication_lag |
|
||||
|
||||
## Related documentation
|
||||
|
||||
* [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/)
|
||||
* [https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/](https://www.datadoghq.com/blog/monitoring-elasticache-performance-metrics-with-redis-or-memcached/)
|
||||
|
||||
|
||||
199
cloud/aws/elasticache/redis/inputs.tf
Normal file
199
cloud/aws/elasticache/redis/inputs.tf
Normal file
@ -0,0 +1,199 @@
|
||||
# Global Terraform

# Environment name; used in monitor names/tags and forwarded to the
# filter-tags module.
variable "environment" {
  description = "Infrastructure Environment"
  type        = "string"
}

# Global DataDog
# The variables below are shared by every monitor of this module. Their type
# is declared explicitly, like all the monitor-specific variables of this file,
# instead of being inferred from the default value.

# Forwarded to the `evaluation_delay` argument of each datadog_monitor.
variable "evaluation_delay" {
  description = "Delay in seconds for the metric evaluation"
  type        = "string"
  default     = 900
}

# Forwarded to the `new_host_delay` argument of each datadog_monitor.
variable "new_host_delay" {
  description = "Delay in seconds before monitor new resource"
  type        = "string"
  default     = 300
}

# Module-wide notification message; a monitor falls back to it when its own
# `*_message` variable is left empty.
variable "message" {
  description = "Message sent when an alert is triggered"
  type        = "string"
}

# Both filter_tags_* variables are handed over as-is to the filter-tags module.
variable "filter_tags_use_defaults" {
  description = "Use default filter tags convention"
  type        = "string"
  default     = "true"
}

variable "filter_tags_custom" {
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
  type        = "string"
  default     = "*"
}
|
||||
|
||||
# redis specific
#
# Settings of the "cache hit ratio" monitor (datadog_monitor.redis_cache_hits).

# Switch: the monitor resource is created only when this evaluates to true.
variable "cache_hits_enabled" {
  type        = "string"
  description = "Flag to enable Elasticache redis cache hits monitor"
  default     = "true"
}

# Per-monitor message; when empty the module-wide `message` is used instead.
variable "cache_hits_message" {
  type        = "string"
  description = "Custom message for Elasticache redis cache hits monitor"
  default     = ""
}

# Passed unchanged to the monitor's `silenced` argument.
variable "cache_hits_silenced" {
  type        = "map"
  description = "Groups to mute for Elasticache redis cache hits monitor"
  default     = {}
}

# Added to the fixed tag list of the monitor.
variable "cache_hits_extra_tags" {
  type        = "list"
  description = "Extra tags for Elasticache redis cache hits monitor"
  default     = []
}

# Evaluation window placed inside `sum(...)` at the start of the query.
variable "cache_hits_timeframe" {
  type        = "string"
  description = "Monitor timeframe for Elasticache redis cache hits [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  default     = "last_15m"
}

# The query uses `<`, so the monitor fires when the ratio drops BELOW a
# threshold: warning (80) is therefore higher than critical (60).
variable "cache_hits_threshold_warning" {
  type        = "string"
  description = "Elasticache redis cache hits warning threshold in percentage"
  default     = 80
}

variable "cache_hits_threshold_critical" {
  type        = "string"
  description = "Elasticache redis cache hits critical threshold in percentage"
  default     = 60
}
|
||||
|
||||
# Settings of the "CPU high" monitor (datadog_monitor.redis_cpu_high).

# Switch: the monitor resource is created only when this evaluates to true.
variable "cpu_high_enabled" {
  type        = "string"
  description = "Flag to enable Elasticache redis cpu high monitor"
  default     = "true"
}

# Per-monitor message; when empty the module-wide `message` is used instead.
variable "cpu_high_message" {
  type        = "string"
  description = "Custom message for Elasticache redis cpu high monitor"
  default     = ""
}

# Passed unchanged to the monitor's `silenced` argument.
variable "cpu_high_silenced" {
  type        = "map"
  description = "Groups to mute for Elasticache redis cpu high monitor"
  default     = {}
}

# Added to the fixed tag list of the monitor.
variable "cpu_high_extra_tags" {
  type        = "list"
  description = "Extra tags for Elasticache redis cpu high monitor"
  default     = []
}

# Aggregation function and window that open the monitor query:
# `<time_aggregator>(<timeframe>): ( ... )`.
variable "cpu_high_time_aggregator" {
  type        = "string"
  description = "Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg]"
  default     = "avg"
}

variable "cpu_high_timeframe" {
  type        = "string"
  description = "Monitor timeframe for Elasticache redis cpu high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  default     = "last_15m"
}

# The query uses `>`, so the monitor fires when CPU rises ABOVE a threshold.
variable "cpu_high_threshold_warning" {
  type        = "string"
  description = "Elasticache redis cpu high warning threshold in percentage"
  default     = 75
}

variable "cpu_high_threshold_critical" {
  type        = "string"
  description = "Elasticache redis cpu high critical threshold in percentage"
  default     = 90
}
|
||||
|
||||
# Settings of the "replication lag" monitor
# (datadog_monitor.redis_replication_lag).

# Switch: the monitor resource is created only when this evaluates to true.
variable "replication_lag_enabled" {
  type        = "string"
  description = "Flag to enable Elasticache redis replication lag monitor"
  default     = "true"
}

# Per-monitor message; when empty the module-wide `message` is used instead.
variable "replication_lag_message" {
  type        = "string"
  description = "Custom message for Elasticache redis replication lag monitor"
  default     = ""
}

# Passed unchanged to the monitor's `silenced` argument.
variable "replication_lag_silenced" {
  type        = "map"
  description = "Groups to mute for Elasticache redis replication lag monitor"
  default     = {}
}

# Added to the fixed tag list of the monitor.
variable "replication_lag_extra_tags" {
  type        = "list"
  description = "Extra tags for Elasticache redis replication lag monitor"
  default     = []
}

# Aggregation function and window that open the monitor query:
# `<time_aggregator>(<timeframe>): ( ... )`.
variable "replication_lag_time_aggregator" {
  type        = "string"
  description = "Monitor aggregator for Elasticache redis replication lag [available values: min, max or avg]"
  default     = "min"
}

variable "replication_lag_timeframe" {
  type        = "string"
  description = "Monitor timeframe for Elasticache redis replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  default     = "last_10m"
}

# The query uses `>`, so the monitor fires when the lag rises ABOVE a
# threshold.
variable "replication_lag_threshold_warning" {
  type        = "string"
  description = "Elasticache redis replication lag warning threshold in seconds"
  default     = 90
}

variable "replication_lag_threshold_critical" {
  type        = "string"
  description = "Elasticache redis replication lag critical threshold in seconds"
  default     = 180
}
|
||||
|
||||
# Settings of the "receiving no commands" monitor
# (datadog_monitor.redis_commands). This monitor has no threshold variables:
# its query compares against a fixed 0.

# Switch: the monitor resource is created only when this evaluates to true.
variable "commands_enabled" {
  type        = "string"
  description = "Flag to enable Elasticache redis commands monitor"
  default     = "true"
}

# Per-monitor message; when empty the module-wide `message` is used instead.
variable "commands_message" {
  type        = "string"
  description = "Custom message for Elasticache redis commands monitor"
  default     = ""
}

# Passed unchanged to the monitor's `silenced` argument.
variable "commands_silenced" {
  type        = "map"
  description = "Groups to mute for Elasticache redis commands monitor"
  default     = {}
}

# Added to the fixed tag list of the monitor.
variable "commands_extra_tags" {
  type        = "list"
  description = "Extra tags for Elasticache redis commands monitor"
  default     = []
}

# Evaluation window placed inside `sum(...)` at the start of the query.
variable "commands_timeframe" {
  type        = "string"
  description = "Monitor timeframe for Elasticache redis commands [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  default     = "last_5m"
}
|
||||
8
cloud/aws/elasticache/redis/modules.tf
Normal file
8
cloud/aws/elasticache/redis/modules.tf
Normal file
@ -0,0 +1,8 @@
|
||||
# Shared helper module; its `query_alert` output is appended to every metric
# name in the monitor queries of this module.
module "filter-tags" {
  source = "../../../../common/filter-tags"

  resource    = "aws_elasticache"
  environment = "${var.environment}"

  # Filtering behaviour is entirely driven by the caller-supplied variables.
  filter_tags_custom       = "${var.filter_tags_custom}"
  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
}
|
||||
124
cloud/aws/elasticache/redis/monitors-redis.tf
Normal file
124
cloud/aws/elasticache/redis/monitors-redis.tf
Normal file
@ -0,0 +1,124 @@
|
||||
# Redis cache hit ratio monitor: cache_hits / (cache_hits + cache_misses),
# scaled to a percentage, per region / cache cluster / cache node. Alerts
# when the percentage drops below the configured thresholds.
resource "datadog_monitor" "redis_cache_hits" {
  # Created only when the monitor is enabled.
  count = "${var.cache_hits_enabled ? 1 : 0}"

  type = "metric alert"
  name = "[${var.environment}] Elasticache redis cache hit ratio {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # Monitor-specific message when set, module-wide message otherwise.
  message = "${coalesce(var.cache_hits_message, var.message)}"

  # `* 100` puts the ratio on the same scale as the percentage thresholds; the
  # comparison value must stay equal to the critical threshold declared below.
  query = <<EOF
sum(${var.cache_hits_timeframe}): (
avg:aws.elasticache.cache_hits${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count() /
(avg:aws.elasticache.cache_hits${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count() +
avg:aws.elasticache.cache_misses${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count())
) * 100 < ${var.cache_hits_threshold_critical}
EOF

  thresholds {
    critical = "${var.cache_hits_threshold_critical}"
    warning  = "${var.cache_hits_threshold_warning}"
  }

  # Timing options shared by all monitors of the module.
  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  # Notification behaviour.
  notify_no_data      = false
  notify_audit        = false
  renotify_interval   = 0
  timeout_h           = 0
  require_full_window = false
  include_tags        = true
  locked              = false

  silenced = "${var.cache_hits_silenced}"

  # Fixed tags first; the caller-supplied extra tags list comes last.
  tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.cache_hits_extra_tags}"]
}
|
||||
|
||||
# Redis CPU monitor: alerts when aws.elasticache.engine_cpuutilization,
# grouped by region / cache cluster / cache node, goes above the configured
# thresholds.
resource "datadog_monitor" "redis_cpu_high" {
  # Created only when the monitor is enabled.
  count = "${var.cpu_high_enabled ? 1 : 0}"
  name  = "[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # Monitor-specific message when set, module-wide message otherwise.
  message = "${coalesce(var.cpu_high_message, var.message)}"

  type = "metric alert"

  # Aggregator and timeframe are both configurable; the comparison value must
  # stay equal to the critical threshold declared below.
  query = <<EOF
${var.cpu_high_time_aggregator}(${var.cpu_high_timeframe}): (
avg:aws.elasticache.engine_cpuutilization${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
) > ${var.cpu_high_threshold_critical}
EOF

  # Declared like on every other thresholded monitor of this module. Without
  # this block `cpu_high_threshold_warning` was never used, so the monitor
  # had no warning state and the `{{#is_warning}}` part of the name could
  # never be rendered.
  thresholds {
    warning  = "${var.cpu_high_threshold_warning}"
    critical = "${var.cpu_high_threshold_critical}"
  }

  notify_no_data      = true
  evaluation_delay    = "${var.evaluation_delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = false
  new_host_delay      = "${var.new_host_delay}"

  silenced = "${var.cpu_high_silenced}"

  # Fixed tags first; the caller-supplied extra tags list comes last.
  tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.cpu_high_extra_tags}"]
}
|
||||
|
||||
# Redis replication lag monitor: alerts when aws.elasticache.replication_lag,
# grouped by region / cache cluster / cache node, goes above the configured
# thresholds (the monitor name renders the values with an "s" suffix).
resource "datadog_monitor" "redis_replication_lag" {
  # Created only when the monitor is enabled.
  count = "${var.replication_lag_enabled ? 1 : 0}"

  type = "metric alert"
  name = "[${var.environment}] Elasticache redis replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"

  # Monitor-specific message when set, module-wide message otherwise.
  message = "${coalesce(var.replication_lag_message, var.message)}"

  # Aggregator and timeframe are both configurable; the comparison value must
  # stay equal to the critical threshold declared below.
  query = <<EOF
${var.replication_lag_time_aggregator}(${var.replication_lag_timeframe}): (
avg:aws.elasticache.replication_lag${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}
) > ${var.replication_lag_threshold_critical}
EOF

  thresholds {
    critical = "${var.replication_lag_threshold_critical}"
    warning  = "${var.replication_lag_threshold_warning}"
  }

  # Timing options shared by all monitors of the module.
  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  # Notification behaviour.
  notify_no_data      = false
  notify_audit        = false
  renotify_interval   = 0
  timeout_h           = 0
  require_full_window = false
  include_tags        = true
  locked              = false

  silenced = "${var.replication_lag_silenced}"

  # Fixed tags first; the caller-supplied extra tags list comes last.
  tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.replication_lag_extra_tags}"]
}
|
||||
|
||||
# Redis activity monitor: alerts when the sum of get-type and set-type
# commands over the timeframe, per region / cache cluster / cache node, is
# zero or less. The comparison value is fixed, so no thresholds are declared.
resource "datadog_monitor" "redis_commands" {
  # Created only when the monitor is enabled.
  count = "${var.commands_enabled ? 1 : 0}"

  type = "metric alert"
  name = "[${var.environment}] Elasticache redis is receiving no commands"

  # Monitor-specific message when set, module-wide message otherwise.
  message = "${coalesce(var.commands_message, var.message)}"

  query = <<EOF
sum(${var.commands_timeframe}): (
avg:aws.elasticache.get_type_cmds${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count() +
avg:aws.elasticache.set_type_cmds${module.filter-tags.query_alert} by {region,cacheclusterid,cachenodeid}.as_count()
) <= 0
EOF

  # Timing options shared by all monitors of the module.
  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  # Notification behaviour.
  notify_no_data      = false
  notify_audit        = false
  renotify_interval   = 0
  timeout_h           = 0
  require_full_window = false
  include_tags        = true
  locked              = false

  silenced = "${var.commands_silenced}"

  # Fixed tags first; the caller-supplied extra tags list comes last.
  tags = ["env:${var.environment}", "type:cloud", "provider:aws", "resource:elasticache-redis", "team:claranet", "created-by:terraform", "engine:redis", "${var.commands_extra_tags}"]
}
|
||||
19
cloud/aws/elasticache/redis/outputs.tf
Normal file
19
cloud/aws/elasticache/redis/outputs.tf
Normal file
@ -0,0 +1,19 @@
|
||||
# Monitor ids exposed to the caller. Each monitor is declared with `count`,
# hence the splat (`.*.id`) reference.

output "redis_cache_hits_id" {
  value       = "${datadog_monitor.redis_cache_hits.*.id}"
  description = "id for monitor redis_cache_hits"
}

output "redis_cpu_high_id" {
  value       = "${datadog_monitor.redis_cpu_high.*.id}"
  description = "id for monitor redis_cpu_high"
}

output "redis_replication_lag_id" {
  value       = "${datadog_monitor.redis_replication_lag.*.id}"
  description = "id for monitor redis_replication_lag"
}

output "redis_commands_id" {
  value       = "${datadog_monitor.redis_commands.*.id}"
  description = "id for monitor redis_commands"
}
|
||||
Loading…
x
Reference in New Issue
Block a user