From fdc0b086475a762695a97e1c958b979a90a549c8 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Thu, 3 May 2018 14:38:55 +0200 Subject: [PATCH] MON-32 - Elasticache CPU monitors added --- cloud/aws/elasticache/README.md | 45 ++++++++++++++ cloud/aws/elasticache/inputs.tf | 59 +++++++++++++++++++ cloud/aws/elasticache/monitors-elasticache.tf | 39 ++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 cloud/aws/elasticache/README.md create mode 100644 cloud/aws/elasticache/inputs.tf create mode 100644 cloud/aws/elasticache/monitors-elasticache.tf diff --git a/cloud/aws/elasticache/README.md b/cloud/aws/elasticache/README.md new file mode 100644 index 0000000..6f395a4 --- /dev/null +++ b/cloud/aws/elasticache/README.md @@ -0,0 +1,45 @@ +AWS ElastiCache Service DataDog monitors +======================================== + +How to use this module +---------------------- + +``` +module "datadog-monitors-aws-elasticache" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticache?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks: + +* CPU High + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cpu_aggregator | Monitor aggregator for Elasticache CPU high [available values: min, max, sum or avg] | string | `min` | no | +| cpu_message | Custom message for Elasticache CPU high monitor | string | `` | no | +| cpu_silenced | Groups to mute for Elasticache CPU high monitor | map | `` | no | +| cpu_threshold_critical | Elasticache CPU high critical threshold in percentage | string | `95` | no | +| cpu_threshold_warning | Elasticache CPU high warning threshold in percentage | string | `80` | no | +| cpu_timeframe | Monitor timeframe for Elasticache CPU high [available values: `last_#m` (1, 5, 10, 
15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_elasticache/](https://docs.datadoghq.com/integrations/amazon_elasticache/) + +AWS ElastiCache metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/elasticache-metricscollected.html) diff --git a/cloud/aws/elasticache/inputs.tf b/cloud/aws/elasticache/inputs.tf new file mode 100644 index 0000000..2e6aa71 --- /dev/null +++ b/cloud/aws/elasticache/inputs.tf @@ -0,0 +1,59 @@ +# Global Terraform +variable "environment" { + description = "Architecture Environment" + type = "string" +} + +# Global DataDog +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# Elasticache specific +variable "cpu_silenced" { + description = "Groups to mute for Elasticache CPU high monitor" + type = "map" + default = {} +} + +variable "cpu_message" { + description = "Custom message for Elasticache CPU high monitor" + type = "string" + default = "" 
+}
+
+variable "cpu_aggregator" {
+  description = "Monitor aggregator for Elasticache CPU high [available values: min, max, sum or avg]"
+  type        = "string"
+  default     = "min"
+}
+
+variable "cpu_timeframe" {
+  description = "Monitor timeframe for Elasticache CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
+  default     = "last_15m"
+}
+
+variable "cpu_threshold_warning" {
+  description = "Elasticache CPU high warning threshold in percentage"
+  default     = 80
+}
+
+variable "cpu_threshold_critical" {
+  description = "Elasticache CPU high critical threshold in percentage"
+  default     = 95
+}
diff --git a/cloud/aws/elasticache/monitors-elasticache.tf b/cloud/aws/elasticache/monitors-elasticache.tf
new file mode 100644
index 0000000..0587feb
--- /dev/null
+++ b/cloud/aws/elasticache/monitors-elasticache.tf
@@ -0,0 +1,46 @@
+# Renders the tag filter used in the monitor query: the Elasticache default tag
+# convention when filter_tags_use_defaults is "true", otherwise filter_tags_custom.
+data "template_file" "filter" {
+  template = "$${filter}"
+
+  vars {
+    filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_elasticache:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
+  }
+}
+
+# Datadog metric alert on Elasticache CPU usage with warning and critical thresholds.
+resource "datadog_monitor" "elasticache_cpu_high" {
+  name    = "[${var.environment}] Elasticache CPU high {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
+  message = "${coalesce(var.cpu_message, var.message)}"
+
+  type = "metric alert"
+
+  # NOTE(review): the heredoc body was missing from the copy of the patch this was
+  # rebuilt from (only "> ${var.cpu_threshold_critical}" survived). It is reconstructed
+  # from the cpu_aggregator / cpu_timeframe inputs; the metric name and the "by"
+  # grouping are assumptions -- confirm against the original commit before applying.
+  query = <<EOF
+    ${var.cpu_aggregator}(${var.cpu_timeframe}): (
+      ${var.cpu_aggregator}:aws.elasticache.cpuutilization{${data.template_file.filter.rendered}} by {region,cacheclusterid}
+    ) > ${var.cpu_threshold_critical}
+  EOF
+
+  thresholds {
+    warning  = "${var.cpu_threshold_warning}"
+    critical = "${var.cpu_threshold_critical}"
+  }
+
+  notify_no_data      = true
+  evaluation_delay    = "${var.delay}"
+  renotify_interval   = 0
+  notify_audit        = false
+  timeout_h           = 0
+  include_tags        = true
+  locked              = false
+  require_full_window = false
+  new_host_delay      = "${var.delay}"
+
+  silenced = "${var.cpu_silenced}"
+
+  tags = ["env:${var.environment}", "resource:elasticache", "team:aws", "provider:aws"]
+}