From 587a1472ca0131adbd6ded7336bff54b256487eb Mon Sep 17 00:00:00 2001 From: Guillaume Kerivel Date: Wed, 17 Jan 2018 16:31:50 +0100 Subject: [PATCH] MON-107 Add AWS ElasticSearch Service basics monitors --- cloud/aws/elasticsearch/README.md | 48 +++++++++ cloud/aws/elasticsearch/inputs.tf | 50 +++++++++ .../elasticsearch/monitors-elasticsearch.tf | 102 ++++++++++++++++++ 3 files changed, 200 insertions(+) create mode 100644 cloud/aws/elasticsearch/README.md create mode 100644 cloud/aws/elasticsearch/inputs.tf create mode 100644 cloud/aws/elasticsearch/monitors-elasticsearch.tf diff --git a/cloud/aws/elasticsearch/README.md b/cloud/aws/elasticsearch/README.md new file mode 100644 index 0000000..770e237 --- /dev/null +++ b/cloud/aws/elasticsearch/README.md @@ -0,0 +1,48 @@ +AWS ElasticSearch Service DataDog monitors +========================================== + +How to use this module +---------------------- + +``` +module "datadog-monitors-aws-elasticsearch" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/aws/elasticsearch?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" + + es_cluster_volume_size = "100" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks: + +* Cluster status not green +* Free disk space low +* CPU usage high + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no | +| cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no | +| diskspace_threshold_critical | Disk free space in percent (critical threshold) | string | `10` | no | +| diskspace_threshold_warning | Disk free space in percent (warning threshold) | string | `20` | no | +| environment | Architecture Environment | string | - | yes | +| es_cluster_volume_size | ElasticSearch 
Domain volume size (in GB) | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `600` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/amazon_es/](https://docs.datadoghq.com/integrations/amazon_es/) + +AWS ElasticSearch Service Instance metrics documentation: [https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/es-metricscollected.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/es-metricscollected.html) diff --git a/cloud/aws/elasticsearch/inputs.tf b/cloud/aws/elasticsearch/inputs.tf new file mode 100644 index 0000000..a676154 --- /dev/null +++ b/cloud/aws/elasticsearch/inputs.tf @@ -0,0 +1,50 @@ +# Global Terraform +variable "environment" { + description = "Architecture Environment" + type = "string" +} + +# Global DataDog +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 600 +} + +variable "message" { + description = "Message sent when an alert is triggered" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +# AWS ElasticSearch Service specific +variable "es_cluster_volume_size" { + description = "ElasticSearch Domain volume size (in GB)" +} + +variable "diskspace_threshold_warning" { + description = "Disk free space in percent (warning threshold)" + default = "20" +} + +variable "diskspace_threshold_critical" { + description = "Disk free space in percent (critical threshold)" + 
default = "10" +} + +variable "cpu_threshold_warning" { + description = "CPU usage in percent (warning threshold)" + default = "80" +} + +variable "cpu_threshold_critical" { + description = "CPU usage in percent (critical threshold)" + default = "90" +} diff --git a/cloud/aws/elasticsearch/monitors-elasticsearch.tf b/cloud/aws/elasticsearch/monitors-elasticsearch.tf new file mode 100644 index 0000000..c28eaed --- /dev/null +++ b/cloud/aws/elasticsearch/monitors-elasticsearch.tf @@ -0,0 +1,102 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_aws_es:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + } +} + +### Elasticsearch cluster status monitor ### +resource "datadog_monitor" "es_cluster_status" { + name = "[${var.environment}] ElasticSearch cluster status is not green" + message = "${var.message}" + + type = "query alert" + query = < 2 +EOF + + thresholds { + ok = 0 + warning = 1 + critical = 2 + } + + notify_no_data = true + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.evaluation_delay}" + no_data_timeframe = 20 + + tags = ["env:${var.environment}", "resource:elasticsearch", "team:aws", "provider:aws"] +} + +### Elasticsearch cluster free storage space monitor ### +resource "datadog_monitor" "es_free_space_low" { + name = "[${var.environment}] ElasticSearch cluster free storage space < ${var.diskspace_threshold_critical}%" + message = "${var.message}" + + type = "query alert" + query = < ${var.cpu_threshold_critical} +EOF + + thresholds { + warning = "${var.cpu_threshold_warning}" + critical = "${var.cpu_threshold_critical}" + } + + notify_no_data = true + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = 
true + locked = false + require_full_window = true + new_host_delay = "${var.evaluation_delay}" + no_data_timeframe = 20 + + tags = ["env:${var.environment}", "resource:elasticsearch", "team:aws", "provider:aws"] +}