diff --git a/cloud/aws/elasticsearch/README.md b/cloud/aws/elasticsearch/README.md index 8e7e45a..8fb4da9 100644 --- a/cloud/aws/elasticsearch/README.md +++ b/cloud/aws/elasticsearch/README.md @@ -33,6 +33,7 @@ Inputs | cpu_silenced | Groups to mute for ES cluster cpu monitor | map | `` | no | | cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no | | cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no | +| delay | Delay in seconds for the metric evaluation | string | `600` | no | | diskspace_message | Custom message for ES cluster diskspace monitor | string | `` | no | | diskspace_silenced | Groups to mute for ES cluster diskspace monitor | map | `` | no | | diskspace_threshold_critical | Disk free space in percent (critical threshold) | string | `10` | no | @@ -41,7 +42,6 @@ Inputs | es_cluster_status_message | Custom message for ES cluster status monitor | string | `` | no | | es_cluster_status_silenced | Groups to mute for ES cluster status monitor | map | `` | no | | es_cluster_volume_size | ElasticSearch Domain volume size (in GB) | string | - | yes | -| evaluation_delay | Delay in seconds for the metric evaluation | string | `600` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | diff --git a/cloud/aws/elasticsearch/inputs.tf b/cloud/aws/elasticsearch/inputs.tf index a23da20..fda0cee 100644 --- a/cloud/aws/elasticsearch/inputs.tf +++ b/cloud/aws/elasticsearch/inputs.tf @@ -5,7 +5,7 @@ variable "environment" { } # Global DataDog -variable "evaluation_delay" { +variable "delay" { description = "Delay in seconds for the metric evaluation" default = 600 } diff --git a/cloud/aws/elasticsearch/monitors-elasticsearch.tf 
b/cloud/aws/elasticsearch/monitors-elasticsearch.tf index d51f50b..1e3f0ab 100644 --- a/cloud/aws/elasticsearch/monitors-elasticsearch.tf +++ b/cloud/aws/elasticsearch/monitors-elasticsearch.tf @@ -7,34 +7,37 @@ data "template_file" "filter" { } ### Elasticsearch cluster status monitor ### +/* Note about the query + - If aws.es.cluster_statusred is 1 --> query value (= 2.1) > 2 : critical + - If aws.es.cluster_statusyellow is 1 --> 1 < query value (= 1.1) < 2 : warning + Workaround: the query adds "0.1" to the result and uses the comparator ">=", because no alert was triggered otherwise. */ resource "datadog_monitor" "es_cluster_status" { name = "[${var.environment}] ElasticSearch cluster status is not green" message = "${coalesce(var.es_cluster_status_message, var.message)}" - type = "query alert" + type = "metric alert" query = < 2 + (avg:aws.es.cluster_statusyellow{${data.template_file.filter.rendered}} by {region,name} + 0.1) + ) >= 2 EOF thresholds { - ok = 0 warning = 1 critical = 2 } notify_no_data = true - evaluation_delay = "${var.evaluation_delay}" + evaluation_delay = "${var.delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = false - new_host_delay = "${var.evaluation_delay}" + new_host_delay = "${var.delay}" no_data_timeframe = 20 silenced = "${var.es_cluster_status_silenced}" @@ -47,7 +50,7 @@ resource "datadog_monitor" "es_free_space_low" { name = "[${var.environment}] ElasticSearch cluster free storage space {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.diskspace_message, var.message)}" - type = "query alert" + type = "metric alert" query = <