Azure Redis Datadog monitors: add an optional tag filter.

inputs.tf: adds descriptions to "message" and "delay", introduces the
"use_filter_tags" variable (default "true"), and separates the threshold
variables with blank lines.

monitors-azure-redis.tf: adds a template_file data source that renders either
a default tag filter (when use_filter_tags == "true") or "*", and makes the
status, evictedkeys, percent_processor_time and server_load queries use it in
place of the hard-coded {*} scope.

NOTE(review): the default filter below uses dd_azure_redis:enabled. The
collapsed copy of this patch carried dd_azure_eventhub:enabled, which looks
like a leftover from the Event Hub module - confirm against the tags actually
set on the Redis resources.
NOTE(review): line structure was rebuilt from a whitespace-collapsed copy;
original "=" alignment is unknown, so apply with --ignore-whitespace if the
context lines do not match.

diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf
index f13b4cb..a96cc51 100644
--- a/cloud/azure/redis/inputs.tf
+++ b/cloud/azure/redis/inputs.tf
@@ -9,28 +9,40 @@ variable "environment" {
 
 # Global DataDog
 variable "message" {
+  description = "Message sent when a Redis monitor is triggered"
 }
 
 variable "delay" {
+  description = "Delay in seconds for the metric evaluation"
   default = 600
 }
 
+variable "use_filter_tags" {
+  description = "Filter the data with service tags if true"
+  default = "true"
+}
+
 # Azure Redis specific
 variable "evictedkeys_limit_threshold_warning" {
   default = 0
 }
+
 variable "evictedkeys_limit_threshold_critical" {
   default = 100
 }
+
 variable "percent_processor_time_threshold_critical" {
   default = 80
 }
+
 variable "percent_processor_time_threshold_warning" {
   default = 60
 }
+
 variable "server_load_rate_threshold_critical" {
   default = 90
 }
+
 variable "server_load_rate_threshold_warning" {
   default = 70
 }
diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf
index d4b21b5..6931afe 100644
--- a/cloud/azure/redis/monitors-azure-redis.tf
+++ b/cloud/azure/redis/monitors-azure-redis.tf
@@ -1,8 +1,16 @@
+data "template_file" "filter" {
+  template = "$${filter}"
+
+  vars {
+    filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_redis:enabled,env:%s", var.environment) : "*"}"
+  }
+}
+
 resource "datadog_monitor" "status" {
   name = "[${var.environment}] Redis {{name}} is down"
   message = "${var.message}"
 
-  query = "avg(last_5m):avg:azure.cache_redis.status{*} by {name,resource_group} != 1"
+  query = "avg(last_5m):avg:azure.cache_redis.status{${data.template_file.filter.rendered}} by {name,resource_group} != 1"
   type = "query alert"
 
   notify_no_data = false
@@ -21,7 +29,7 @@ resource "datadog_monitor" "evictedkeys" {
   name = "[${var.environment}] Redis {{value}} evictedkeys on {{name}}"
   message = "${var.message}"
 
-  query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_limit_threshold_critical}"
+  query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{${data.template_file.filter.rendered}} by {name,resource_group} > ${var.evictedkeys_limit_threshold_critical}"
   type = "query alert"
 
   thresholds {
@@ -45,7 +53,7 @@ resource "datadog_monitor" "percent_processor_time" {
   name = "[${var.environment}] Redis processor time {{value}}% on {{name}}"
   message = "${var.message}"
 
-  query = "avg(last_5m):avg:azure.cache_redis.percent_processor_time{*} by {name,resource_group} > ${var.percent_processor_time_threshold_critical}"
+  query = "avg(last_5m):avg:azure.cache_redis.percent_processor_time{${data.template_file.filter.rendered}} by {name,resource_group} > ${var.percent_processor_time_threshold_critical}"
   type = "query alert"
 
   thresholds {
@@ -69,7 +77,7 @@ resource "datadog_monitor" "server_load" {
   name = "[${var.environment}] Redis processor server load {{value}}% on {{name}}"
   message = "${var.message}"
 
-  query = "avg(last_5m):avg:azure.cache_redis.server_load{*} by {name,resource_group} > ${var.server_load_rate_threshold_critical}"
+  query = "avg(last_5m):avg:azure.cache_redis.server_load{${data.template_file.filter.rendered}} by {name,resource_group} > ${var.server_load_rate_threshold_critical}"
   type = "query alert"
 
   thresholds {