From 9f1051097e4b42b37f3814a7cb6d139f537ba280 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?=
Date: Mon, 30 Oct 2017 17:44:30 +0100
Subject: [PATCH] MON-76: More monitors

Azure Redis Datadog monitors:

* inputs.tf: drop the `stack` variable and rename the evicted-keys
  thresholds to `evictedkeys_limit_threshold_{warning,critical}`
  (defaults 0 / 100).
* inputs.tf: add `percent_processor_time_threshold_{warning,critical}`
  (defaults 60 / 80) and `server_load_rate_threshold_{warning,critical}`
  (defaults 70 / 90).
* monitors-azure-redis.tf: add the `percent_processor_time` and
  `server_load` query alerts, each evaluated as avg(last_5m) grouped by
  {name,resource_group} and compared against its critical variable.
* monitors-azure-redis.tf: set `renotify_interval` to 0 (was 60) on the
  existing `status` and `evictedkeys` monitors.
---
Review notes (not part of the commit message):

* server_load: `thresholds.warning` referenced
  `server_load_rate_threshold_critical` and `thresholds.critical`
  referenced `server_load_rate_threshold_warning`. With the defaults this
  put warning (90) above critical (70) and made the critical threshold
  differ from the value used in the alert query (`> 90`). The two
  references are now mapped warning -> warning, critical -> critical,
  matching the `evictedkeys` and `percent_processor_time` monitors.
* NOTE(review): this patch was recovered from a copy whose line breaks and
  intra-line spacing had been collapsed. Indentation and `=` spacing are
  reconstructed (2-space indent, single space around `=`); if context
  lines do not match the tree, apply with `--ignore-whitespace`.
* NOTE(review): the `index` blob ids and the commit id in the first line
  predate the server_load fix and are informational only.

 cloud/azure/redis/inputs.tf               | 20 +++++---
 cloud/azure/redis/monitors-azure-redis.tf | 58 +++++++++++++++++++++--
 2 files changed, 67 insertions(+), 11 deletions(-)

diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf
index 3f9460f..f13b4cb 100644
--- a/cloud/azure/redis/inputs.tf
+++ b/cloud/azure/redis/inputs.tf
@@ -7,10 +7,6 @@ variable "environment" {
   type = "string"
 }
 
-variable "stack" {
-  type = "string"
-}
-
 # Global DataDog
 variable "message" {
 }
@@ -20,9 +16,21 @@ variable "delay" {
 }
 
 # Azure Redis specific
-variable "evictedkeys_threshold_warning" {
+variable "evictedkeys_limit_threshold_warning" {
   default = 0
 }
-variable "evictedkeys_threshold_critical" {
+variable "evictedkeys_limit_threshold_critical" {
   default = 100
 }
+variable "percent_processor_time_threshold_critical" {
+  default = 80
+}
+variable "percent_processor_time_threshold_warning" {
+  default = 60
+}
+variable "server_load_rate_threshold_critical" {
+  default = 90
+}
+variable "server_load_rate_threshold_warning" {
+  default = 70
+}
diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf
index 8b47249..d4b21b5 100644
--- a/cloud/azure/redis/monitors-azure-redis.tf
+++ b/cloud/azure/redis/monitors-azure-redis.tf
@@ -7,7 +7,7 @@ resource "datadog_monitor" "status" {
 
   notify_no_data = false
   evaluation_delay = "${var.delay}"
-  renotify_interval = 60
+  renotify_interval = 0
   notify_audit = false
   timeout_h = 0
   include_tags = true
@@ -21,17 +21,65 @@ resource "datadog_monitor" "evictedkeys" {
   name = "[${var.environment}] Redis {{value}} evictedkeys on {{name}}"
   message = "${var.message}"
 
-  query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_threshold_critical}"
+  query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_limit_threshold_critical}"
   type = "query alert"
 
   thresholds {
-    warning = "${var.evictedkeys_threshold_warning}"
-    critical = "${var.evictedkeys_threshold_critical}"
+    warning = "${var.evictedkeys_limit_threshold_warning}"
+    critical = "${var.evictedkeys_limit_threshold_critical}"
   }
 
   notify_no_data = false
   evaluation_delay = "${var.delay}"
-  renotify_interval = 60
+  renotify_interval = 0
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  new_host_delay = "${var.delay}"
+  no_data_timeframe = 20
+}
+
+resource "datadog_monitor" "percent_processor_time" {
+  name = "[${var.environment}] Redis processor time {{value}}% on {{name}}"
+  message = "${var.message}"
+
+  query = "avg(last_5m):avg:azure.cache_redis.percent_processor_time{*} by {name,resource_group} > ${var.percent_processor_time_threshold_critical}"
+  type = "query alert"
+
+  thresholds {
+    warning = "${var.percent_processor_time_threshold_warning}"
+    critical = "${var.percent_processor_time_threshold_critical}"
+  }
+
+  notify_no_data = false
+  evaluation_delay = "${var.delay}"
+  renotify_interval = 0
+  notify_audit = false
+  timeout_h = 0
+  include_tags = true
+  locked = false
+  require_full_window = true
+  new_host_delay = "${var.delay}"
+  no_data_timeframe = 20
+}
+
+resource "datadog_monitor" "server_load" {
+  name = "[${var.environment}] Redis processor server load {{value}}% on {{name}}"
+  message = "${var.message}"
+
+  query = "avg(last_5m):avg:azure.cache_redis.server_load{*} by {name,resource_group} > ${var.server_load_rate_threshold_critical}"
+  type = "query alert"
+
+  thresholds {
+    warning = "${var.server_load_rate_threshold_warning}"
+    critical = "${var.server_load_rate_threshold_critical}"
+  }
+
+  notify_no_data = false
+  evaluation_delay = "${var.delay}"
+  renotify_interval = 0
   notify_audit = false
   timeout_h = 0
   include_tags = true