MON-76: More monitors

This commit is contained in:
Jérôme Respaut 2017-10-30 17:44:30 +01:00 committed by Laurent Piroelle
parent 9112ce02a3
commit 9f1051097e
2 changed files with 67 additions and 11 deletions

View File

@ -7,10 +7,6 @@ variable "environment" {
type = "string" type = "string"
} }
variable "stack" {
type = "string"
}
# Global DataDog # Global DataDog
variable "message" { variable "message" {
} }
@ -20,9 +16,21 @@ variable "delay" {
} }
# Azure Redis specific # Azure Redis specific
variable "evictedkeys_threshold_warning" { variable "evictedkeys_limit_threshold_warning" {
default = 0 default = 0
} }
variable "evictedkeys_threshold_critical" { variable "evictedkeys_limit_threshold_critical" {
default = 100 default = 100
} }
# Thresholds for the percent_processor_time monitor.
# The critical value is interpolated into that monitor's query and its
# thresholds block; the warning value is used in the thresholds block only.
variable "percent_processor_time_threshold_critical" {
default = 80
}
variable "percent_processor_time_threshold_warning" {
default = 60
}
# Thresholds for the server_load monitor.
# The critical value is interpolated into that monitor's query; both values
# are referenced from its thresholds block.
variable "server_load_rate_threshold_critical" {
default = 90
}
variable "server_load_rate_threshold_warning" {
default = 70
}

View File

@ -7,7 +7,7 @@ resource "datadog_monitor" "status" {
notify_no_data = false notify_no_data = false
evaluation_delay = "${var.delay}" evaluation_delay = "${var.delay}"
renotify_interval = 60 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
@ -21,17 +21,65 @@ resource "datadog_monitor" "evictedkeys" {
name = "[${var.environment}] Redis {{value}} evictedkeys on {{name}}" name = "[${var.environment}] Redis {{value}} evictedkeys on {{name}}"
message = "${var.message}" message = "${var.message}"
query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_threshold_critical}" query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_limit_threshold_critical}"
type = "query alert" type = "query alert"
thresholds { thresholds {
warning = "${var.evictedkeys_threshold_warning}" warning = "${var.evictedkeys_limit_threshold_warning}"
critical = "${var.evictedkeys_threshold_critical}" critical = "${var.evictedkeys_limit_threshold_critical}"
} }
notify_no_data = false notify_no_data = false
evaluation_delay = "${var.delay}" evaluation_delay = "${var.delay}"
renotify_interval = 60 renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
no_data_timeframe = 20
}
# Monitor on azure.cache_redis.percent_processor_time: evaluates the 5-minute
# average per {name,resource_group} group against the configured thresholds.
resource "datadog_monitor" "percent_processor_time" {
name = "[${var.environment}] Redis processor time {{value}}% on {{name}}"
message = "${var.message}"
# The comparison value in the query is the same variable as thresholds.critical below.
query = "avg(last_5m):avg:azure.cache_redis.percent_processor_time{*} by {name,resource_group} > ${var.percent_processor_time_threshold_critical}"
type = "query alert"
thresholds {
warning = "${var.percent_processor_time_threshold_warning}"
critical = "${var.percent_processor_time_threshold_critical}"
}
# No-data notifications are switched off for this monitor.
notify_no_data = false
# evaluation_delay and new_host_delay both read the shared var.delay setting.
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = true
new_host_delay = "${var.delay}"
# NOTE(review): notify_no_data is false above, so this timeframe presumably has no effect - confirm.
no_data_timeframe = 20
}
resource "datadog_monitor" "server_load" {
name = "[${var.environment}] Redis processor server load {{value}}% on {{name}}"
message = "${var.message}"
query = "avg(last_5m):avg:azure.cache_redis.server_load{*} by {name,resource_group} > ${var.server_load_rate_threshold_critical}"
type = "query alert"
thresholds {
warning = "${var.server_load_rate_threshold_warning}"
critical = "${var.server_load_rate_threshold_critical}"
}
notify_no_data = false
evaluation_delay = "${var.delay}"
renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true