MON-32 - Fix redis_cpu_high monitor

This commit is contained in:
Alexandre Gaillet 2018-05-25 17:13:50 +02:00 committed by Quentin Manfroi
parent ada70533d7
commit 299beab1a4
2 changed files with 51 additions and 54 deletions

View File

@@ -1,53 +1,53 @@
locals { locals {
core = { core = {
cache.t2.micro = "1" cache.t2.micro = 1
cache.t2.small = "1" cache.t2.small = 1
cache.t2.medium = "2" cache.t2.medium = 2
cache.m3.medium = "1" cache.m3.medium = 1
cache.m3.large = "2" cache.m3.large = 2
cache.m3.xlarge = "4" cache.m3.xlarge = 4
cache.m3.2xlarge = "8" cache.m3.2xlarge = 8
cache.m4.large = "2" cache.m4.large = 2
cache.m4.xlarge = "4" cache.m4.xlarge = 4
cache.m4.2xlarge = "8" cache.m4.2xlarge = 8
cache.m4.4xlarge = "16" cache.m4.4xlarge = 16
cache.m4.10xlarge = "40" cache.m4.10xlarge = 40
cache.r3.large = "2" cache.r3.large = 2
cache.r3.xlarge = "4" cache.r3.xlarge = 4
cache.r3.2xlarge = "8" cache.r3.2xlarge = 8
cache.r3.4xlarge = "16" cache.r3.4xlarge = 16
cache.r3.8xlarge = "32" cache.r3.8xlarge = 32
cache.r4.large = "2" cache.r4.large = 2
cache.r4.xlarge = "4" cache.r4.xlarge = 4
cache.r4.2xlarge = "8" cache.r4.2xlarge = 8
cache.r4.4xlarge = "16" cache.r4.4xlarge = 16
cache.r4.8xlarge = "32" cache.r4.8xlarge = 32
cache.r4.16xlarge = "64" cache.r4.16xlarge = 64
} }
memory = { memory = {
cache.t2.micro = "595926712" cache.t2.micro = 595926712
cache.t2.small = "1664299827" cache.t2.small = 1664299827
cache.t2.medium = "3457448673" cache.t2.medium = 3457448673
cache.m3.medium = "2985002270" cache.m3.medium = 2985002270
cache.m3.large = "6496138035" cache.m3.large = 6496138035
cache.m3.xlarge = "14280766259" cache.m3.xlarge = 14280766259
cache.m3.2xlarge = "29957396889" cache.m3.2xlarge = 29957396889
cache.m4.large = "6893422510" cache.m4.large = 6893422510
cache.m4.xlarge = "15333033246" cache.m4.xlarge = 15333033246
cache.m4.2xlarge = "31890132172" cache.m4.2xlarge = 31890132172
cache.m4.4xlarge = "65262028062" cache.m4.4xlarge = 65262028062
cache.m4.10xlarge = "166043435663" cache.m4.10xlarge = 166043435663
cache.r3.large = "14495514624" cache.r3.large = 14495514624
cache.r3.xlarge = "30494267801" cache.r3.xlarge = 30494267801
cache.r3.2xlarge = "62491774156" cache.r3.2xlarge = 62491774156
cache.r3.4xlarge = "126701535232" cache.r3.4xlarge = 126701535232
cache.r3.8xlarge = "254476812288" cache.r3.8xlarge = 254476812288
cache.r4.large = "13207024435" cache.r4.large = 13207024435
cache.r4.xlarge = "26897232691" cache.r4.xlarge = 26897232691
cache.r4.2xlarge = "54191749857" cache.r4.2xlarge = 54191749857
cache.r4.4xlarge = "108855946117" cache.r4.4xlarge = 108855946117
cache.r4.8xlarge = "218248763146" cache.r4.8xlarge = 218248763146
cache.r4.16xlarge = "437012922368" cache.r4.16xlarge = 437012922368
} }
} }

View File

@@ -53,20 +53,17 @@ resource "datadog_monitor" "redis_cpu_high" {
name = "[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" name = "[${var.environment}] Elasticache redis CPU {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${coalesce(var.cpu_high_message, var.message)}" message = "${coalesce(var.cpu_high_message, var.message)}"
count = "${length(keys(local.core))}"
type = "metric alert" type = "metric alert"
query = <<EOF query = <<EOF
${var.cpu_high_time_aggregator}(${var.cpu_high_timeframe}): ( ${var.cpu_high_time_aggregator}(${var.cpu_high_timeframe}): (
avg:aws.elasticache.cpuutilization{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid} avg:aws.elasticache.cpuutilization{dd_monitoring:enabled,dd_aws_red:enabled,env:${var.environment},cache_node_type:${element(keys(local.core), count.index)}} by {region,cacheclusterid,cachenodeid}
) > ( ${var.cpu_high_threshold_critical} / ${local.core[var.elasticache_size]} ) ) > ${var.cpu_high_threshold_critical / element(values(local.core), count.index)}
EOF EOF
thresholds { notify_no_data = false
warning = "${var.cpu_high_threshold_warning}"
critical = "${var.cpu_high_threshold_critical}"
}
notify_no_data = true
evaluation_delay = "${var.delay}" evaluation_delay = "${var.delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false