MON-32 - Memory usage monitor updated
This commit is contained in:
parent
299beab1a4
commit
9c4f5b4cfd
@ -61,7 +61,7 @@ variable "max_connection_message" {
|
||||
variable "max_connection_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "max_connection_timeframe" {
|
||||
|
||||
@ -1,27 +1,27 @@
|
||||
locals {
|
||||
memory = {
|
||||
cache.t2.micro = "595926712"
|
||||
cache.t2.small = "1664299827"
|
||||
cache.t2.medium = "3457448673"
|
||||
cache.m3.medium = "2985002270"
|
||||
cache.m3.large = "6496138035"
|
||||
cache.m3.xlarge = "14280766259"
|
||||
cache.m3.2xlarge = "29957396889"
|
||||
cache.m4.large = "6893422510"
|
||||
cache.m4.xlarge = "15333033246"
|
||||
cache.m4.2xlarge = "31890132172"
|
||||
cache.m4.4xlarge = "65262028062"
|
||||
cache.m4.10xlarge = "166043435663"
|
||||
cache.r3.large = "14495514624"
|
||||
cache.r3.xlarge = "30494267801"
|
||||
cache.r3.2xlarge = "62491774156"
|
||||
cache.r3.4xlarge = "126701535232"
|
||||
cache.r3.8xlarge = "254476812288"
|
||||
cache.r4.large = "13207024435"
|
||||
cache.r4.xlarge = "26897232691"
|
||||
cache.r4.2xlarge = "54191749857"
|
||||
cache.r4.4xlarge = "108855946117"
|
||||
cache.r4.8xlarge = "218248763146"
|
||||
cache.r4.16xlarge = "437012922368"
|
||||
cache.t2.micro = 595926712
|
||||
cache.t2.small = 1664299827
|
||||
cache.t2.medium = 3457448673
|
||||
cache.m3.medium = 2985002270
|
||||
cache.m3.large = 6496138035
|
||||
cache.m3.xlarge = 14280766259
|
||||
cache.m3.2xlarge = 29957396889
|
||||
cache.m4.large = 6893422510
|
||||
cache.m4.xlarge = 15333033246
|
||||
cache.m4.2xlarge = 31890132172
|
||||
cache.m4.4xlarge = 65262028062
|
||||
cache.m4.10xlarge = 166043435663
|
||||
cache.r3.large = 14495514624
|
||||
cache.r3.xlarge = 30494267801
|
||||
cache.r3.2xlarge = 62491774156
|
||||
cache.r3.4xlarge = 126701535232
|
||||
cache.r3.8xlarge = 254476812288
|
||||
cache.r4.large = 13207024435
|
||||
cache.r4.xlarge = 26897232691
|
||||
cache.r4.2xlarge = 54191749857
|
||||
cache.r4.4xlarge = 108855946117
|
||||
cache.r4.8xlarge = 218248763146
|
||||
cache.r4.16xlarge = 437012922368
|
||||
}
|
||||
}
|
||||
|
||||
@ -117,13 +117,15 @@ resource "datadog_monitor" "memcached_free_memory" {
|
||||
name = "[${var.environment}] Elasticache memcached free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.free_memory_message, var.message)}"
|
||||
|
||||
count = "${length(keys(local.memory))}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
${var.free_memory_time_aggregator}(${var.free_memory_timeframe}): (
|
||||
avg:aws.elasticache.freeable_memory{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid} /
|
||||
${local.memory[var.elasticache_size]} * 100
|
||||
) < ${var.free_memory_threshold_critical}
|
||||
avg:aws.elasticache.freeable_memory{dd_monitoring:enabled,dd_aws_red:enabled,env:${var.environment},cache_node_type:${element(keys(local.memory), count.index)}} by {region,cacheclusterid,cachenodeid} /
|
||||
${element(values(local.memory), count.index)}
|
||||
) * 100 < ${var.free_memory_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
@ -131,7 +133,7 @@ resource "datadog_monitor" "memcached_free_memory" {
|
||||
critical = "${var.free_memory_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
|
||||
@ -78,7 +78,7 @@ variable "cpu_high_message" {
|
||||
variable "cpu_high_time_aggregator" {
|
||||
description = "Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "cpu_high_timeframe" {
|
||||
|
||||
@ -26,7 +26,7 @@ resource "datadog_monitor" "redis_cache_hits" {
|
||||
avg:aws.elasticache.cache_hits{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count() /
|
||||
(avg:aws.elasticache.cache_hits{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count() +
|
||||
avg:aws.elasticache.cache_misses{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count())
|
||||
) < ${var.cache_hits_threshold_critical}
|
||||
) * 100 < ${var.cache_hits_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
@ -145,8 +145,8 @@ resource "datadog_monitor" "redis_commands" {
|
||||
|
||||
query = <<EOF
|
||||
sum(${var.commands_timeframe}): (
|
||||
avg:aws.elasticache.get_type_cmds{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid}.as_count() +
|
||||
avg:aws.elasticache.set_type_cmds{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid}.as_count()
|
||||
avg:aws.elasticache.get_type_cmds{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count() +
|
||||
avg:aws.elasticache.set_type_cmds{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count()
|
||||
) <= 0
|
||||
EOF
|
||||
|
||||
@ -169,12 +169,14 @@ resource "datadog_monitor" "redis_free_memory" {
|
||||
name = "[${var.environment}] Elasticache redis free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.free_memory_message, var.message)}"
|
||||
|
||||
count = "${length(keys(local.memory))}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
${var.free_memory_time_aggregator}(${var.free_memory_timeframe}): (
|
||||
avg:aws.elasticache.freeable_memory{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid} /
|
||||
( ${local.memory[var.elasticache_size]} / ${var.nodes} )
|
||||
avg:aws.elasticache.freeable_memory{dd_monitoring:enabled,dd_aws_red:enabled,env:${var.environment},cache_node_type:${element(keys(local.memory), count.index)}} by {region,cacheclusterid,cachenodeid} /
|
||||
( ${element(values(local.memory), count.index)} / ${var.nodes} )
|
||||
) * 100 < ${var.free_memory_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -183,7 +185,7 @@ resource "datadog_monitor" "redis_free_memory" {
|
||||
critical = "${var.free_memory_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_no_data = true
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user