MON-32 - Memory usage monitor updated
This commit is contained in:
parent
299beab1a4
commit
9c4f5b4cfd
@ -61,7 +61,7 @@ variable "max_connection_message" {
|
|||||||
variable "max_connection_time_aggregator" {
|
variable "max_connection_time_aggregator" {
|
||||||
description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]"
|
description = "Monitor aggregator for Elasticache max connection [available values: min, max or avg]"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "min"
|
default = "max"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "max_connection_timeframe" {
|
variable "max_connection_timeframe" {
|
||||||
|
|||||||
@ -1,27 +1,27 @@
|
|||||||
locals {
|
locals {
|
||||||
memory = {
|
memory = {
|
||||||
cache.t2.micro = "595926712"
|
cache.t2.micro = 595926712
|
||||||
cache.t2.small = "1664299827"
|
cache.t2.small = 1664299827
|
||||||
cache.t2.medium = "3457448673"
|
cache.t2.medium = 3457448673
|
||||||
cache.m3.medium = "2985002270"
|
cache.m3.medium = 2985002270
|
||||||
cache.m3.large = "6496138035"
|
cache.m3.large = 6496138035
|
||||||
cache.m3.xlarge = "14280766259"
|
cache.m3.xlarge = 14280766259
|
||||||
cache.m3.2xlarge = "29957396889"
|
cache.m3.2xlarge = 29957396889
|
||||||
cache.m4.large = "6893422510"
|
cache.m4.large = 6893422510
|
||||||
cache.m4.xlarge = "15333033246"
|
cache.m4.xlarge = 15333033246
|
||||||
cache.m4.2xlarge = "31890132172"
|
cache.m4.2xlarge = 31890132172
|
||||||
cache.m4.4xlarge = "65262028062"
|
cache.m4.4xlarge = 65262028062
|
||||||
cache.m4.10xlarge = "166043435663"
|
cache.m4.10xlarge = 166043435663
|
||||||
cache.r3.large = "14495514624"
|
cache.r3.large = 14495514624
|
||||||
cache.r3.xlarge = "30494267801"
|
cache.r3.xlarge = 30494267801
|
||||||
cache.r3.2xlarge = "62491774156"
|
cache.r3.2xlarge = 62491774156
|
||||||
cache.r3.4xlarge = "126701535232"
|
cache.r3.4xlarge = 126701535232
|
||||||
cache.r3.8xlarge = "254476812288"
|
cache.r3.8xlarge = 254476812288
|
||||||
cache.r4.large = "13207024435"
|
cache.r4.large = 13207024435
|
||||||
cache.r4.xlarge = "26897232691"
|
cache.r4.xlarge = 26897232691
|
||||||
cache.r4.2xlarge = "54191749857"
|
cache.r4.2xlarge = 54191749857
|
||||||
cache.r4.4xlarge = "108855946117"
|
cache.r4.4xlarge = 108855946117
|
||||||
cache.r4.8xlarge = "218248763146"
|
cache.r4.8xlarge = 218248763146
|
||||||
cache.r4.16xlarge = "437012922368"
|
cache.r4.16xlarge = 437012922368
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -117,13 +117,15 @@ resource "datadog_monitor" "memcached_free_memory" {
|
|||||||
name = "[${var.environment}] Elasticache memcached free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
name = "[${var.environment}] Elasticache memcached free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||||
message = "${coalesce(var.free_memory_message, var.message)}"
|
message = "${coalesce(var.free_memory_message, var.message)}"
|
||||||
|
|
||||||
|
count = "${length(keys(local.memory))}"
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.free_memory_time_aggregator}(${var.free_memory_timeframe}): (
|
${var.free_memory_time_aggregator}(${var.free_memory_timeframe}): (
|
||||||
avg:aws.elasticache.freeable_memory{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid} /
|
avg:aws.elasticache.freeable_memory{dd_monitoring:enabled,dd_aws_red:enabled,env:${var.environment},cache_node_type:${element(keys(local.memory), count.index)}} by {region,cacheclusterid,cachenodeid} /
|
||||||
${local.memory[var.elasticache_size]} * 100
|
${element(values(local.memory), count.index)}
|
||||||
) < ${var.free_memory_threshold_critical}
|
) * 100 < ${var.free_memory_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
@ -131,7 +133,7 @@ resource "datadog_monitor" "memcached_free_memory" {
|
|||||||
critical = "${var.free_memory_threshold_critical}"
|
critical = "${var.free_memory_threshold_critical}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
notify_no_data = false
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
|||||||
@ -78,7 +78,7 @@ variable "cpu_high_message" {
|
|||||||
variable "cpu_high_time_aggregator" {
|
variable "cpu_high_time_aggregator" {
|
||||||
description = "Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg]"
|
description = "Monitor aggregator for Elasticache redis cpu high [available values: min, max or avg]"
|
||||||
type = "string"
|
type = "string"
|
||||||
default = "min"
|
default = "avg"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "cpu_high_timeframe" {
|
variable "cpu_high_timeframe" {
|
||||||
|
|||||||
@ -26,7 +26,7 @@ resource "datadog_monitor" "redis_cache_hits" {
|
|||||||
avg:aws.elasticache.cache_hits{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count() /
|
avg:aws.elasticache.cache_hits{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count() /
|
||||||
(avg:aws.elasticache.cache_hits{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count() +
|
(avg:aws.elasticache.cache_hits{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count() +
|
||||||
avg:aws.elasticache.cache_misses{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count())
|
avg:aws.elasticache.cache_misses{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count())
|
||||||
) < ${var.cache_hits_threshold_critical}
|
) * 100 < ${var.cache_hits_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
@ -145,8 +145,8 @@ resource "datadog_monitor" "redis_commands" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
sum(${var.commands_timeframe}): (
|
sum(${var.commands_timeframe}): (
|
||||||
avg:aws.elasticache.get_type_cmds{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid}.as_count() +
|
avg:aws.elasticache.get_type_cmds{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count() +
|
||||||
avg:aws.elasticache.set_type_cmds{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid}.as_count()
|
avg:aws.elasticache.set_type_cmds{${data.template_file.filter.rendered}} by {region,cacheclusterid}.as_count()
|
||||||
) <= 0
|
) <= 0
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
@ -169,12 +169,14 @@ resource "datadog_monitor" "redis_free_memory" {
|
|||||||
name = "[${var.environment}] Elasticache redis free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
name = "[${var.environment}] Elasticache redis free memory {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||||
message = "${coalesce(var.free_memory_message, var.message)}"
|
message = "${coalesce(var.free_memory_message, var.message)}"
|
||||||
|
|
||||||
|
count = "${length(keys(local.memory))}"
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.free_memory_time_aggregator}(${var.free_memory_timeframe}): (
|
${var.free_memory_time_aggregator}(${var.free_memory_timeframe}): (
|
||||||
avg:aws.elasticache.freeable_memory{${data.template_file.filter.rendered}} by {region,cacheclusterid,cachenodeid} /
|
avg:aws.elasticache.freeable_memory{dd_monitoring:enabled,dd_aws_red:enabled,env:${var.environment},cache_node_type:${element(keys(local.memory), count.index)}} by {region,cacheclusterid,cachenodeid} /
|
||||||
( ${local.memory[var.elasticache_size]} / ${var.nodes} )
|
( ${element(values(local.memory), count.index)} / ${var.nodes} )
|
||||||
) * 100 < ${var.free_memory_threshold_critical}
|
) * 100 < ${var.free_memory_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
@ -183,7 +185,7 @@ resource "datadog_monitor" "redis_free_memory" {
|
|||||||
critical = "${var.free_memory_threshold_critical}"
|
critical = "${var.free_memory_threshold_critical}"
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
notify_no_data = false
|
||||||
evaluation_delay = "${var.delay}"
|
evaluation_delay = "${var.delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user