MON-308 add warning when some instances are unhealthy
This commit is contained in:
parent
d60756a87d
commit
57913a7385
@ -1,13 +1,15 @@
|
|||||||
resource "datadog_monitor" "ALB_no_healthy_instances" {
|
resource "datadog_monitor" "ALB_no_healthy_instances" {
|
||||||
count = "${var.alb_no_healthy_instances_enabled ? 1 : 0}"
|
count = "${var.alb_no_healthy_instances_enabled ? 1 : 0}"
|
||||||
name = "[${var.environment}] ALB no healthy instances"
|
name = "[${var.environment}] ALB {{#is_alert}}no healthy instances{{/is_alert}}{{#is_warning}}{{value}}% of healthy instances{{/is_warning}}"
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
message = "${coalesce(var.alb_no_healthy_instances_message, var.message)}"
|
message = "${coalesce(var.alb_no_healthy_instances_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.alb_no_healthy_instances_time_aggregator}(${var.alb_no_healthy_instances_timeframe}): (
|
${var.alb_no_healthy_instances_time_aggregator}(${var.alb_no_healthy_instances_timeframe}): (
|
||||||
sum:aws.applicationelb.healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancer}
|
sum:aws.applicationelb.healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancer} / (
|
||||||
) < 1
|
sum:aws.applicationelb.healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancer} +
|
||||||
|
sum:aws.applicationelb.un_healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancer} )
|
||||||
|
) * 100 < 1
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
@ -15,6 +17,7 @@ resource "datadog_monitor" "ALB_no_healthy_instances" {
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = 1
|
critical = 1
|
||||||
|
warning = 100
|
||||||
}
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
notify_no_data = true
|
||||||
|
|||||||
@ -1,16 +1,23 @@
|
|||||||
resource "datadog_monitor" "ELB_no_healthy_instances" {
|
resource "datadog_monitor" "ELB_no_healthy_instances" {
|
||||||
count = "${var.elb_no_healthy_instance_enabled ? 1 : 0}"
|
count = "${var.elb_no_healthy_instance_enabled ? 1 : 0}"
|
||||||
name = "[${var.environment}] ELB no healthy instances"
|
name = "[${var.environment}] ELB {{#is_alert}}no healthy instances{{/is_alert}}{{#is_warning}}{{value}}% of healthy instances{{/is_warning}}"
|
||||||
message = "${coalesce(var.elb_no_healthy_instance_message, var.message)}"
|
message = "${coalesce(var.elb_no_healthy_instance_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.elb_no_healthy_instance_time_aggregator}(${var.elb_no_healthy_instance_timeframe}): (
|
${var.elb_no_healthy_instance_time_aggregator}(${var.elb_no_healthy_instance_timeframe}): (
|
||||||
sum:aws.elb.healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancername}
|
sum:aws.elb.healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancername} / (
|
||||||
) < 1
|
sum:aws.elb.healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancername} +
|
||||||
|
sum:aws.elb.un_healthy_host_count.maximum${module.filter-tags.query_alert} by {region,loadbalancername} )
|
||||||
|
) * 100 < 1
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
|
|
||||||
|
thresholds {
|
||||||
|
critical = 1
|
||||||
|
warning = 100
|
||||||
|
}
|
||||||
|
|
||||||
notify_no_data = true
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user