From 93eccaf593484a184c33dc6312915cd0aad2e4a4 Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Tue, 27 Nov 2018 10:23:56 +0100 Subject: [PATCH] MON-335 Fix IoTHub monitor for alert recovery on no data --- cloud/azure/iothubs/monitors-iothubs.tf | 84 ++++++++++++++----------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 9c86bb6..1cc6f7f 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -4,11 +4,12 @@ resource "datadog_monitor" "too_many_jobs_failed" { message = "${coalesce(var.failed_jobs_rate_message, var.message)}" query = < ${var.failed_jobs_rate_threshold_critical} + * 100 , 0) > ${var.failed_jobs_rate_threshold_critical} EOF type = "metric alert" @@ -20,7 +21,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { silenced = "${var.failed_jobs_rate_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -39,11 +40,12 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { message = "${coalesce(var.failed_listjobs_rate_message, var.message)}" query = < ${var.failed_listjobs_rate_threshold_critical} + * 100, 0) > ${var.failed_listjobs_rate_threshold_critical} EOF type = "metric alert" @@ -55,7 +57,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" { silenced = "${var.failed_listjobs_rate_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -74,11 +76,12 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { message = "${coalesce(var.failed_queryjobs_rate_message, var.message)}" query = < ${var.failed_queryjobs_rate_threshold_critical} + * 100, 0) > ${var.failed_queryjobs_rate_threshold_critical} EOF type = "metric alert" @@ -90,7 +93,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" { silenced = "${var.failed_queryjobs_rate_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -165,11 +168,12 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { message = "${coalesce(var.failed_c2d_methods_rate_message, var.message)}" query = < ${var.failed_c2d_methods_rate_threshold_critical} + * 100, 0) > ${var.failed_c2d_methods_rate_threshold_critical} EOF type = "metric alert" @@ -181,7 +185,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" { silenced = "${var.failed_c2d_methods_rate_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -200,11 +204,12 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { message = "${coalesce(var.failed_c2d_twin_read_rate_message, var.message)}" query = < ${var.failed_c2d_twin_read_rate_threshold_critical} + * 100, 0) > ${var.failed_c2d_twin_read_rate_threshold_critical} EOF type = "metric alert" @@ -216,7 +221,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" { silenced = "${var.failed_c2d_twin_read_rate_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -235,11 +240,12 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { message = "${coalesce(var.failed_c2d_twin_update_rate_message, var.message)}" query = < ${var.failed_c2d_twin_update_rate_threshold_critical} + * 100, 0) > ${var.failed_c2d_twin_update_rate_threshold_critical} EOF type = "metric alert" @@ -251,7 +257,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" { silenced = "${var.failed_c2d_twin_update_rate_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -270,11 +276,12 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { message = "${coalesce(var.failed_d2c_twin_read_rate_message, var.message)}" query = < ${var.failed_d2c_twin_read_rate_threshold_critical} + * 100, 0) > ${var.failed_d2c_twin_read_rate_threshold_critical} EOF type = "metric alert" @@ -286,7 +293,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" { silenced = "${var.failed_d2c_twin_read_rate_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -305,11 +312,12 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { message = "${coalesce(var.failed_d2c_twin_update_rate_message, var.message)}" query = < ${var.failed_d2c_twin_update_rate_threshold_critical} + * 100, 0) > ${var.failed_d2c_twin_update_rate_threshold_critical} EOF type = "metric alert" @@ -321,7 +329,7 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" { silenced = "${var.failed_d2c_twin_update_rate_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -340,13 +348,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { message = "${coalesce(var.dropped_d2c_telemetry_egress_message, var.message)}" query = < ${var.dropped_d2c_telemetry_egress_rate_threshold_critical} + * 100, 0) > ${var.dropped_d2c_telemetry_egress_rate_threshold_critical} EOF type = "metric alert" @@ -358,7 +367,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { silenced = "${var.dropped_d2c_telemetry_egress_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -377,13 +386,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { message = "${coalesce(var.orphaned_d2c_telemetry_egress_message, var.message)}" query = < ${var.orphaned_d2c_telemetry_egress_rate_threshold_critical} + * 100, 0) > ${var.orphaned_d2c_telemetry_egress_rate_threshold_critical} EOF type = "metric alert" @@ -395,7 +405,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { silenced = "${var.orphaned_d2c_telemetry_egress_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -414,13 +424,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { message = "${coalesce(var.invalid_d2c_telemetry_egress_message, var.message)}" query = < ${var.invalid_d2c_telemetry_egress_rate_threshold_critical} + * 100, 0) > ${var.invalid_d2c_telemetry_egress_rate_threshold_critical} EOF type = "metric alert" @@ -432,7 +443,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { silenced = "${var.invalid_d2c_telemetry_egress_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false @@ -451,17 +462,18 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { message = "${coalesce(var.too_many_d2c_telemetry_ingress_nosent_message, var.message)}" query = < 0 + , 0) > 0 EOF type = "metric alert" silenced = "${var.too_many_d2c_telemetry_ingress_nosent_silenced}" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false