From 04821f77648c0f2b430722d11bf26452501c3547 Mon Sep 17 00:00:00 2001 From: "gauthier.ampe@fr.clara.net" Date: Wed, 31 Jul 2019 09:28:16 +0200 Subject: [PATCH 1/5] MON-484 Corrects the monitor with Laurent's suggestion --- cloud/azure/iothubs/inputs.tf | 2 +- cloud/azure/iothubs/monitors-iothubs.tf | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index 4c813a8..71bea7c 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -567,5 +567,5 @@ variable "invalid_d2c_telemetry_egress_rate_threshold_critical" { variable "too_many_d2c_telemetry_ingress_nosent_threshold_critical" { description = "D2C Telemetry ingress not sent limit (critical threshold)" - default = 0.3 + default = 1 } diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 2dcb1a0..f5bfcaa 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -479,9 +479,9 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { query = < ${var.too_many_d2c_telemetry_ingress_nosent_threshold_critical} + 100-(default(avg:zure.devices_iothubs.d2c.telemetry.ingress.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(),0) / + default(avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(),0) + * 100),0) > ${var.too_many_d2c_telemetry_ingress_nosent_threshold_critical} EOQ evaluation_delay = var.evaluation_delay From a9026cb29b9cfd2860d428c07a79540698b95cf8 Mon Sep 17 00:00:00 2001 From: "gauthier.ampe@fr.clara.net" Date: Wed, 31 Jul 2019 09:39:10 +0200 Subject: [PATCH 2/5] MON-484 Adjust threshold variable name and value --- cloud/azure/iothubs/inputs.tf | 9 +++++++-- cloud/azure/iothubs/monitors-iothubs.tf | 7 ++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git 
a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index 71bea7c..deca1b7 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -565,7 +565,12 @@ variable "invalid_d2c_telemetry_egress_rate_threshold_critical" { default = 90 } -variable "too_many_d2c_telemetry_ingress_nosent_threshold_critical" { +variable "too_many_d2c_telemetry_ingress_nosent_rate_threshold_critical" { description = "D2C Telemetry ingress not sent limit (critical threshold)" - default = 1 + default = 10 +} + +variable "too_many_d2c_telemetry_ingress_nosent_rate_threshold_warning" { + description = "D2C Telemetry ingress not sent limit (warning threshold)" + default = 20 } diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index f5bfcaa..ffb1764 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -481,9 +481,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { default( 100-(default(avg:zure.devices_iothubs.d2c.telemetry.ingress.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(),0) / default(avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(),0) - * 100),0) > ${var.too_many_d2c_telemetry_ingress_nosent_threshold_critical} + * 100),0) > ${var.too_many_d2c_telemetry_ingress_nosent_rate_threshold_critical} EOQ + thresholds = { + warning = var.too_many_d2c_telemetry_ingress_nosent_rate_threshold_warning + critical = var.too_many_d2c_telemetry_ingress_nosent_rate_threshold_critical + } + evaluation_delay = var.evaluation_delay new_host_delay = var.new_host_delay notify_no_data = false From 34a189888f41c393cdfaadc15fc32db41e281030 Mon Sep 17 00:00:00 2001 From: "gauthier.ampe@fr.clara.net" Date: Wed, 31 Jul 2019 09:40:56 +0200 Subject: [PATCH 3/5] MON-484 Fix value threshold --- cloud/azure/iothubs/inputs.tf | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index deca1b7..5bff36d 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -567,10 +567,10 @@ variable "invalid_d2c_telemetry_egress_rate_threshold_critical" { variable "too_many_d2c_telemetry_ingress_nosent_rate_threshold_critical" { description = "D2C Telemetry ingress not sent limit (critical threshold)" - default = 10 + default = 20 } variable "too_many_d2c_telemetry_ingress_nosent_rate_threshold_warning" { description = "D2C Telemetry ingress not sent limit (warning threshold)" - default = 20 + default = 10 } From 93da41a48cbef984f8166a7495f69038dff2c6e7 Mon Sep 17 00:00:00 2001 From: "gauthier.ampe@fr.clara.net" Date: Wed, 31 Jul 2019 10:11:44 +0200 Subject: [PATCH 4/5] MON-484 auto update --- cloud/azure/iothubs/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cloud/azure/iothubs/README.md b/cloud/azure/iothubs/README.md index e98298b..8103a55 100644 --- a/cloud/azure/iothubs/README.md +++ b/cloud/azure/iothubs/README.md @@ -128,7 +128,8 @@ Creates DataDog monitors with the following checks: | too\_many\_d2c\_telemetry\_ingress\_nosent\_enabled | Flag to enable IoT Hub unsent d2c telemetry monitor | string | `"true"` | no | | too\_many\_d2c\_telemetry\_ingress\_nosent\_extra\_tags | Extra tags for IoT Hub unsent d2c telemetry monitor | list(string) | `[]` | no | | too\_many\_d2c\_telemetry\_ingress\_nosent\_message | Custom message for IoT Hub unsent d2c telemetry monitor | string | `""` | no | -| too\_many\_d2c\_telemetry\_ingress\_nosent\_threshold\_critical | D2C Telemetry ingress not sent limit (critical threshold) | string | `"0.3"` | no | +| too\_many\_d2c\_telemetry\_ingress\_nosent\_rate\_threshold\_critical | D2C Telemetry ingress not sent limit (critical threshold) | string | `"20"` | no | +| too\_many\_d2c\_telemetry\_ingress\_nosent\_rate\_threshold\_warning | D2C Telemetry ingress not 
sent limit (warning threshold) | string | `"10"` | no | | too\_many\_d2c\_telemetry\_ingress\_nosent\_timeframe | Monitor timeframe for IoT Hub unsent d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no | | total\_devices\_enabled | Flag to enable IoT Hub total devices monitor | string | `"true"` | no | | total\_devices\_extra\_tags | Extra tags for IoT Hub total devices monitor | list(string) | `[]` | no | From 6a05a1eec2381527f4d012cfe70f906ba63bf4b1 Mon Sep 17 00:00:00 2001 From: "gauthier.ampe@fr.clara.net" Date: Wed, 14 Aug 2019 13:53:27 +0200 Subject: [PATCH 5/5] MON-484 Adjust monitor telemetry ingress nosent --- cloud/azure/iothubs/monitors-iothubs.tf | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index ffb1764..f744dbb 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -478,10 +478,9 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { #For this monitor, the avg is needed to smooth out the -1 and +1 differences that we see regularly. With just a tiny diff like -1 / +1, if we set 0.3 it should not alert anymore. But if there is a bigger difference (for example 20), the average will be strongly raised and an alert will be triggered. query = < ${var.too_many_d2c_telemetry_ingress_nosent_rate_threshold_critical} + 100-(default(avg:zure.devices_iothubs.d2c.telemetry.ingress.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate() / + avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(),1) + * 100) > ${var.too_many_d2c_telemetry_ingress_nosent_rate_threshold_critical} EOQ thresholds = {