diff --git a/cloud/azure/iothubs/README.md b/cloud/azure/iothubs/README.md index de92272..e98298b 100644 --- a/cloud/azure/iothubs/README.md +++ b/cloud/azure/iothubs/README.md @@ -128,6 +128,7 @@ Creates DataDog monitors with the following checks: | too\_many\_d2c\_telemetry\_ingress\_nosent\_enabled | Flag to enable IoT Hub unsent d2c telemetry monitor | string | `"true"` | no | | too\_many\_d2c\_telemetry\_ingress\_nosent\_extra\_tags | Extra tags for IoT Hub unsent d2c telemetry monitor | list(string) | `[]` | no | | too\_many\_d2c\_telemetry\_ingress\_nosent\_message | Custom message for IoT Hub unsent d2c telemetry monitor | string | `""` | no | +| too\_many\_d2c\_telemetry\_ingress\_nosent\_threshold\_critical | D2C Telemetry ingress not sent limit (critical threshold) | string | `"0.3"` | no | | too\_many\_d2c\_telemetry\_ingress\_nosent\_timeframe | Monitor timeframe for IoT Hub unsent d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no | | total\_devices\_enabled | Flag to enable IoT Hub total devices monitor | string | `"true"` | no | | total\_devices\_extra\_tags | Extra tags for IoT Hub total devices monitor | list(string) | `[]` | no | diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index 1fa7bf5..4c813a8 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -565,3 +565,7 @@ variable "invalid_d2c_telemetry_egress_rate_threshold_critical" { default = 90 } +variable "too_many_d2c_telemetry_ingress_nosent_threshold_critical" { + description = "D2C Telemetry ingress not sent limit (critical threshold)" + default = 0.3 +} diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 1b553f1..0f4e251 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -475,12 +475,13 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { 
message = coalesce(var.too_many_d2c_telemetry_ingress_nosent_message, var.message) type = "query alert" + # For this monitor, the avg is needed to smooth out the -1 / +1 differences that we see regularly. With just a tiny diff like -1 / +1, a threshold of 0.3 should no longer trigger the alert. But if there is a bigger difference (for example 20), the average will be strongly raised and an alert will be triggered. query = < 0 + avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate() - + avg:azure.devices_iothubs.d2c.telemetry.ingress.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate() + , 0) > ${var.too_many_d2c_telemetry_ingress_nosent_threshold_critical} EOQ evaluation_delay = var.evaluation_delay @@ -499,4 +500,3 @@ EOQ ignore_changes = ["silenced"] } } -