From effaaf0e12d6446510700e4bc71765c8a0b37441 Mon Sep 17 00:00:00 2001
From: Alexandre Gaillet
Date: Mon, 30 Oct 2017 17:13:42 +0100
Subject: [PATCH] MON-80 Add c2d and d2c monitors

---
Reviewer notes (this area, between the three-dash separator above and the
first "diff --git" line, is commentary and not part of the commit message).

inputs.tf
  * Moves the c2d_methods_failed_message declaration below its two
    threshold variables.
  * Declares three variables for each of the prefixes c2d_twin_read_failed,
    c2d_twin_update_failed, d2c_twin_read_failed and d2c_twin_update_failed:
      <prefix>_threshold_warning   default 0
      <prefix>_threshold_critical  default 10
      <prefix>_message             no default
  * The file still ends without a trailing newline after this change.

monitors-iothubs.tf
  * Gives the closing brace of too_many_c2d_methods_failed a trailing newline.
  * Adds four datadog_monitor resources of type "query alert", one per
    prefix listed above. Each query evaluates, over avg(last_5m) and grouped
    by name and resource_group,
      failure / (failure + success) * 100
    on the azure.devices_iothubs.<c2d|d2c>.twin.<read|update> metrics and
    compares the result with the matching <prefix>_threshold_critical.
  * The warning and critical values in each thresholds block come from the
    same variables; the critical one is the value used inside the query.
  * All four monitors share the same options: no no-data notification,
    evaluation_delay and new_host_delay taken from var.delay, a renotify
    interval of 60, and require_full_window enabled.

 cloud/azure/iothubs/inputs.tf           | 46 +++++++++++-
 cloud/azure/iothubs/monitors-iothubs.tf | 98 ++++++++++++++++++++++++-
 2 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf
index 38b1b44..093b3a3 100644
--- a/cloud/azure/iothubs/inputs.tf
+++ b/cloud/azure/iothubs/inputs.tf
@@ -43,12 +43,52 @@ variable "status_message" {}
 
 variable "total_devices_message" {}
 
-variable "c2d_methods_failed_message" {}
-
 variable "c2d_methods_failed_threshold_warning" {
   default = 0
 }
 
 variable "c2d_methods_failed_threshold_critical" {
   default = 10
-}
\ No newline at end of file
+}
+
+variable "c2d_methods_failed_message" {}
+
+variable "c2d_twin_read_failed_threshold_warning" {
+  default = 0
+}
+
+variable "c2d_twin_read_failed_threshold_critical" {
+  default = 10
+}
+
+variable "c2d_twin_read_failed_message" {}
+
+variable "c2d_twin_update_failed_threshold_warning" {
+  default = 0
+}
+
+variable "c2d_twin_update_failed_threshold_critical" {
+  default = 10
+}
+
+variable "c2d_twin_update_failed_message" {}
+
+variable "d2c_twin_read_failed_threshold_warning" {
+  default = 0
+}
+
+variable "d2c_twin_read_failed_threshold_critical" {
+  default = 10
+}
+
+variable "d2c_twin_read_failed_message" {}
+
+variable "d2c_twin_update_failed_threshold_warning" {
+  default = 0
+}
+
+variable "d2c_twin_update_failed_threshold_critical" {
+  default = 10
+}
+
+variable "d2c_twin_update_failed_message" {}
\ No newline at end of file
diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf
index 12f3d9a..8d44dde 100644
--- a/cloud/azure/iothubs/monitors-iothubs.tf
+++ b/cloud/azure/iothubs/monitors-iothubs.tf
@@ -130,4 +130,100 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
   require_full_window = true
   new_host_delay      = "${var.delay}"
   no_data_timeframe   = 20
-}
\ No newline at end of file
+}
+
+resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
+  name    = "[${var.environment}] Too many c2d twin read failure on {{name}} "
+  message = "${var.c2d_twin_read_failed_message}"
+
+  query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.read.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.read.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.read.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_read_failed_threshold_critical}"
+  type  = "query alert"
+
+  thresholds {
+    warning  = "${var.c2d_twin_read_failed_threshold_warning}"
+    critical = "${var.c2d_twin_read_failed_threshold_critical}"
+  }
+
+  notify_no_data      = false
+  evaluation_delay    = "${var.delay}"
+  renotify_interval   = 60
+  notify_audit        = false
+  timeout_h           = 0
+  include_tags        = true
+  locked              = false
+  require_full_window = true
+  new_host_delay      = "${var.delay}"
+  no_data_timeframe   = 20
+}
+
+resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
+  name    = "[${var.environment}] Too many c2d twin update failure on {{name}} "
+  message = "${var.c2d_twin_update_failed_message}"
+
+  query = "avg(last_5m):( avg:azure.devices_iothubs.c2d.twin.update.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.c2d.twin.update.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.c2d.twin.update.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.c2d_twin_update_failed_threshold_critical}"
+  type  = "query alert"
+
+  thresholds {
+    warning  = "${var.c2d_twin_update_failed_threshold_warning}"
+    critical = "${var.c2d_twin_update_failed_threshold_critical}"
+  }
+
+  notify_no_data      = false
+  evaluation_delay    = "${var.delay}"
+  renotify_interval   = 60
+  notify_audit        = false
+  timeout_h           = 0
+  include_tags        = true
+  locked              = false
+  require_full_window = true
+  new_host_delay      = "${var.delay}"
+  no_data_timeframe   = 20
+}
+
+resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
+  name    = "[${var.environment}] Too many d2c twin read failure on {{name}} "
+  message = "${var.d2c_twin_read_failed_message}"
+
+  query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.read.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.read.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.read.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_read_failed_threshold_critical}"
+  type  = "query alert"
+
+  thresholds {
+    warning  = "${var.d2c_twin_read_failed_threshold_warning}"
+    critical = "${var.d2c_twin_read_failed_threshold_critical}"
+  }
+
+  notify_no_data      = false
+  evaluation_delay    = "${var.delay}"
+  renotify_interval   = 60
+  notify_audit        = false
+  timeout_h           = 0
+  include_tags        = true
+  locked              = false
+  require_full_window = true
+  new_host_delay      = "${var.delay}"
+  no_data_timeframe   = 20
+}
+
+resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
+  name    = "[${var.environment}] Too many d2c twin update failure on {{name}} "
+  message = "${var.d2c_twin_update_failed_message}"
+
+  query = "avg(last_5m):( avg:azure.devices_iothubs.d2c.twin.update.failure{*} by {name,resource_group}.as_count() / ( avg:azure.devices_iothubs.d2c.twin.update.failure{*} by {name,resource_group}.as_count() + avg:azure.devices_iothubs.d2c.twin.update.success{*} by {name,resource_group}.as_count() ) ) * 100 > ${var.d2c_twin_update_failed_threshold_critical}"
+  type  = "query alert"
+
+  thresholds {
+    warning  = "${var.d2c_twin_update_failed_threshold_warning}"
+    critical = "${var.d2c_twin_update_failed_threshold_critical}"
+  }
+
+  notify_no_data      = false
+  evaluation_delay    = "${var.delay}"
+  renotify_interval   = 60
+  notify_audit        = false
+  timeout_h           = 0
+  include_tags        = true
+  locked              = false
+  require_full_window = true
+  new_host_delay      = "${var.delay}"
+  no_data_timeframe   = 20
+}