MON-332 - Filter-tags added on Azure IOTHub monitors

This commit is contained in:
Alexandre Gaillet 2018-10-10 17:40:15 +02:00
parent 919e0261c5
commit 715fe91a7b
4 changed files with 90 additions and 82 deletions

View File

@ -109,7 +109,9 @@ Creates DataDog monitors with the following checks:
| failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `50` | no | | failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `50` | no |
| failed_queryjobs_rate_time_aggregator | Monitor aggregator for IoT Hub failed query jobs [available values: min, max, sum or avg] | string | `min` | no | | failed_queryjobs_rate_time_aggregator | Monitor aggregator for IoT Hub failed query jobs [available values: min, max, sum or avg] | string | `min` | no |
| failed_queryjobs_rate_timeframe | Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | failed_queryjobs_rate_timeframe | Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| filter_tags | Tags used for filtering | string | `*` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_custom_excluded | Tags excluded for custom filtering when filter_tags_use_defaults is false | string | `` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| invalid_d2c_telemetry_egress_enabled | Flag to enable IoT Hub invalid d2c telemetry monitor | string | `true` | no | | invalid_d2c_telemetry_egress_enabled | Flag to enable IoT Hub invalid d2c telemetry monitor | string | `true` | no |
| invalid_d2c_telemetry_egress_extra_tags | Extra tags for IoT Hub invalid d2c telemetry monitor | list | `[]` | no | | invalid_d2c_telemetry_egress_extra_tags | Extra tags for IoT Hub invalid d2c telemetry monitor | list | `[]` | no |
| invalid_d2c_telemetry_egress_message | Custom message for IoT Hub invalid d2c telemetry monitor | string | `` | no | | invalid_d2c_telemetry_egress_message | Custom message for IoT Hub invalid d2c telemetry monitor | string | `` | no |

View File

@ -19,11 +19,21 @@ variable "message" {
description = "Message sent when an alert is triggered" description = "Message sent when an alert is triggered"
} }
variable "filter_tags" { variable "filter_tags_use_defaults" {
description = "Tags used for filtering" description = "Use default filter tags convention"
default = "true"
}
variable "filter_tags_custom" {
description = "Tags used for custom filtering when filter_tags_use_defaults is false"
default = "*" default = "*"
} }
variable "filter_tags_custom_excluded" {
description = "Tags excluded for custom filtering when filter_tags_use_defaults is false"
default = ""
}
# IOT Hub specific variables # IOT Hub specific variables
variable "status_silenced" { variable "status_silenced" {
description = "Groups to mute for IoT Hub status monitor" description = "Groups to mute for IoT Hub status monitor"

View File

@ -0,0 +1,8 @@
# Builds the Datadog tag-filter expression shared by every IoT Hub monitor query.
# The shared module exposes an output named `query_alert`, which the monitor
# resources interpolate as `${module.filter-tags.query_alert}` in place of the
# former inline `{${var.filter_tags}}` selector.
module "filter-tags" {
source = "../../../common/filter-tags"
environment = "${var.environment}"
# Fixed resource identifier used by the default filter-tag convention.
resource = "azure_iothubs"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
# Only consulted by the shared module when filter_tags_use_defaults is "false".
filter_tags_custom = "${var.filter_tags_custom}"
# NOTE(review): var.filter_tags_custom_excluded is declared in this module's
# variables but is NOT forwarded here — confirm whether common/filter-tags
# accepts that input and whether it should be passed through.
}

View File

@ -4,12 +4,11 @@ resource "datadog_monitor" "too_many_jobs_failed" {
message = "${coalesce(var.failed_jobs_rate_message, var.message)}" message = "${coalesce(var.failed_jobs_rate_message, var.message)}"
query = <<EOF query = <<EOF
${var.failed_jobs_rate_time_aggregator}(${var.failed_jobs_rate_timeframe}): ${var.failed_jobs_rate_time_aggregator}(${var.failed_jobs_rate_timeframe}):(
default( default(avg:azure.devices_iothubs.jobs.failed${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.jobs.failed${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.jobs.completed${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.jobs.completed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) ) ) * 100 > ${var.failed_jobs_rate_threshold_critical}
* 100 , 0) > ${var.failed_jobs_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -40,12 +39,11 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
message = "${coalesce(var.failed_listjobs_rate_message, var.message)}" message = "${coalesce(var.failed_listjobs_rate_message, var.message)}"
query = <<EOF query = <<EOF
${var.failed_listjobs_rate_time_aggregator}(${var.failed_listjobs_rate_timeframe}): ${var.failed_listjobs_rate_time_aggregator}(${var.failed_listjobs_rate_timeframe}):(
default( default(avg:azure.devices_iothubs.jobs.list_jobs.failure${module.filter-tags.query_alert} by {resource_group,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.jobs.list_jobs.success${module.filter-tags.query_alert} by {resource_group,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.jobs.list_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.jobs.list_jobs.failure${module.filter-tags.query_alert} by {resource_group,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) ) ) * 100 > ${var.failed_listjobs_rate_threshold_critical}
* 100, 0) > ${var.failed_listjobs_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -76,12 +74,11 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
message = "${coalesce(var.failed_queryjobs_rate_message, var.message)}" message = "${coalesce(var.failed_queryjobs_rate_message, var.message)}"
query = <<EOF query = <<EOF
${var.failed_queryjobs_rate_time_aggregator}(${var.failed_queryjobs_rate_timeframe}): ${var.failed_queryjobs_rate_time_aggregator}(${var.failed_queryjobs_rate_timeframe}):(
default( default(avg:azure.devices_iothubs.jobs.query_jobs.failure${module.filter-tags.query_alert} by {resource_group,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.jobs.query_jobs.success${module.filter-tags.query_alert} by {resource_group,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.jobs.query_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.jobs.query_jobs.failure${module.filter-tags.query_alert} by {resource_group,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) ) ) * 100 > ${var.failed_queryjobs_rate_threshold_critical}
* 100, 0) > ${var.failed_queryjobs_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -113,7 +110,7 @@ resource "datadog_monitor" "status" {
query = <<EOF query = <<EOF
${var.status_time_aggregator}(${var.status_timeframe}): ( ${var.status_time_aggregator}(${var.status_timeframe}): (
avg:azure.devices_iothubs.status{${var.filter_tags}} by {resource_group,region,name} avg:azure.devices_iothubs.status${module.filter-tags.query_alert} by {resource_group,region,name}
) < 1 ) < 1
EOF EOF
@ -141,7 +138,7 @@ resource "datadog_monitor" "total_devices" {
query = <<EOF query = <<EOF
${var.total_devices_time_aggregator}(${var.total_devices_timeframe}): ( ${var.total_devices_time_aggregator}(${var.total_devices_timeframe}): (
avg:azure.devices_iothubs.devices.total_devices{${var.filter_tags}} by {resource_group,region,name} avg:azure.devices_iothubs.devices.total_devices${module.filter-tags.query_alert} by {resource_group,region,name}
) == 0 ) == 0
EOF EOF
@ -168,12 +165,11 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
message = "${coalesce(var.failed_c2d_methods_rate_message, var.message)}" message = "${coalesce(var.failed_c2d_methods_rate_message, var.message)}"
query = <<EOF query = <<EOF
${var.failed_c2d_methods_rate_time_aggregator}(${var.failed_c2d_methods_rate_timeframe}): ${var.failed_c2d_methods_rate_time_aggregator}(${var.failed_c2d_methods_rate_timeframe}):(
default( default(avg:azure.devices_iothubs.c2d.methods.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.c2d.methods.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.c2d.methods.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.c2d.methods.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) ) ) * 100 > ${var.failed_c2d_methods_rate_threshold_critical}
* 100, 0) > ${var.failed_c2d_methods_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -204,12 +200,11 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
message = "${coalesce(var.failed_c2d_twin_read_rate_message, var.message)}" message = "${coalesce(var.failed_c2d_twin_read_rate_message, var.message)}"
query = <<EOF query = <<EOF
${var.failed_c2d_twin_read_rate_time_aggregator}(${var.failed_c2d_twin_read_rate_timeframe}): ${var.failed_c2d_twin_read_rate_time_aggregator}(${var.failed_c2d_twin_read_rate_timeframe}):(
default( default(avg:azure.devices_iothubs.c2d.twin.read.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.c2d.twin.read.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.c2d.twin.read.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.c2d.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) ) ) * 100 > ${var.failed_c2d_twin_read_rate_threshold_critical}
* 100, 0) > ${var.failed_c2d_twin_read_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -240,12 +235,11 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
message = "${coalesce(var.failed_c2d_twin_update_rate_message, var.message)}" message = "${coalesce(var.failed_c2d_twin_update_rate_message, var.message)}"
query = <<EOF query = <<EOF
${var.failed_c2d_twin_update_rate_time_aggregator}(${var.failed_c2d_twin_update_rate_timeframe}): ${var.failed_c2d_twin_update_rate_time_aggregator}(${var.failed_c2d_twin_update_rate_timeframe}):(
default( default(avg:azure.devices_iothubs.c2d.twin.update.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.c2d.twin.update.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.c2d.twin.update.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.c2d.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) ) ) * 100 > ${var.failed_c2d_twin_update_rate_threshold_critical}
* 100, 0) > ${var.failed_c2d_twin_update_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -276,12 +270,11 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
message = "${coalesce(var.failed_d2c_twin_read_rate_message, var.message)}" message = "${coalesce(var.failed_d2c_twin_read_rate_message, var.message)}"
query = <<EOF query = <<EOF
${var.failed_d2c_twin_read_rate_time_aggregator}(${var.failed_d2c_twin_read_rate_timeframe}): ${var.failed_d2c_twin_read_rate_time_aggregator}(${var.failed_d2c_twin_read_rate_timeframe}):(
default( default(avg:azure.devices_iothubs.d2c.twin.read.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.d2c.twin.read.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.twin.read.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.d2c.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) ) ) * 100 > ${var.failed_d2c_twin_read_rate_threshold_critical}
* 100, 0) > ${var.failed_d2c_twin_read_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -312,12 +305,11 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
message = "${coalesce(var.failed_d2c_twin_update_rate_message, var.message)}" message = "${coalesce(var.failed_d2c_twin_update_rate_message, var.message)}"
query = <<EOF query = <<EOF
${var.failed_d2c_twin_update_rate_time_aggregator}(${var.failed_d2c_twin_update_rate_timeframe}): ${var.failed_d2c_twin_update_rate_time_aggregator}(${var.failed_d2c_twin_update_rate_timeframe}):(
default( default(avg:azure.devices_iothubs.d2c.twin.update.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.d2c.twin.update.failure${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.twin.update.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.d2c.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) ) ) * 100 > ${var.failed_d2c_twin_update_rate_threshold_critical}
* 100, 0) > ${var.failed_d2c_twin_update_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -348,14 +340,13 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
message = "${coalesce(var.dropped_d2c_telemetry_egress_message, var.message)}" message = "${coalesce(var.dropped_d2c_telemetry_egress_message, var.message)}"
query = <<EOF query = <<EOF
${var.dropped_d2c_telemetry_egress_time_aggregator}(${var.dropped_d2c_telemetry_egress_timeframe}): ${var.dropped_d2c_telemetry_egress_time_aggregator}(${var.dropped_d2c_telemetry_egress_timeframe}): (
default( default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.telemetry.egress.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) ) ) * 100 > ${var.dropped_d2c_telemetry_egress_rate_threshold_critical}
* 100, 0) > ${var.dropped_d2c_telemetry_egress_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -386,14 +377,13 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
message = "${coalesce(var.orphaned_d2c_telemetry_egress_message, var.message)}" message = "${coalesce(var.orphaned_d2c_telemetry_egress_message, var.message)}"
query = <<EOF query = <<EOF
${var.orphaned_d2c_telemetry_egress_time_aggregator}(${var.orphaned_d2c_telemetry_egress_timeframe}): ${var.orphaned_d2c_telemetry_egress_time_aggregator}(${var.orphaned_d2c_telemetry_egress_timeframe}): (
default( default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.telemetry.egress.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) ) ) * 100 > ${var.orphaned_d2c_telemetry_egress_rate_threshold_critical}
* 100, 0) > ${var.orphaned_d2c_telemetry_egress_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -424,14 +414,13 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
message = "${coalesce(var.invalid_d2c_telemetry_egress_message, var.message)}" message = "${coalesce(var.invalid_d2c_telemetry_egress_message, var.message)}"
query = <<EOF query = <<EOF
${var.invalid_d2c_telemetry_egress_time_aggregator}(${var.invalid_d2c_telemetry_egress_timeframe}): ${var.invalid_d2c_telemetry_egress_time_aggregator}(${var.invalid_d2c_telemetry_egress_timeframe}): (
default( default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) / (
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / ( default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) + default(avg:azure.devices_iothubs.d2c.telemetry.egress.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) )
default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) ) ) * 100 > ${var.invalid_d2c_telemetry_egress_rate_threshold_critical}
* 100, 0) > ${var.invalid_d2c_telemetry_egress_rate_threshold_critical}
EOF EOF
type = "metric alert" type = "metric alert"
@ -462,11 +451,10 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
message = "${coalesce(var.too_many_d2c_telemetry_ingress_nosent_message, var.message)}" message = "${coalesce(var.too_many_d2c_telemetry_ingress_nosent_message, var.message)}"
query = <<EOF query = <<EOF
sum(${var.too_many_d2c_telemetry_ingress_nosent_timeframe}): sum(${var.too_many_d2c_telemetry_ingress_nosent_timeframe}): (
default( avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() -
avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{${var.filter_tags}} by {resource_group,region,name}.as_count() - avg:azure.devices_iothubs.d2c.telemetry.ingress.success${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${var.filter_tags}} by {resource_group,region,name}.as_count() ) > 0
, 0) > 0
EOF EOF
type = "metric alert" type = "metric alert"