MON-80 add tags

This commit is contained in:
Quentin Manfroi 2017-11-03 20:35:35 +01:00
parent 9186c69150
commit 4f2d9bd694
3 changed files with 85 additions and 51 deletions

View File

@ -9,10 +9,7 @@ module "iothubs" {
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/iothubs?ref=MON-80-azure-hub-iot-monitors" source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/iothubs?ref=MON-80-azure-hub-iot-monitors"
message = "${module.datadog-message-alerting.alerting-message}" message = "${module.datadog-message-alerting.alerting-message}"
environment = "${var.environment}" environment = "${var.environment}"
stack = "${var.stack}"
client_name = "${var.client_name}"
subscription_id = "${var.subscription_id}" subscription_id = "${var.subscription_id}"
} }
``` ```
@ -48,7 +45,6 @@ Inputs
| c2d_twin_read_failed_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no | | c2d_twin_read_failed_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no |
| c2d_twin_update_failed_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no | | c2d_twin_update_failed_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no |
| c2d_twin_update_failed_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no | | c2d_twin_update_failed_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no |
| client_name | Client Name | string | - | yes |
| d2c_telemetry_egress_dropped_threshold_critical | D2C Telemetry Dropped Failed limit (critical threshold) | string | `1000` | no | | d2c_telemetry_egress_dropped_threshold_critical | D2C Telemetry Dropped Failed limit (critical threshold) | string | `1000` | no |
| d2c_telemetry_egress_dropped_threshold_warning | D2C Telemetry Dropped Failed limit (warning threshold) | string | `500` | no | | d2c_telemetry_egress_dropped_threshold_warning | D2C Telemetry Dropped Failed limit (warning threshold) | string | `500` | no |
| d2c_telemetry_egress_fallback_threshold_critical | D2C Telemetry Fallback Failed limit (critical threshold) | string | `1000` | no | | d2c_telemetry_egress_fallback_threshold_critical | D2C Telemetry Fallback Failed limit (critical threshold) | string | `1000` | no |
@ -77,4 +73,4 @@ Related documentation
DataDog documentation: [https://docs.datadoghq.com/integrations/azure_iot_hub](https://docs.datadoghq.com/integrations/azure_iot_hub) DataDog documentation: [https://docs.datadoghq.com/integrations/azure_iot_hub](https://docs.datadoghq.com/integrations/azure_iot_hub)
Azure IOT Hubs metrics documentation: [https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health](https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health) Azure IOT Hubs metrics documentation: [https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health](https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health)

View File

@ -1,23 +1,26 @@
# Global Terraform
variable "environment" { variable "environment" {
description = "Architecture Environment" description = "Architecture Environment"
type = "string" type = "string"
} }
variable "client_name" {
description = "Client Name"
type = "string"
}
variable "use_filter_tags" {
description = "Filter the data with service tags if true"
default = "true"
}
variable "subscription_id" { variable "subscription_id" {
description = "Subscription ID used to tag monitors" description = "Azure account id used as filter for monitors"
type = "string" type = "string"
} }
variable "provider" {
description = "Cloud provider which the monitor and its based metric depend on"
type = "string"
default = "azure"
}
# Name of the monitored service; it is emitted on every monitor as the
# "resource:<service>" tag, so the default must identify Azure IoT Hubs
# (this module only queries azure.devices_iothubs.* metrics).
variable "service" {
description = "Service monitored by this set of monitors"
type = "string"
default = "iothubs"
}
# Global DataDog
variable "delay" { variable "delay" {
description = "Delay in seconds for the metric evaluation" description = "Delay in seconds for the metric evaluation"
default = 600 default = 600
@ -27,7 +30,12 @@ variable "message" {
description = "Message sent when an alert is triggered" description = "Message sent when an alert is triggered"
} }
## IOT hubs variable "use_filter_tags" {
description = "Filter the data with service tags if true"
default = "true"
}
# Azure IOT hubs specific
variable "jobs_failed_threshold_warning" { variable "jobs_failed_threshold_warning" {
description = "Jobs Failed rate limit (warning threshold)" description = "Jobs Failed rate limit (warning threshold)"
default = 0 default = 0

View File

@ -2,7 +2,7 @@ data "template_file" "filter" {
template = "$${filter}" template = "$${filter}"
vars { vars {
# Tag scope injected into every monitor query of this module.
# - use_filter_tags == "true": only resources carrying the dd_* opt-in tags for the given env.
# - otherwise: fall back to the whole subscription. A data source cannot read its own
#   rendered output (Terraform rejects the self reference), so the fallback is built here.
# NOTE(review): confirm "dd_azure_iothubs" is the opt-in tag name used on the IoT Hub resources.
filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_appservices:enabled,subscription_id:%s,env:%s", var.subscription_id,var.environment) : var.subscription_id}" filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_iothubs:enabled,env:%s", var.environment) : format("subscription_id:%s", var.subscription_id)}"
} }
} }
@ -12,9 +12,9 @@ resource "datadog_monitor" "too_many_jobs_failed" {
query = <<EOF query = <<EOF
avg(last_5m):( avg(last_5m):(
avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / avg:azure.devices_iothubs.jobs.failed{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + ( avg:azure.devices_iothubs.jobs.failed{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
avg:azure.devices_iothubs.jobs.completed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) avg:azure.devices_iothubs.jobs.completed{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
) * 100 > ${var.jobs_failed_threshold_critical} ) * 100 > ${var.jobs_failed_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -34,6 +34,8 @@ resource "datadog_monitor" "too_many_jobs_failed" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_list_jobs_failed" { resource "datadog_monitor" "too_many_list_jobs_failed" {
@ -42,9 +44,9 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
query = <<EOF query = <<EOF
avg(last_5m):( avg(last_5m):(
avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / avg:azure.devices_iothubs.jobs.list_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() /
( avg:azure.devices_iothubs.jobs.list_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + ( avg:azure.devices_iothubs.jobs.list_jobs.success{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() +
avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) avg:azure.devices_iothubs.jobs.list_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() )
) * 100 > ${var.listjobs_failed_threshold_critical} ) * 100 > ${var.listjobs_failed_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -64,6 +66,8 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_query_jobs_failed" { resource "datadog_monitor" "too_many_query_jobs_failed" {
@ -72,9 +76,9 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
query = <<EOF query = <<EOF
avg(last_5m):( avg(last_5m):(
avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() / avg:azure.devices_iothubs.jobs.query_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() /
( avg:azure.devices_iothubs.jobs.query_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() + ( avg:azure.devices_iothubs.jobs.query_jobs.success{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() +
avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() ) avg:azure.devices_iothubs.jobs.query_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() )
) * 100 > ${var.queryjobs_failed_threshold_critical} ) * 100 > ${var.queryjobs_failed_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -94,6 +98,8 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "status" { resource "datadog_monitor" "status" {
@ -101,7 +107,7 @@ resource "datadog_monitor" "status" {
message = "${var.message}" message = "${var.message}"
query = <<EOF query = <<EOF
avg(last_5m):avg:azure.devices_iothubs.status{subscription_id:${var.subscription_id}} by {name,resource_group} < 1 avg(last_5m):avg:azure.devices_iothubs.status{${data.template_file.filter.rendered}} by {name,resource_group} < 1
EOF EOF
type = "query alert" type = "query alert"
@ -115,6 +121,8 @@ resource "datadog_monitor" "status" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "total_devices" { resource "datadog_monitor" "total_devices" {
@ -122,7 +130,7 @@ resource "datadog_monitor" "total_devices" {
message = "${var.message}" message = "${var.message}"
query = <<EOF query = <<EOF
avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{subscription_id:${var.subscription_id}} by {name,resource_group} == 0 avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{${data.template_file.filter.rendered}} by {name,resource_group} == 0
EOF EOF
type = "query alert" type = "query alert"
@ -136,6 +144,8 @@ resource "datadog_monitor" "total_devices" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_c2d_methods_failed" { resource "datadog_monitor" "too_many_c2d_methods_failed" {
@ -144,9 +154,9 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
query = <<EOF query = <<EOF
avg(last_5m):( avg(last_5m):(
avg:azure.devices_iothubs.c2d.methods.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / avg:azure.devices_iothubs.c2d.methods.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
( avg:azure.devices_iothubs.c2d.methods.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + ( avg:azure.devices_iothubs.c2d.methods.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
avg:azure.devices_iothubs.c2d.methods.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) avg:azure.devices_iothubs.c2d.methods.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
) * 100 > ${var.c2d_methods_failed_threshold_critical} ) * 100 > ${var.c2d_methods_failed_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -166,6 +176,8 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_c2d_twin_read_failed" { resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
@ -174,9 +186,9 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
query = <<EOF query = <<EOF
avg(last_5m):( avg(last_5m):(
avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / avg:azure.devices_iothubs.c2d.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
( avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + ( avg:azure.devices_iothubs.c2d.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
avg:azure.devices_iothubs.c2d.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) avg:azure.devices_iothubs.c2d.twin.read.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
) * 100 > ${var.c2d_twin_read_failed_threshold_critical} ) * 100 > ${var.c2d_twin_read_failed_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -196,6 +208,8 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_c2d_twin_update_failed" { resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
@ -204,9 +218,9 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
query = <<EOF query = <<EOF
avg(last_5m):( avg(last_5m):(
avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / avg:azure.devices_iothubs.c2d.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
( avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + ( avg:azure.devices_iothubs.c2d.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
avg:azure.devices_iothubs.c2d.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) avg:azure.devices_iothubs.c2d.twin.update.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
) * 100 > ${var.c2d_twin_update_failed_threshold_critical} ) * 100 > ${var.c2d_twin_update_failed_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -226,6 +240,8 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_d2c_twin_read_failed" { resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
@ -234,9 +250,9 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
query = <<EOF query = <<EOF
avg(last_5m):( avg(last_5m):(
avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / avg:azure.devices_iothubs.d2c.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
( avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + ( avg:azure.devices_iothubs.d2c.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
avg:azure.devices_iothubs.d2c.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) avg:azure.devices_iothubs.d2c.twin.read.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
) * 100 > ${var.d2c_twin_read_failed_threshold_critical} ) * 100 > ${var.d2c_twin_read_failed_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -256,6 +272,8 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_d2c_twin_update_failed" { resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
@ -264,9 +282,9 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
query = <<EOF query = <<EOF
avg(last_5m):( avg(last_5m):(
avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() / avg:azure.devices_iothubs.d2c.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
( avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() + ( avg:azure.devices_iothubs.d2c.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
avg:azure.devices_iothubs.d2c.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() ) avg:azure.devices_iothubs.d2c.twin.update.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
) * 100 > ${var.d2c_twin_update_failed_threshold_critical} ) * 100 > ${var.d2c_twin_update_failed_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -286,6 +304,8 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" { resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
@ -294,7 +314,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
query = <<EOF query = <<EOF
sum(last_5m): ( sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
) > ${var.d2c_telemetry_egress_dropped_threshold_critical} ) > ${var.d2c_telemetry_egress_dropped_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -314,6 +334,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" { resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
@ -322,7 +344,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
query = <<EOF query = <<EOF
sum(last_5m): ( sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
) > ${var.d2c_telemetry_egress_orphaned_threshold_critical} ) > ${var.d2c_telemetry_egress_orphaned_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -342,6 +364,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" { resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
@ -350,7 +374,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
query = <<EOF query = <<EOF
sum(last_5m): ( sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
) > ${var.d2c_telemetry_egress_invalid_threshold_critical} ) > ${var.d2c_telemetry_egress_invalid_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -370,6 +394,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" { resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" {
@ -378,7 +404,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" {
query = <<EOF query = <<EOF
sum(last_5m): ( sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.egress.fallback{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() avg:azure.devices_iothubs.d2c.telemetry.egress.fallback{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
) > ${var.d2c_telemetry_egress_fallback_threshold_critical} ) > ${var.d2c_telemetry_egress_fallback_threshold_critical}
EOF EOF
type = "query alert" type = "query alert"
@ -398,6 +424,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }
resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" { resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
@ -406,8 +434,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
query = <<EOF query = <<EOF
sum(last_5m): ( sum(last_5m): (
avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() - avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() -
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
) > 0 ) > 0
EOF EOF
type = "query alert" type = "query alert"
@ -422,4 +450,6 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.delay}"
no_data_timeframe = 20 no_data_timeframe = 20
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
} }