MON-80 add tags
This commit is contained in:
parent
9186c69150
commit
4f2d9bd694
@ -9,10 +9,7 @@ module "iothubs" {
|
|||||||
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/iothubs?ref=MON-80-azure-hub-iot-monitors"
|
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/iothubs?ref=MON-80-azure-hub-iot-monitors"
|
||||||
|
|
||||||
message = "${module.datadog-message-alerting.alerting-message}"
|
message = "${module.datadog-message-alerting.alerting-message}"
|
||||||
|
|
||||||
environment = "${var.environment}"
|
environment = "${var.environment}"
|
||||||
stack = "${var.stack}"
|
|
||||||
client_name = "${var.client_name}"
|
|
||||||
subscription_id = "${var.subscription_id}"
|
subscription_id = "${var.subscription_id}"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
@ -48,7 +45,6 @@ Inputs
|
|||||||
| c2d_twin_read_failed_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no |
|
| c2d_twin_read_failed_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `0` | no |
|
||||||
| c2d_twin_update_failed_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no |
|
| c2d_twin_update_failed_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `10` | no |
|
||||||
| c2d_twin_update_failed_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no |
|
| c2d_twin_update_failed_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `0` | no |
|
||||||
| client_name | Client Name | string | - | yes |
|
|
||||||
| d2c_telemetry_egress_dropped_threshold_critical | D2C Telemetry Dropped Failed limit (critical threshold) | string | `1000` | no |
|
| d2c_telemetry_egress_dropped_threshold_critical | D2C Telemetry Dropped Failed limit (critical threshold) | string | `1000` | no |
|
||||||
| d2c_telemetry_egress_dropped_threshold_warning | D2C Telemetry Dropped Failed limit (warning threshold) | string | `500` | no |
|
| d2c_telemetry_egress_dropped_threshold_warning | D2C Telemetry Dropped Failed limit (warning threshold) | string | `500` | no |
|
||||||
| d2c_telemetry_egress_fallback_threshold_critical | D2C Telemetry Fallback Failed limit (critical threshold) | string | `1000` | no |
|
| d2c_telemetry_egress_fallback_threshold_critical | D2C Telemetry Fallback Failed limit (critical threshold) | string | `1000` | no |
|
||||||
@ -77,4 +73,4 @@ Related documentation
|
|||||||
|
|
||||||
DataDog documentation: [https://docs.datadoghq.com/integrations/azure_iot_hub](https://docs.datadoghq.com/integrations/azure_iot_hub)
|
DataDog documentation: [https://docs.datadoghq.com/integrations/azure_iot_hub](https://docs.datadoghq.com/integrations/azure_iot_hub)
|
||||||
|
|
||||||
Azure IOT Hubs metrics documentation: [https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health](https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health)
|
Azure IOT Hubs metrics documentation: [https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health](https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health)
|
||||||
|
|||||||
@ -1,23 +1,26 @@
|
|||||||
|
# Global Terraform
|
||||||
variable "environment" {
|
variable "environment" {
|
||||||
description = "Architecture Environment"
|
description = "Architecture Environment"
|
||||||
type = "string"
|
type = "string"
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "client_name" {
|
|
||||||
description = "Client Name"
|
|
||||||
type = "string"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "use_filter_tags" {
|
|
||||||
description = "Filter the data with service tags if true"
|
|
||||||
default = "true"
|
|
||||||
}
|
|
||||||
|
|
||||||
variable "subscription_id" {
|
variable "subscription_id" {
|
||||||
description = "Subscription ID used to tag monitors"
|
description = "Azure account id used as filter for monitors"
|
||||||
type = "string"
|
type = "string"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "provider" {
|
||||||
|
description = "Cloud provider which the monitor and its based metric depend on"
|
||||||
|
type = "string"
|
||||||
|
default = "azure"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Value interpolated into the "resource:" tag of every monitor in this module.
|
||||||
|
variable "service" {
|
||||||
|
description = "Service monitored by this set of monitors"
|
||||||
|
type = "string"
|
||||||
|
default = "iothubs"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Global DataDog
|
||||||
variable "delay" {
|
variable "delay" {
|
||||||
description = "Delay in seconds for the metric evaluation"
|
description = "Delay in seconds for the metric evaluation"
|
||||||
default = 600
|
default = 600
|
||||||
@ -27,7 +30,12 @@ variable "message" {
|
|||||||
description = "Message sent when an alert is triggered"
|
description = "Message sent when an alert is triggered"
|
||||||
}
|
}
|
||||||
|
|
||||||
## IOT hubs
|
variable "use_filter_tags" {
|
||||||
|
description = "Filter the data with service tags if true"
|
||||||
|
default = "true"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Azure IOT hubs specific
|
||||||
variable "jobs_failed_threshold_warning" {
|
variable "jobs_failed_threshold_warning" {
|
||||||
description = "Jobs Failed rate limit (warning threshold)"
|
description = "Jobs Failed rate limit (warning threshold)"
|
||||||
default = 0
|
default = 0
|
||||||
|
|||||||
@ -2,7 +2,7 @@ data "template_file" "filter" {
|
|||||||
template = "$${filter}"
|
template = "$${filter}"
|
||||||
|
|
||||||
vars {
|
vars {
|
||||||
filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_appservices:enabled,subscription_id:%s,env:%s", var.subscription_id,var.environment) : var.subscription_id}"
|
# When use_filter_tags is "true", scope monitors by the Datadog opt-in tags and environment; otherwise scope by subscription. The fallback must not read data.template_file.filter itself, which would be a self-reference.
|
||||||
|
filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_iothubs:enabled,env:%s", var.environment) : format("subscription_id:%s", var.subscription_id)}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -12,9 +12,9 @@ resource "datadog_monitor" "too_many_jobs_failed" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):(
|
avg(last_5m):(
|
||||||
avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() /
|
avg:azure.devices_iothubs.jobs.failed{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
|
||||||
( avg:azure.devices_iothubs.jobs.failed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() +
|
( avg:azure.devices_iothubs.jobs.failed{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
|
||||||
avg:azure.devices_iothubs.jobs.completed{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() )
|
avg:azure.devices_iothubs.jobs.completed{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
|
||||||
) * 100 > ${var.jobs_failed_threshold_critical}
|
) * 100 > ${var.jobs_failed_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -34,6 +34,8 @@ resource "datadog_monitor" "too_many_jobs_failed" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_list_jobs_failed" {
|
resource "datadog_monitor" "too_many_list_jobs_failed" {
|
||||||
@ -42,9 +44,9 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):(
|
avg(last_5m):(
|
||||||
avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() /
|
avg:azure.devices_iothubs.jobs.list_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() /
|
||||||
( avg:azure.devices_iothubs.jobs.list_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() +
|
( avg:azure.devices_iothubs.jobs.list_jobs.success{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() +
|
||||||
avg:azure.devices_iothubs.jobs.list_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() )
|
avg:azure.devices_iothubs.jobs.list_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() )
|
||||||
) * 100 > ${var.listjobs_failed_threshold_critical}
|
) * 100 > ${var.listjobs_failed_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -64,6 +66,8 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_query_jobs_failed" {
|
resource "datadog_monitor" "too_many_query_jobs_failed" {
|
||||||
@ -72,9 +76,9 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):(
|
avg(last_5m):(
|
||||||
avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() /
|
avg:azure.devices_iothubs.jobs.query_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() /
|
||||||
( avg:azure.devices_iothubs.jobs.query_jobs.success{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() +
|
( avg:azure.devices_iothubs.jobs.query_jobs.success{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() +
|
||||||
avg:azure.devices_iothubs.jobs.query_jobs.failure{subscription_id:${var.subscription_id}} by {resource_group,name}.as_count() )
|
avg:azure.devices_iothubs.jobs.query_jobs.failure{${data.template_file.filter.rendered}} by {resource_group,name}.as_count() )
|
||||||
) * 100 > ${var.queryjobs_failed_threshold_critical}
|
) * 100 > ${var.queryjobs_failed_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -94,6 +98,8 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "status" {
|
resource "datadog_monitor" "status" {
|
||||||
@ -101,7 +107,7 @@ resource "datadog_monitor" "status" {
|
|||||||
message = "${var.message}"
|
message = "${var.message}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):avg:azure.devices_iothubs.status{subscription_id:${var.subscription_id}} by {name,resource_group} < 1
|
avg(last_5m):avg:azure.devices_iothubs.status{${data.template_file.filter.rendered}} by {name,resource_group} < 1
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
|
|
||||||
@ -115,6 +121,8 @@ resource "datadog_monitor" "status" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "total_devices" {
|
resource "datadog_monitor" "total_devices" {
|
||||||
@ -122,7 +130,7 @@ resource "datadog_monitor" "total_devices" {
|
|||||||
message = "${var.message}"
|
message = "${var.message}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{subscription_id:${var.subscription_id}} by {name,resource_group} == 0
|
avg(last_5m):avg:azure.devices_iothubs.devices.total_devices{${data.template_file.filter.rendered}} by {name,resource_group} == 0
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
|
|
||||||
@ -136,6 +144,8 @@ resource "datadog_monitor" "total_devices" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_c2d_methods_failed" {
|
resource "datadog_monitor" "too_many_c2d_methods_failed" {
|
||||||
@ -144,9 +154,9 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):(
|
avg(last_5m):(
|
||||||
avg:azure.devices_iothubs.c2d.methods.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() /
|
avg:azure.devices_iothubs.c2d.methods.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
|
||||||
( avg:azure.devices_iothubs.c2d.methods.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() +
|
( avg:azure.devices_iothubs.c2d.methods.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
|
||||||
avg:azure.devices_iothubs.c2d.methods.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() )
|
avg:azure.devices_iothubs.c2d.methods.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
|
||||||
) * 100 > ${var.c2d_methods_failed_threshold_critical}
|
) * 100 > ${var.c2d_methods_failed_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -166,6 +176,8 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
|
resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
|
||||||
@ -174,9 +186,9 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):(
|
avg(last_5m):(
|
||||||
avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() /
|
avg:azure.devices_iothubs.c2d.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
|
||||||
( avg:azure.devices_iothubs.c2d.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() +
|
( avg:azure.devices_iothubs.c2d.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
|
||||||
avg:azure.devices_iothubs.c2d.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() )
|
avg:azure.devices_iothubs.c2d.twin.read.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
|
||||||
) * 100 > ${var.c2d_twin_read_failed_threshold_critical}
|
) * 100 > ${var.c2d_twin_read_failed_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -196,6 +208,8 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
|
resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
|
||||||
@ -204,9 +218,9 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):(
|
avg(last_5m):(
|
||||||
avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() /
|
avg:azure.devices_iothubs.c2d.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
|
||||||
( avg:azure.devices_iothubs.c2d.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() +
|
( avg:azure.devices_iothubs.c2d.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
|
||||||
avg:azure.devices_iothubs.c2d.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() )
|
avg:azure.devices_iothubs.c2d.twin.update.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
|
||||||
) * 100 > ${var.c2d_twin_update_failed_threshold_critical}
|
) * 100 > ${var.c2d_twin_update_failed_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -226,6 +240,8 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
|
resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
|
||||||
@ -234,9 +250,9 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):(
|
avg(last_5m):(
|
||||||
avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() /
|
avg:azure.devices_iothubs.d2c.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
|
||||||
( avg:azure.devices_iothubs.d2c.twin.read.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() +
|
( avg:azure.devices_iothubs.d2c.twin.read.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
|
||||||
avg:azure.devices_iothubs.d2c.twin.read.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() )
|
avg:azure.devices_iothubs.d2c.twin.read.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
|
||||||
) * 100 > ${var.d2c_twin_read_failed_threshold_critical}
|
) * 100 > ${var.d2c_twin_read_failed_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -256,6 +272,8 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
|
resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
|
||||||
@ -264,9 +282,9 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
avg(last_5m):(
|
avg(last_5m):(
|
||||||
avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() /
|
avg:azure.devices_iothubs.d2c.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() /
|
||||||
( avg:azure.devices_iothubs.d2c.twin.update.failure{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() +
|
( avg:azure.devices_iothubs.d2c.twin.update.failure{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() +
|
||||||
avg:azure.devices_iothubs.d2c.twin.update.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() )
|
avg:azure.devices_iothubs.d2c.twin.update.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() )
|
||||||
) * 100 > ${var.d2c_twin_update_failed_threshold_critical}
|
) * 100 > ${var.d2c_twin_update_failed_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -286,6 +304,8 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
|
resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
|
||||||
@ -294,7 +314,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
sum(last_5m): (
|
sum(last_5m): (
|
||||||
avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count()
|
avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
|
||||||
) > ${var.d2c_telemetry_egress_dropped_threshold_critical}
|
) > ${var.d2c_telemetry_egress_dropped_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -314,6 +334,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
|
resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
|
||||||
@ -322,7 +344,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
sum(last_5m): (
|
sum(last_5m): (
|
||||||
avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count()
|
avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
|
||||||
) > ${var.d2c_telemetry_egress_orphaned_threshold_critical}
|
) > ${var.d2c_telemetry_egress_orphaned_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -342,6 +364,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
|
resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
|
||||||
@ -350,7 +374,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
sum(last_5m): (
|
sum(last_5m): (
|
||||||
avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count()
|
avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
|
||||||
) > ${var.d2c_telemetry_egress_invalid_threshold_critical}
|
) > ${var.d2c_telemetry_egress_invalid_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -370,6 +394,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" {
|
resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" {
|
||||||
@ -378,7 +404,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
sum(last_5m): (
|
sum(last_5m): (
|
||||||
avg:azure.devices_iothubs.d2c.telemetry.egress.fallback{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count()
|
avg:azure.devices_iothubs.d2c.telemetry.egress.fallback{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
|
||||||
) > ${var.d2c_telemetry_egress_fallback_threshold_critical}
|
) > ${var.d2c_telemetry_egress_fallback_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -398,6 +424,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_fallback" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|
||||||
resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
|
resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
|
||||||
@ -406,8 +434,8 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
|
|||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
sum(last_5m): (
|
sum(last_5m): (
|
||||||
avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count() -
|
avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{${data.template_file.filter.rendered}} by {name,resource_group}.as_count() -
|
||||||
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{subscription_id:${var.subscription_id}} by {name,resource_group}.as_count()
|
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${data.template_file.filter.rendered}} by {name,resource_group}.as_count()
|
||||||
) > 0
|
) > 0
|
||||||
EOF
|
EOF
|
||||||
type = "query alert"
|
type = "query alert"
|
||||||
@ -422,4 +450,6 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
|
|||||||
require_full_window = true
|
require_full_window = true
|
||||||
new_host_delay = "${var.delay}"
|
new_host_delay = "${var.delay}"
|
||||||
no_data_timeframe = 20
|
no_data_timeframe = 20
|
||||||
|
|
||||||
|
tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"]
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user