MON-335 Fix IoT Hub monitors so that alerts recover when no data is reported
This commit is contained in:
parent
3ce5b9c829
commit
93eccaf593
@ -4,11 +4,12 @@ resource "datadog_monitor" "too_many_jobs_failed" {
|
|||||||
message = "${coalesce(var.failed_jobs_rate_message, var.message)}"
|
message = "${coalesce(var.failed_jobs_rate_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.failed_jobs_rate_time_aggregator}(${var.failed_jobs_rate_timeframe}):(
|
${var.failed_jobs_rate_time_aggregator}(${var.failed_jobs_rate_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.jobs.completed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.jobs.completed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.failed_jobs_rate_threshold_critical}
|
* 100 , 0) > ${var.failed_jobs_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -20,7 +21,7 @@ resource "datadog_monitor" "too_many_jobs_failed" {
|
|||||||
|
|
||||||
silenced = "${var.failed_jobs_rate_silenced}"
|
silenced = "${var.failed_jobs_rate_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -39,11 +40,12 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
|
|||||||
message = "${coalesce(var.failed_listjobs_rate_message, var.message)}"
|
message = "${coalesce(var.failed_listjobs_rate_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.failed_listjobs_rate_time_aggregator}(${var.failed_listjobs_rate_timeframe}):(
|
${var.failed_listjobs_rate_time_aggregator}(${var.failed_listjobs_rate_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.jobs.list_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.jobs.list_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.failed_listjobs_rate_threshold_critical}
|
* 100, 0) > ${var.failed_listjobs_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -55,7 +57,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
|
|||||||
|
|
||||||
silenced = "${var.failed_listjobs_rate_silenced}"
|
silenced = "${var.failed_listjobs_rate_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -74,11 +76,12 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
|
|||||||
message = "${coalesce(var.failed_queryjobs_rate_message, var.message)}"
|
message = "${coalesce(var.failed_queryjobs_rate_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.failed_queryjobs_rate_time_aggregator}(${var.failed_queryjobs_rate_timeframe}):(
|
${var.failed_queryjobs_rate_time_aggregator}(${var.failed_queryjobs_rate_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.jobs.query_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.jobs.query_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.failed_queryjobs_rate_threshold_critical}
|
* 100, 0) > ${var.failed_queryjobs_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -90,7 +93,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
|
|||||||
|
|
||||||
silenced = "${var.failed_queryjobs_rate_silenced}"
|
silenced = "${var.failed_queryjobs_rate_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -165,11 +168,12 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
|
|||||||
message = "${coalesce(var.failed_c2d_methods_rate_message, var.message)}"
|
message = "${coalesce(var.failed_c2d_methods_rate_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.failed_c2d_methods_rate_time_aggregator}(${var.failed_c2d_methods_rate_timeframe}):(
|
${var.failed_c2d_methods_rate_time_aggregator}(${var.failed_c2d_methods_rate_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.c2d.methods.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.c2d.methods.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.failed_c2d_methods_rate_threshold_critical}
|
* 100, 0) > ${var.failed_c2d_methods_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -181,7 +185,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
|
|||||||
|
|
||||||
silenced = "${var.failed_c2d_methods_rate_silenced}"
|
silenced = "${var.failed_c2d_methods_rate_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -200,11 +204,12 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
|
|||||||
message = "${coalesce(var.failed_c2d_twin_read_rate_message, var.message)}"
|
message = "${coalesce(var.failed_c2d_twin_read_rate_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.failed_c2d_twin_read_rate_time_aggregator}(${var.failed_c2d_twin_read_rate_timeframe}):(
|
${var.failed_c2d_twin_read_rate_time_aggregator}(${var.failed_c2d_twin_read_rate_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.c2d.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.c2d.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.failed_c2d_twin_read_rate_threshold_critical}
|
* 100, 0) > ${var.failed_c2d_twin_read_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -216,7 +221,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
|
|||||||
|
|
||||||
silenced = "${var.failed_c2d_twin_read_rate_silenced}"
|
silenced = "${var.failed_c2d_twin_read_rate_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -235,11 +240,12 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
|
|||||||
message = "${coalesce(var.failed_c2d_twin_update_rate_message, var.message)}"
|
message = "${coalesce(var.failed_c2d_twin_update_rate_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.failed_c2d_twin_update_rate_time_aggregator}(${var.failed_c2d_twin_update_rate_timeframe}):(
|
${var.failed_c2d_twin_update_rate_time_aggregator}(${var.failed_c2d_twin_update_rate_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.c2d.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.c2d.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.failed_c2d_twin_update_rate_threshold_critical}
|
* 100, 0) > ${var.failed_c2d_twin_update_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -251,7 +257,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
|
|||||||
|
|
||||||
silenced = "${var.failed_c2d_twin_update_rate_silenced}"
|
silenced = "${var.failed_c2d_twin_update_rate_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -270,11 +276,12 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
|
|||||||
message = "${coalesce(var.failed_d2c_twin_read_rate_message, var.message)}"
|
message = "${coalesce(var.failed_d2c_twin_read_rate_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.failed_d2c_twin_read_rate_time_aggregator}(${var.failed_d2c_twin_read_rate_timeframe}):(
|
${var.failed_d2c_twin_read_rate_time_aggregator}(${var.failed_d2c_twin_read_rate_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.d2c.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.failed_d2c_twin_read_rate_threshold_critical}
|
* 100, 0) > ${var.failed_d2c_twin_read_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -286,7 +293,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
|
|||||||
|
|
||||||
silenced = "${var.failed_d2c_twin_read_rate_silenced}"
|
silenced = "${var.failed_d2c_twin_read_rate_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -305,11 +312,12 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
|
|||||||
message = "${coalesce(var.failed_d2c_twin_update_rate_message, var.message)}"
|
message = "${coalesce(var.failed_d2c_twin_update_rate_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.failed_d2c_twin_update_rate_time_aggregator}(${var.failed_d2c_twin_update_rate_timeframe}):(
|
${var.failed_d2c_twin_update_rate_time_aggregator}(${var.failed_d2c_twin_update_rate_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.d2c.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.failed_d2c_twin_update_rate_threshold_critical}
|
* 100, 0) > ${var.failed_d2c_twin_update_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -321,7 +329,7 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
|
|||||||
|
|
||||||
silenced = "${var.failed_d2c_twin_update_rate_silenced}"
|
silenced = "${var.failed_d2c_twin_update_rate_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -340,13 +348,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
|
|||||||
message = "${coalesce(var.dropped_d2c_telemetry_egress_message, var.message)}"
|
message = "${coalesce(var.dropped_d2c_telemetry_egress_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.dropped_d2c_telemetry_egress_time_aggregator}(${var.dropped_d2c_telemetry_egress_timeframe}): (
|
${var.dropped_d2c_telemetry_egress_time_aggregator}(${var.dropped_d2c_telemetry_egress_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.dropped_d2c_telemetry_egress_rate_threshold_critical}
|
* 100, 0) > ${var.dropped_d2c_telemetry_egress_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -358,7 +367,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
|
|||||||
|
|
||||||
silenced = "${var.dropped_d2c_telemetry_egress_silenced}"
|
silenced = "${var.dropped_d2c_telemetry_egress_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -377,13 +386,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
|
|||||||
message = "${coalesce(var.orphaned_d2c_telemetry_egress_message, var.message)}"
|
message = "${coalesce(var.orphaned_d2c_telemetry_egress_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.orphaned_d2c_telemetry_egress_time_aggregator}(${var.orphaned_d2c_telemetry_egress_timeframe}): (
|
${var.orphaned_d2c_telemetry_egress_time_aggregator}(${var.orphaned_d2c_telemetry_egress_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.orphaned_d2c_telemetry_egress_rate_threshold_critical}
|
* 100, 0) > ${var.orphaned_d2c_telemetry_egress_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -395,7 +405,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
|
|||||||
|
|
||||||
silenced = "${var.orphaned_d2c_telemetry_egress_silenced}"
|
silenced = "${var.orphaned_d2c_telemetry_egress_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -414,13 +424,14 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
|
|||||||
message = "${coalesce(var.invalid_d2c_telemetry_egress_message, var.message)}"
|
message = "${coalesce(var.invalid_d2c_telemetry_egress_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
${var.invalid_d2c_telemetry_egress_time_aggregator}(${var.invalid_d2c_telemetry_egress_timeframe}): (
|
${var.invalid_d2c_telemetry_egress_time_aggregator}(${var.invalid_d2c_telemetry_egress_timeframe}):
|
||||||
|
default(
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
default(avg:azure.devices_iothubs.d2c.telemetry.egress.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) )
|
||||||
) * 100 > ${var.invalid_d2c_telemetry_egress_rate_threshold_critical}
|
* 100, 0) > ${var.invalid_d2c_telemetry_egress_rate_threshold_critical}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -432,7 +443,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
|
|||||||
|
|
||||||
silenced = "${var.invalid_d2c_telemetry_egress_silenced}"
|
silenced = "${var.invalid_d2c_telemetry_egress_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
@ -451,17 +462,18 @@ resource "datadog_monitor" "too_many_d2c_telemetry_ingress_nosent" {
|
|||||||
message = "${coalesce(var.too_many_d2c_telemetry_ingress_nosent_message, var.message)}"
|
message = "${coalesce(var.too_many_d2c_telemetry_ingress_nosent_message, var.message)}"
|
||||||
|
|
||||||
query = <<EOF
|
query = <<EOF
|
||||||
sum(${var.too_many_d2c_telemetry_ingress_nosent_timeframe}): (
|
sum(${var.too_many_d2c_telemetry_ingress_nosent_timeframe}):
|
||||||
|
default(
|
||||||
avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{${var.filter_tags}} by {resource_group,region,name}.as_count() -
|
avg:azure.devices_iothubs.d2c.telemetry.ingress.all_protocol{${var.filter_tags}} by {resource_group,region,name}.as_count() -
|
||||||
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${var.filter_tags}} by {resource_group,region,name}.as_count()
|
avg:azure.devices_iothubs.d2c.telemetry.ingress.success{${var.filter_tags}} by {resource_group,region,name}.as_count()
|
||||||
) > 0
|
, 0) > 0
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
|
|
||||||
silenced = "${var.too_many_d2c_telemetry_ingress_nosent_silenced}"
|
silenced = "${var.too_many_d2c_telemetry_ingress_nosent_silenced}"
|
||||||
|
|
||||||
notify_no_data = false
|
notify_no_data = true
|
||||||
evaluation_delay = "${var.evaluation_delay}"
|
evaluation_delay = "${var.evaluation_delay}"
|
||||||
renotify_interval = 0
|
renotify_interval = 0
|
||||||
notify_audit = false
|
notify_audit = false
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user