MON-309 - Missing time aggregator added
This commit is contained in:
parent
6a86364299
commit
d55c0fb468
@ -34,6 +34,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_requests_silenced | Groups to mute for API Management failed requests monitor | map | `{}` | no |
|
||||
| failed_requests_threshold_critical | Maximum acceptable percent of failed requests | string | `90` | no |
|
||||
| failed_requests_threshold_warning | Warning regarding acceptable percent of failed requests | string | `50` | no |
|
||||
| failed_requests_time_aggregator | Monitor aggregator for API Management failed requests [available values: min, max or avg] | string | `min` | no |
|
||||
| failed_requests_timeframe | Monitor timeframe for API Management failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
@ -45,6 +46,7 @@ Creates DataDog monitors with the following checks:
|
||||
| other_requests_silenced | Groups to mute for API Management other requests monitor | map | `{}` | no |
|
||||
| other_requests_threshold_critical | Maximum acceptable percent of other requests | string | `90` | no |
|
||||
| other_requests_threshold_warning | Warning regarding acceptable percent of other requests | string | `50` | no |
|
||||
| other_requests_time_aggregator | Monitor aggregator for API Management other requests [available values: min, max or avg] | string | `min` | no |
|
||||
| other_requests_timeframe | Monitor timeframe for API Management other requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| status_enabled | Flag to enable API Management status monitor | string | `true` | no |
|
||||
| status_extra_tags | Extra tags for API Management status monitor | list | `[]` | no |
|
||||
@ -58,6 +60,7 @@ Creates DataDog monitors with the following checks:
|
||||
| successful_requests_silenced | Groups to mute for API Management successful requests monitor | map | `{}` | no |
|
||||
| successful_requests_threshold_critical | Minimum acceptable percent of successful requests | string | `10` | no |
|
||||
| successful_requests_threshold_warning | Warning regarding acceptable percent of successful requests | string | `30` | no |
|
||||
| successful_requests_time_aggregator | Monitor aggregator for API Management successful requests [available values: min, max or avg] | string | `max` | no |
|
||||
| successful_requests_timeframe | Monitor timeframe for API Management successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| unauthorized_requests_enabled | Flag to enable API Management unauthorized requests monitor | string | `true` | no |
|
||||
| unauthorized_requests_extra_tags | Extra tags for API Management unauthorized requests monitor | list | `[]` | no |
|
||||
@ -65,6 +68,7 @@ Creates DataDog monitors with the following checks:
|
||||
| unauthorized_requests_silenced | Groups to mute for API Management unauthorized requests monitor | map | `{}` | no |
|
||||
| unauthorized_requests_threshold_critical | Maximum acceptable percent of unauthorized requests | string | `90` | no |
|
||||
| unauthorized_requests_threshold_warning | Warning regarding acceptable percent of unauthorized requests | string | `50` | no |
|
||||
| unauthorized_requests_time_aggregator | Monitor aggregator for API Management unauthorized requests [available values: min, max or avg] | string | `min` | no |
|
||||
| unauthorized_requests_timeframe | Monitor timeframe for API Management unauthorized requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
@ -90,6 +90,12 @@ variable "failed_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API Management failed requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_requests_timeframe" {
|
||||
description = "Monitor timeframe for API Management failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -130,6 +136,12 @@ variable "other_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "other_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API Management other requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "other_requests_timeframe" {
|
||||
description = "Monitor timeframe for API Management other requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -170,6 +182,12 @@ variable "unauthorized_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "unauthorized_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API Management unauthorized requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "unauthorized_requests_timeframe" {
|
||||
description = "Monitor timeframe for API Management unauthorized requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -210,6 +228,12 @@ variable "successful_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "successful_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for API Management successful requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "successful_requests_timeframe" {
|
||||
description = "Monitor timeframe for API Management successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -34,7 +34,7 @@ resource "datadog_monitor" "apimgt_failed_requests" {
|
||||
message = "${coalesce(var.failed_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_requests_timeframe}): (
|
||||
${var.failed_requests_time_aggregator}(${var.failed_requests_timeframe}): (
|
||||
default(avg:azure.apimanagement_service.failed_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.failed_requests_threshold_critical}
|
||||
@ -67,7 +67,7 @@ resource "datadog_monitor" "apimgt_other_requests" {
|
||||
message = "${coalesce(var.other_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.other_requests_timeframe}): (
|
||||
${var.other_requests_time_aggregator}(${var.other_requests_timeframe}): (
|
||||
default(avg:azure.apimanagement_service.other_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.other_requests_threshold_critical}
|
||||
@ -100,7 +100,7 @@ resource "datadog_monitor" "apimgt_unauthorized_requests" {
|
||||
message = "${coalesce(var.unauthorized_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.unauthorized_requests_timeframe}): (
|
||||
${var.unauthorized_requests_time_aggregator}(${var.unauthorized_requests_timeframe}): (
|
||||
default(avg:azure.apimanagement_service.unauthorized_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.unauthorized_requests_threshold_critical}
|
||||
@ -133,7 +133,7 @@ resource "datadog_monitor" "apimgt_successful_requests" {
|
||||
message = "${coalesce(var.successful_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
max(${var.successful_requests_timeframe}): (
|
||||
${var.successful_requests_time_aggregator}(${var.successful_requests_timeframe}): (
|
||||
default(avg:azure.apimanagement_service.successful_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1) /
|
||||
default(avg:azure.apimanagement_service.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 < ${var.successful_requests_threshold_critical}
|
||||
|
||||
@ -36,6 +36,7 @@ Creates DataDog monitors with the following checks:
|
||||
| http_4xx_requests_silenced | Groups to mute for App Services 4xx requests monitor | map | `{}` | no |
|
||||
| http_4xx_requests_threshold_critical | Maximum critical acceptable percent of 4xx errors | string | `90` | no |
|
||||
| http_4xx_requests_threshold_warning | Warning regarding acceptable percent of 4xx errors | string | `50` | no |
|
||||
| http_4xx_requests_time_aggregator | Monitor aggregator for App Services 4xx requests [available values: min, max or avg] | string | `min` | no |
|
||||
| http_4xx_requests_timeframe | Monitor timeframe for App Services 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| http_5xx_requests_enabled | Flag to enable App Services 5xx requests monitor | string | `true` | no |
|
||||
| http_5xx_requests_extra_tags | Extra tags for App Services 5xx requests monitor | list | `[]` | no |
|
||||
@ -43,6 +44,7 @@ Creates DataDog monitors with the following checks:
|
||||
| http_5xx_requests_silenced | Groups to mute for App Services 5xx requests monitor | map | `{}` | no |
|
||||
| http_5xx_requests_threshold_critical | Maximum critical acceptable percent of 5xx errors | string | `90` | no |
|
||||
| http_5xx_requests_threshold_warning | Warning regarding acceptable percent of 5xx errors | string | `50` | no |
|
||||
| http_5xx_requests_time_aggregator | Monitor aggregator for App Services 5xx requests [available values: min, max or avg] | string | `min` | no |
|
||||
| http_5xx_requests_timeframe | Monitor timeframe for App Services 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| http_successful_requests_enabled | Flag to enable App Services successful requests monitor | string | `true` | no |
|
||||
| http_successful_requests_extra_tags | Extra tags for App Services successful requests monitor | list | `[]` | no |
|
||||
@ -50,6 +52,7 @@ Creates DataDog monitors with the following checks:
|
||||
| http_successful_requests_silenced | Groups to mute for App Services successful requests monitor | map | `{}` | no |
|
||||
| http_successful_requests_threshold_critical | Minimum critical acceptable percent of 2xx & 3xx requests | string | `10` | no |
|
||||
| http_successful_requests_threshold_warning | Warning regarding acceptable percent of 2xx & 3xx requests | string | `30` | no |
|
||||
| http_successful_requests_time_aggregator | Monitor aggregator for App Services successful requests [available values: min, max or avg] | string | `max` | no |
|
||||
| http_successful_requests_timeframe | Monitor timeframe for App Services successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| memory_usage_enabled | Flag to enable App Services memory usage monitor | string | `true` | no |
|
||||
| memory_usage_extra_tags | Extra tags for App Services memory usage monitor | list | `[]` | no |
|
||||
|
||||
@ -144,6 +144,12 @@ variable "http_4xx_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for App Services 4xx requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_timeframe" {
|
||||
description = "Monitor timeframe for App Services 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -184,6 +190,12 @@ variable "http_5xx_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for App Services 5xx requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_timeframe" {
|
||||
description = "Monitor timeframe for App Services 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -224,6 +236,12 @@ variable "http_successful_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_successful_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for App Services successful requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "http_successful_requests_timeframe" {
|
||||
description = "Monitor timeframe for App Services successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -70,7 +70,7 @@ resource "datadog_monitor" "appservices_http_5xx_errors_count" {
|
||||
message = "${coalesce(var.http_5xx_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.http_5xx_requests_timeframe}): (
|
||||
${var.http_5xx_requests_time_aggregator}(${var.http_5xx_requests_timeframe}): (
|
||||
default(avg:azure.app_services.http5xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.http_5xx_requests_threshold_critical}
|
||||
@ -103,7 +103,7 @@ resource "datadog_monitor" "appservices_http_4xx_errors_count" {
|
||||
message = "${coalesce(var.http_4xx_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.http_4xx_requests_timeframe}): (
|
||||
${var.http_4xx_requests_time_aggregator}(${var.http_4xx_requests_timeframe}): (
|
||||
default(avg:azure.app_services.http4xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.http_4xx_requests_threshold_critical}
|
||||
@ -136,7 +136,7 @@ resource "datadog_monitor" "appservices_http_success_status_rate" {
|
||||
message = "${coalesce(var.http_successful_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
max(${var.http_successful_requests_timeframe}): ( (
|
||||
${var.http_successful_requests_time_aggregator}(${var.http_successful_requests_timeframe}): ( (
|
||||
default(avg:azure.app_services.http2xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1) +
|
||||
default(avg:azure.app_services.http3xx${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) ) /
|
||||
default(avg:azure.app_services.requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
|
||||
@ -31,6 +31,7 @@ Creates DataDog monitors with the following checks:
|
||||
| errors_rate_silenced | Groups to mute for Event Hub errors monitor | map | `{}` | no |
|
||||
| errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `90` | no |
|
||||
| errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `50` | no |
|
||||
| errors_rate_time_aggregator | Monitor aggregator for Event Hub errors [available values: min, max or avg] | string | `min` | no |
|
||||
| errors_rate_timeframe | Monitor timeframe for Event Hub errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| failed_requests_rate_enabled | Flag to enable Event Hub failed requests monitor | string | `true` | no |
|
||||
@ -39,6 +40,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_requests_rate_silenced | Groups to mute for Event Hub failed requests monitor | map | `{}` | no |
|
||||
| failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `90` | no |
|
||||
| failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `50` | no |
|
||||
| failed_requests_rate_time_aggregator | Monitor aggregator for Event Hub failed requests [available values: min, max or avg] | string | `min` | no |
|
||||
| failed_requests_rate_timeframe | Monitor timeframe for Event Hub failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
|
||||
@ -90,6 +90,12 @@ variable "failed_requests_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_requests_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for Event Hub failed requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_requests_rate_timeframe" {
|
||||
description = "Monitor timeframe for Event Hub failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -130,6 +136,12 @@ variable "errors_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "errors_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for Event Hub errors [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "errors_rate_timeframe" {
|
||||
description = "Monitor timeframe for Event Hub errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -32,7 +32,7 @@ resource "datadog_monitor" "eventhub_failed_requests" {
|
||||
message = "${coalesce(var.failed_requests_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_requests_rate_timeframe}): (
|
||||
${var.failed_requests_rate_time_aggregator}(${var.failed_requests_rate_timeframe}): (
|
||||
default(avg:azure.eventhub_namespaces.failed_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.eventhub_namespaces.incoming_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.failed_requests_rate_thresold_critical}
|
||||
@ -66,7 +66,7 @@ resource "datadog_monitor" "eventhub_errors" {
|
||||
message = "${coalesce(var.errors_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.errors_rate_timeframe}): ( (
|
||||
${var.errors_rate_time_aggregator}(${var.errors_rate_timeframe}): ( (
|
||||
default(avg:azure.eventhub_namespaces.internal_server_errors${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.eventhub_namespaces.server_busy_errors${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.eventhub_namespaces.other_errors${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) ) /
|
||||
|
||||
@ -41,6 +41,7 @@ Creates DataDog monitors with the following checks:
|
||||
| dropped_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Dropped limit (critical threshold) | string | `90` | no |
|
||||
| dropped_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Dropped limit (warning threshold) | string | `50` | no |
|
||||
| dropped_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub dropped d2c telemetry monitor | map | `{}` | no |
|
||||
| dropped_d2c_telemetry_egress_time_aggregator | Monitor aggregator for IoT Hub dropped d2c telemetry [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| dropped_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub dropped d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| environment | Architecture Environment | string | - | yes |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
@ -50,6 +51,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_c2d_methods_rate_silenced | Groups to mute for IoT Hub failed c2d methods monitor | map | `{}` | no |
|
||||
| failed_c2d_methods_rate_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `90` | no |
|
||||
| failed_c2d_methods_rate_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `50` | no |
|
||||
| failed_c2d_methods_rate_time_aggregator | Monitor aggregator for IoT Hub failed c2d methods [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| failed_c2d_methods_rate_timeframe | Monitor timeframe for IoT Hub failed c2d methods [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| failed_c2d_twin_read_rate_enabled | Flag to enable IoT Hub failed c2d twin read monitor | string | `true` | no |
|
||||
| failed_c2d_twin_read_rate_extra_tags | Extra tags for IoT Hub failed c2d twin read monitor | list | `[]` | no |
|
||||
@ -57,6 +59,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_c2d_twin_read_rate_silenced | Groups to mute for IoT Hub failed c2d twin read monitor | map | `{}` | no |
|
||||
| failed_c2d_twin_read_rate_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `90` | no |
|
||||
| failed_c2d_twin_read_rate_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `50` | no |
|
||||
| failed_c2d_twin_read_rate_time_aggregator | Monitor aggregator for IoT Hub failed c2d twin read [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| failed_c2d_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| failed_c2d_twin_update_rate_enabled | Flag to enable IoT Hub failed c2d twin update monitor | string | `true` | no |
|
||||
| failed_c2d_twin_update_rate_extra_tags | Extra tags for IoT Hub failed c2d twin update monitor | list | `[]` | no |
|
||||
@ -64,6 +67,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_c2d_twin_update_rate_silenced | Groups to mute for IoT Hub failed c2d twin update monitor | map | `{}` | no |
|
||||
| failed_c2d_twin_update_rate_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `90` | no |
|
||||
| failed_c2d_twin_update_rate_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `50` | no |
|
||||
| failed_c2d_twin_update_rate_time_aggregator | Monitor aggregator for IoT Hub failed c2d twin update [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| failed_c2d_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| failed_d2c_twin_read_rate_enabled | Flag to enable IoT Hub failed d2c twin read monitor | string | `true` | no |
|
||||
| failed_d2c_twin_read_rate_extra_tags | Extra tags for IoT Hub failed d2c twin read monitor | list | `[]` | no |
|
||||
@ -71,6 +75,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_d2c_twin_read_rate_silenced | Groups to mute for IoT Hub failed d2c twin read monitor | map | `{}` | no |
|
||||
| failed_d2c_twin_read_rate_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `90` | no |
|
||||
| failed_d2c_twin_read_rate_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `50` | no |
|
||||
| failed_d2c_twin_read_rate_time_aggregator | Monitor aggregator for IoT Hub failed d2c twin read [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| failed_d2c_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| failed_d2c_twin_update_rate_enabled | Flag to enable IoT Hub failed d2c twin update monitor | string | `true` | no |
|
||||
| failed_d2c_twin_update_rate_extra_tags | Extra tags for IoT Hub failed d2c twin update monitor | list | `[]` | no |
|
||||
@ -78,6 +83,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_d2c_twin_update_rate_silenced | Groups to mute for IoT Hub failed d2c twin update monitor | map | `{}` | no |
|
||||
| failed_d2c_twin_update_rate_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `90` | no |
|
||||
| failed_d2c_twin_update_rate_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `50` | no |
|
||||
| failed_d2c_twin_update_rate_time_aggregator | Monitor aggregator for IoT Hub failed d2c twin update [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| failed_d2c_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| failed_jobs_rate_enabled | Flag to enable IoT Hub failed jobs monitor | string | `true` | no |
|
||||
| failed_jobs_rate_extra_tags | Extra tags for IoT Hub failed jobs monitor | list | `[]` | no |
|
||||
@ -85,6 +91,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_jobs_rate_silenced | Groups to mute for IoT Hub failed jobs monitor | map | `{}` | no |
|
||||
| failed_jobs_rate_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `90` | no |
|
||||
| failed_jobs_rate_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `50` | no |
|
||||
| failed_jobs_rate_time_aggregator | Monitor aggregator for IoT Hub failed jobs [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| failed_jobs_rate_timeframe | Monitor timeframe for IoT Hub failed jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| failed_listjobs_rate_enabled | Flag to enable IoT Hub failed list jobs monitor | string | `true` | no |
|
||||
| failed_listjobs_rate_extra_tags | Extra tags for IoT Hub failed list jobs monitor | list | `[]` | no |
|
||||
@ -92,6 +99,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_listjobs_rate_silenced | Groups to mute for IoT Hub failed list jobs monitor | map | `{}` | no |
|
||||
| failed_listjobs_rate_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `90` | no |
|
||||
| failed_listjobs_rate_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `50` | no |
|
||||
| failed_listjobs_rate_time_aggregator | Monitor aggregator for IoT Hub failed list jobs [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| failed_listjobs_rate_timeframe | Monitor timeframe for IoT Hub failed list jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| failed_queryjobs_rate_enabled | Flag to enable IoT Hub failed query jobs monitor | string | `true` | no |
|
||||
| failed_queryjobs_rate_extra_tags | Extra tags for IoT Hub failed query jobs monitor | list | `[]` | no |
|
||||
@ -99,6 +107,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_queryjobs_rate_silenced | Groups to mute for IoT Hub failed query jobs monitor | map | `{}` | no |
|
||||
| failed_queryjobs_rate_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `90` | no |
|
||||
| failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `50` | no |
|
||||
| failed_queryjobs_rate_time_aggregator | Monitor aggregator for IoT Hub failed query jobs [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| failed_queryjobs_rate_timeframe | Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| filter_tags | Tags used for filtering | string | `*` | no |
|
||||
| invalid_d2c_telemetry_egress_enabled | Flag to enable IoT Hub invalid d2c telemetry monitor | string | `true` | no |
|
||||
@ -107,6 +116,7 @@ Creates DataDog monitors with the following checks:
|
||||
| invalid_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `90` | no |
|
||||
| invalid_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `50` | no |
|
||||
| invalid_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub invalid d2c telemetry monitor | map | `{}` | no |
|
||||
| invalid_d2c_telemetry_egress_time_aggregator | Monitor aggregator for IoT Hub invalid d2c telemetry [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| invalid_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub invalid d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| message | Message sent when an alert is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
|
||||
@ -116,6 +126,7 @@ Creates DataDog monitors with the following checks:
|
||||
| orphaned_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `90` | no |
|
||||
| orphaned_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `50` | no |
|
||||
| orphaned_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub orphaned d2c telemetry monitor | map | `{}` | no |
|
||||
| orphaned_d2c_telemetry_egress_time_aggregator | Monitor aggregator for IoT Hub orphaned d2c telemetry [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| orphaned_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub orphaned d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| status_enabled | Flag to enable IoT Hub status monitor | string | `true` | no |
|
||||
| status_extra_tags | Extra tags for IoT Hub status monitor | list | `[]` | no |
|
||||
|
||||
@ -151,6 +151,12 @@ variable "failed_jobs_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_jobs_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub failed jobs [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_jobs_rate_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub failed jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -191,6 +197,12 @@ variable "failed_listjobs_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_listjobs_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub failed list jobs [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_listjobs_rate_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub failed list jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -231,6 +243,12 @@ variable "failed_queryjobs_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_queryjobs_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub failed query jobs [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_queryjobs_rate_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -271,6 +289,12 @@ variable "failed_c2d_methods_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_c2d_methods_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub failed c2d method [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_c2d_methods_rate_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub failed c2d method [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -311,6 +335,12 @@ variable "failed_c2d_twin_read_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_c2d_twin_read_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub failed c2d twin read [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_c2d_twin_read_rate_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub failed c2d twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -351,6 +381,12 @@ variable "failed_c2d_twin_update_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_c2d_twin_update_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub failed c2d twin update [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_c2d_twin_update_rate_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub failed c2d twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -391,6 +427,12 @@ variable "failed_d2c_twin_read_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_d2c_twin_read_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub failed d2c twin read [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_d2c_twin_read_rate_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub failed d2c twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -431,6 +473,12 @@ variable "failed_d2c_twin_update_rate_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_d2c_twin_update_rate_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub failed d2c twin update [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_d2c_twin_update_rate_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub failed d2c twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -471,6 +519,12 @@ variable "dropped_d2c_telemetry_egress_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "dropped_d2c_telemetry_egress_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub dropped d2c telemetry [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "dropped_d2c_telemetry_egress_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub dropped d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -511,6 +565,12 @@ variable "orphaned_d2c_telemetry_egress_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "orphaned_d2c_telemetry_egress_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub orphaned d2c telemetry [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "orphaned_d2c_telemetry_egress_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub orphaned d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -551,6 +611,12 @@ variable "invalid_d2c_telemetry_egress_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "invalid_d2c_telemetry_egress_time_aggregator" {
|
||||
description = "Monitor aggregator for IoT Hub invalid d2c telemetry [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "invalid_d2c_telemetry_egress_timeframe" {
|
||||
description = "Monitor timeframe for IoT Hub invalid d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -4,7 +4,7 @@ resource "datadog_monitor" "too_many_jobs_failed" {
|
||||
message = "${coalesce(var.failed_jobs_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_jobs_rate_timeframe}):(
|
||||
${var.failed_jobs_rate_time_aggregator}(${var.failed_jobs_rate_timeframe}):(
|
||||
default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.jobs.failed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.devices_iothubs.jobs.completed{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 1) )
|
||||
@ -39,7 +39,7 @@ resource "datadog_monitor" "too_many_list_jobs_failed" {
|
||||
message = "${coalesce(var.failed_listjobs_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_listjobs_rate_timeframe}):(
|
||||
${var.failed_listjobs_rate_time_aggregator}(${var.failed_listjobs_rate_timeframe}):(
|
||||
default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.jobs.list_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 1) +
|
||||
default(avg:azure.devices_iothubs.jobs.list_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) )
|
||||
@ -74,7 +74,7 @@ resource "datadog_monitor" "too_many_query_jobs_failed" {
|
||||
message = "${coalesce(var.failed_queryjobs_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_queryjobs_rate_timeframe}):(
|
||||
${var.failed_queryjobs_rate_time_aggregator}(${var.failed_queryjobs_rate_timeframe}):(
|
||||
default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.jobs.query_jobs.success{${var.filter_tags}} by {resource_group,name}.as_rate(), 1) +
|
||||
default(avg:azure.devices_iothubs.jobs.query_jobs.failure{${var.filter_tags}} by {resource_group,name}.as_rate(), 0) )
|
||||
@ -165,7 +165,7 @@ resource "datadog_monitor" "too_many_c2d_methods_failed" {
|
||||
message = "${coalesce(var.failed_c2d_methods_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_c2d_methods_rate_timeframe}):(
|
||||
${var.failed_c2d_methods_rate_time_aggregator}(${var.failed_c2d_methods_rate_timeframe}):(
|
||||
default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.c2d.methods.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.devices_iothubs.c2d.methods.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 1) )
|
||||
@ -200,7 +200,7 @@ resource "datadog_monitor" "too_many_c2d_twin_read_failed" {
|
||||
message = "${coalesce(var.failed_c2d_twin_read_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_c2d_twin_read_rate_timeframe}):(
|
||||
${var.failed_c2d_twin_read_rate_time_aggregator}(${var.failed_c2d_twin_read_rate_timeframe}):(
|
||||
default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.c2d.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.devices_iothubs.c2d.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 1) )
|
||||
@ -235,7 +235,7 @@ resource "datadog_monitor" "too_many_c2d_twin_update_failed" {
|
||||
message = "${coalesce(var.failed_c2d_twin_update_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_c2d_twin_update_rate_timeframe}):(
|
||||
${var.failed_c2d_twin_update_rate_time_aggregator}(${var.failed_c2d_twin_update_rate_timeframe}):(
|
||||
default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.c2d.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.devices_iothubs.c2d.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 1) )
|
||||
@ -270,7 +270,7 @@ resource "datadog_monitor" "too_many_d2c_twin_read_failed" {
|
||||
message = "${coalesce(var.failed_d2c_twin_read_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_d2c_twin_read_rate_timeframe}):(
|
||||
${var.failed_d2c_twin_read_rate_time_aggregator}(${var.failed_d2c_twin_read_rate_timeframe}):(
|
||||
default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.d2c.twin.read.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.devices_iothubs.d2c.twin.read.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 1) )
|
||||
@ -305,7 +305,7 @@ resource "datadog_monitor" "too_many_d2c_twin_update_failed" {
|
||||
message = "${coalesce(var.failed_d2c_twin_update_rate_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_d2c_twin_update_rate_timeframe}):(
|
||||
${var.failed_d2c_twin_update_rate_time_aggregator}(${var.failed_d2c_twin_update_rate_timeframe}):(
|
||||
default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.d2c.twin.update.failure{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.devices_iothubs.d2c.twin.update.success{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 1) )
|
||||
@ -340,7 +340,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_dropped" {
|
||||
message = "${coalesce(var.dropped_d2c_telemetry_egress_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.dropped_d2c_telemetry_egress_timeframe}): (
|
||||
${var.dropped_d2c_telemetry_egress_time_aggregator}(${var.dropped_d2c_telemetry_egress_timeframe}): (
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
@ -377,7 +377,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_orphaned" {
|
||||
message = "${coalesce(var.orphaned_d2c_telemetry_egress_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.orphaned_d2c_telemetry_egress_timeframe}): (
|
||||
${var.orphaned_d2c_telemetry_egress_time_aggregator}(${var.orphaned_d2c_telemetry_egress_timeframe}): (
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
@ -414,7 +414,7 @@ resource "datadog_monitor" "too_many_d2c_telemetry_egress_invalid" {
|
||||
message = "${coalesce(var.invalid_d2c_telemetry_egress_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.invalid_d2c_telemetry_egress_timeframe}): (
|
||||
${var.invalid_d2c_telemetry_egress_time_aggregator}(${var.invalid_d2c_telemetry_egress_timeframe}): (
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.invalid{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) / (
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.dropped{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
default(avg:azure.devices_iothubs.d2c.telemetry.egress.orphaned{${var.filter_tags}} by {resource_group,region,name}.as_rate(), 0) +
|
||||
|
||||
@ -41,6 +41,7 @@ Creates DataDog monitors with the following checks:
|
||||
| server_errors_silenced | Groups to mute for Service Bus server errors monitor | map | `{}` | no |
|
||||
| server_errors_threshold_critical | Critical threshold for Service Bus server errors monitor | string | `90` | no |
|
||||
| server_errors_threshold_warning | Warning threshold for Service Bus server errors monitor | string | `50` | no |
|
||||
| server_errors_time_aggregator | Monitor aggregator for Service Bus server errors [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| server_errors_timeframe | Monitor timeframe for Service Bus server errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| status_enabled | Flag to enable Service Bus status monitor | string | `true` | no |
|
||||
| status_extra_tags | Extra tags for Service Bus status monitor | list | `[]` | no |
|
||||
@ -53,6 +54,7 @@ Creates DataDog monitors with the following checks:
|
||||
| user_errors_silenced | Groups to mute for Service Bus user errors monitor | map | `{}` | no |
|
||||
| user_errors_threshold_critical | Critical threshold for Service Bus user errors monitor | string | `90` | no |
|
||||
| user_errors_threshold_warning | Warning threshold for Service Bus user errors monitor | string | `50` | no |
|
||||
| user_errors_time_aggregator | Monitor aggregator for Service Bus user errors [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| user_errors_timeframe | Monitor timeframe for Service Bus user errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
@ -113,6 +113,12 @@ variable "server_errors_silenced" {
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "server_errors_time_aggregator" {
|
||||
description = "Monitor aggregator for Service Bus server errors [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "server_errors_timeframe" {
|
||||
description = "Monitor timeframe for Service Bus server errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -147,6 +153,12 @@ variable "user_errors_silenced" {
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "user_errors_time_aggregator" {
|
||||
description = "Monitor aggregator for Service Bus user errors [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "user_errors_timeframe" {
|
||||
description = "Monitor timeframe for Service Bus user errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -63,7 +63,7 @@ resource "datadog_monitor" "service_bus_user_errors" {
|
||||
message = "${coalesce(var.user_errors_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.user_errors_timeframe}): (
|
||||
${var.user_errors_time_aggregator}(${var.user_errors_timeframe}): (
|
||||
default(avg:azure.servicebus_namespaces.user_errors.preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.servicebus_namespaces.incoming_requests_preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.user_errors_threshold_critical}
|
||||
@ -98,7 +98,7 @@ resource "datadog_monitor" "service_bus_server_errors" {
|
||||
message = "${coalesce(var.server_errors_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.server_errors_timeframe}): (
|
||||
${var.server_errors_time_aggregator}(${var.server_errors_timeframe}): (
|
||||
default(avg:azure.servicebus_namespaces.server_errors.preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.servicebus_namespaces.incoming_requests_preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.server_errors_threshold_critical}
|
||||
|
||||
@ -42,6 +42,7 @@ Creates DataDog monitors with the following checks:
|
||||
| failed_function_requests_silenced | Groups to mute for Stream Analytics failed requests monitor | map | `{}` | no |
|
||||
| failed_function_requests_threshold_critical | Failed Function Request rate limit (critical threshold) | string | `10` | no |
|
||||
| failed_function_requests_threshold_warning | Failed Function Request rate limit (warning threshold) | string | `0` | no |
|
||||
| failed_function_requests_time_aggregator | Monitor aggregator for Stream Analytics failed requests [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| failed_function_requests_timeframe | Monitor timeframe for Stream Analytics failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
|
||||
@ -136,6 +136,12 @@ variable "failed_function_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "failed_function_requests_time_aggregator" {
|
||||
description = "Monitor aggregator for Stream Analytics failed requests [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "failed_function_requests_timeframe" {
|
||||
description = "Monitor timeframe for Stream Analytics failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -65,7 +65,7 @@ resource "datadog_monitor" "failed_function_requests" {
|
||||
message = "${coalesce(var.failed_function_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.failed_function_requests_timeframe}): (
|
||||
${var.failed_function_requests_time_aggregator}(${var.failed_function_requests_timeframe}): (
|
||||
default(avg:azure.streamanalytics_streamingjobs.aml_callout_failed_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 0) /
|
||||
default(avg:azure.streamanalytics_streamingjobs.aml_callout_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_rate(), 1)
|
||||
) * 100 > ${var.failed_function_requests_threshold_critical}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user