diff --git a/cloud/aws/alb/README.md b/cloud/aws/alb/README.md index 027e1fd..f4bb659 100644 Binary files a/cloud/aws/alb/README.md and b/cloud/aws/alb/README.md differ diff --git a/cloud/aws/alb/inputs.tf b/cloud/aws/alb/inputs.tf index 585740e..07cb8b3 100644 --- a/cloud/aws/alb/inputs.tf +++ b/cloud/aws/alb/inputs.tf @@ -38,10 +38,16 @@ variable "alb_no_healthy_instances_message" { default = "" } +variable "alb_no_healthy_instances_time_aggregator" { + description = "Monitor aggregator for ALB no healthy instances [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "alb_no_healthy_instances_timeframe" { description = "Monitor timeframe for ALB no healthy instances [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_1m" + default = "last_5m" } variable "latency_silenced" { @@ -56,6 +62,12 @@ variable "latency_message" { default = "" } +variable "latency_time_aggregator" { + description = "Monitor aggregator for ALB latency [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "latency_timeframe" { description = "Monitor timeframe for ALB latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -72,30 +84,30 @@ variable "latency_threshold_warning" { description = "latency warning threshold in milliseconds" } -variable "httpcode_elb_4xx_silenced" { +variable "httpcode_alb_4xx_silenced" { description = "Groups to mute for ALB httpcode 4xx monitor" type = "map" default = {} } -variable "httpcode_elb_4xx_message" { +variable "httpcode_alb_4xx_message" { description = "Custom message for ALB httpcode 4xx monitor" type = "string" default = "" } -variable "httpcode_elb_4xx_timeframe" { +variable "httpcode_alb_4xx_timeframe" { description = "Monitor timeframe for ALB httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type 
= "string" default = "last_5m" } -variable "httpcode_elb_4xx_threshold_critical" { +variable "httpcode_alb_4xx_threshold_critical" { default = 80 description = "loadbalancer 4xx critical threshold in percentage" } -variable "httpcode_elb_4xx_threshold_warning" { +variable "httpcode_alb_4xx_threshold_warning" { default = 60 description = "loadbalancer 4xx warning threshold in percentage" } @@ -128,30 +140,30 @@ variable "httpcode_target_4xx_threshold_warning" { description = "target 4xx warning threshold in percentage" } -variable "httpcode_elb_5xx_silenced" { +variable "httpcode_alb_5xx_silenced" { description = "Groups to mute for ALB httpcode 5xx monitor" type = "map" default = {} } -variable "httpcode_elb_5xx_message" { +variable "httpcode_alb_5xx_message" { description = "Custom message for ALB httpcode 5xx monitor" type = "string" default = "" } -variable "httpcode_elb_5xx_timeframe" { +variable "httpcode_alb_5xx_timeframe" { description = "Monitor timeframe for ALB httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" default = "last_5m" } -variable "httpcode_elb_5xx_threshold_critical" { +variable "httpcode_alb_5xx_threshold_critical" { default = 80 - description = "loadbalancer 5xxcritical threshold in percentage" + description = "loadbalancer 5xx critical threshold in percentage" } -variable "httpcode_elb_5xx_threshold_warning" { +variable "httpcode_alb_5xx_threshold_warning" { default = 60 description = "loadbalancer 5xx warning threshold in percentage" } diff --git a/cloud/aws/alb/monitors-alb.tf b/cloud/aws/alb/monitors-alb.tf index 51d2d5b..96e122d 100644 --- a/cloud/aws/alb/monitors-alb.tf +++ b/cloud/aws/alb/monitors-alb.tf @@ -14,9 +14,9 @@ resource "datadog_monitor" "ALB_no_healthy_instances" { message = "${coalesce(var.alb_no_healthy_instances_message, var.message)}" query = < ${var.latency_threshold_critical} EOF @@ -67,26 +67,26 @@ resource "datadog_monitor" "ALB_latency" { tags = 
["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] } -resource "datadog_monitor" "ALB_httpcode_elb_5xx" { +resource "datadog_monitor" "ALB_httpcode_5xx" { name = "[${var.environment}] ALB HTTP code 5xx {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" type = "metric alert" - message = "${coalesce(var.httpcode_elb_5xx_message, var.message)}" + message = "${coalesce(var.httpcode_alb_5xx_message, var.message)}" query = < ${var.httpcode_elb_5xx_threshold_critical} + ) > ${var.httpcode_alb_5xx_threshold_critical} EOF evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" thresholds { - critical = "${var.httpcode_elb_5xx_threshold_critical}" - warning = "${var.httpcode_elb_5xx_threshold_warning}" + critical = "${var.httpcode_alb_5xx_threshold_critical}" + warning = "${var.httpcode_alb_5xx_threshold_warning}" } notify_no_data = false @@ -95,31 +95,31 @@ resource "datadog_monitor" "ALB_httpcode_elb_5xx" { timeout_h = 0 include_tags = true - silenced = "${var.httpcode_elb_5xx_silenced}" + silenced = "${var.httpcode_alb_5xx_silenced}" tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] } -resource "datadog_monitor" "ALB_httpcode_elb_4xx" { +resource "datadog_monitor" "ALB_httpcode_4xx" { name = "[${var.environment}] ALB HTTP code 4xx {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" type = "metric alert" - message = "${coalesce(var.httpcode_elb_4xx_message, var.message)}" + message = "${coalesce(var.httpcode_alb_4xx_message, var.message)}" query = < ${var.httpcode_elb_4xx_threshold_critical} + ) > ${var.httpcode_alb_4xx_threshold_critical} EOF evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" thresholds { - critical = "${var.httpcode_elb_4xx_threshold_critical}" - warning = 
"${var.httpcode_elb_4xx_threshold_warning}" + critical = "${var.httpcode_alb_4xx_threshold_critical}" + warning = "${var.httpcode_alb_4xx_threshold_warning}" } notify_no_data = false @@ -128,7 +128,7 @@ resource "datadog_monitor" "ALB_httpcode_elb_4xx" { timeout_h = 0 include_tags = true - silenced = "${var.httpcode_elb_4xx_silenced}" + silenced = "${var.httpcode_alb_4xx_silenced}" tags = ["env:${var.environment}", "resource:alb", "team:aws", "provider:aws"] } @@ -139,10 +139,10 @@ resource "datadog_monitor" "ALB_httpcode_target_5xx" { message = "${coalesce(var.httpcode_target_5xx_message, var.message)}" query = < ${var.httpcode_target_5xx_threshold_critical} EOF @@ -172,10 +172,10 @@ resource "datadog_monitor" "ALB_httpcode_target_4xx" { message = "${coalesce(var.httpcode_target_4xx_message, var.message)}" query = < ${var.httpcode_target_4xx_threshold_critical} EOF diff --git a/cloud/aws/apigateway/README.md b/cloud/aws/apigateway/README.md index 755cfce..8e242ed 100644 Binary files a/cloud/aws/apigateway/README.md and b/cloud/aws/apigateway/README.md differ diff --git a/cloud/aws/apigateway/inputs.tf b/cloud/aws/apigateway/inputs.tf index 83acd7c..1430ab5 100644 --- a/cloud/aws/apigateway/inputs.tf +++ b/cloud/aws/apigateway/inputs.tf @@ -33,6 +33,12 @@ variable "latency_message" { default = "" } +variable "latency_time_aggregator" { + description = "Monitor aggregator for API Gateway latency [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "latency_timeframe" { description = "Monitor timeframe for API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/cloud/aws/apigateway/monitors-api.tf b/cloud/aws/apigateway/monitors-api.tf index 8fd52a7..b6bee79 100644 --- a/cloud/aws/apigateway/monitors-api.tf +++ b/cloud/aws/apigateway/monitors-api.tf @@ -5,8 +5,8 @@ resource "datadog_monitor" "API_Gateway_latency" { message = "${coalesce(var.latency_message, 
var.message)}" query = < ${var.latency_threshold_critical} EOF @@ -36,10 +36,10 @@ resource "datadog_monitor" "API_http_5xx_errors_count" { message = "${coalesce(var.http_5xx_requests_message, var.message)}" query = < ${var.http_5xx_requests_threshold_critical} EOF @@ -70,10 +70,10 @@ resource "datadog_monitor" "API_http_4xx_errors_count" { message = "${coalesce(var.http_4xx_requests_message, var.message)}" query = < ${var.http_4xx_requests_threshold_critical} EOF diff --git a/cloud/aws/elasticsearch/README.md b/cloud/aws/elasticsearch/README.md index 0d57cbd..44fb596 100644 Binary files a/cloud/aws/elasticsearch/README.md and b/cloud/aws/elasticsearch/README.md differ diff --git a/cloud/aws/elasticsearch/inputs.tf b/cloud/aws/elasticsearch/inputs.tf index 4d3fb2c..6f56332 100644 --- a/cloud/aws/elasticsearch/inputs.tf +++ b/cloud/aws/elasticsearch/inputs.tf @@ -60,6 +60,12 @@ variable "diskspace_message" { default = "" } +variable "diskspace_time_aggregator" { + description = "Monitor aggregator for ES cluster diskspace [available values: min, max or avg]" + type = "string" + default = "max" +} + variable "diskspace_timeframe" { description = "Monitor timeframe for ES cluster diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -88,6 +94,12 @@ variable "cpu_message" { default = "" } +variable "cpu_time_aggregator" { + description = "Monitor aggregator for ES cluster cpu [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "cpu_timeframe" { description = "Monitor timeframe for ES cluster cpu [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/cloud/aws/elasticsearch/monitors-elasticsearch.tf b/cloud/aws/elasticsearch/monitors-elasticsearch.tf index 47dc1e2..b46b5e1 100644 --- a/cloud/aws/elasticsearch/monitors-elasticsearch.tf +++ b/cloud/aws/elasticsearch/monitors-elasticsearch.tf @@ -52,7 +52,7 @@ 
resource "datadog_monitor" "es_free_space_low" { type = "metric alert" query = < ${var.cpu_threshold_critical} EOF diff --git a/cloud/aws/elb/README.md b/cloud/aws/elb/README.md index ec188f8..bd2a0a7 100644 --- a/cloud/aws/elb/README.md +++ b/cloud/aws/elb/README.md @@ -25,42 +25,45 @@ Creates DataDog monitors with the following checks : * ELB backend http code 4xx percent to high * ELB backend http code 5xx percent to high -Inputs ------- - -| Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| artificial_requests_count | Number of false requests used to mitigate false positive in case of low trafic | string | `5` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | -| elb_4xx_message | Custom message for ELB 4xx errors monitor | string | `` | no | -| elb_4xx_silenced | Groups to mute for ELB 4xx errors monitor | map | `` | no | -| elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `10` | no | -| elb_4xx_threshold_warning | loadbalancer 4xx warning threshold in percentage | string | `5` | no | -| elb_4xx_timeframe | Monitor timeframe for ELB 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| elb_5xx_message | Custom message for ELB 5xx errors monitor | string | `` | no | -| elb_5xx_silenced | Groups to mute for ELB 5xx errors monitor | map | `` | no | -| elb_5xx_threshold_critical | loadbalancer 5xx critical threshold in percentage | string | `10` | no | -| elb_5xx_threshold_warning | loadbalancer 5xx warning threshold in percentage | string | `5` | no | -| elb_5xx_timeframe | Monitor timeframe for ELB 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| elb_backend_4xx_message | Custom message for ELB backend 4xx errors monitor | string | `` | no | -| elb_backend_4xx_silenced | Groups to 
mute for ELB backend 4xx errors monitor | map | `` | no | -| elb_backend_4xx_threshold_critical | loadbalancer backend 4xx critical threshold in percentage | string | `10` | no | -| elb_backend_4xx_threshold_warning | loadbalancer backend 4xx warning threshold in percentage | string | `5` | no | -| elb_backend_4xx_timeframe | Monitor timeframe for ELB backend 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| elb_backend_5xx_message | Custom message for ELB backend 5xx errors monitor | string | `` | no | -| elb_backend_5xx_silenced | Groups to mute for ELB backend 5xx errors monitor | map | `` | no | -| elb_backend_5xx_threshold_critical | loadbalancer backend 5xx critical threshold in percentage | string | `10` | no | -| elb_backend_5xx_threshold_warning | loadbalancer backend 5xx warning threshold in percentage | string | `5` | no | -| elb_backend_5xx_timeframe | Monitor timeframe for ELB backend 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| elb_backend_latency_critical | latency critical threshold in seconds | string | `5` | no | -| elb_backend_latency_message | Custom message for ELB backend latency monitor | string | `` | no | -| elb_backend_latency_silenced | Groups to mute for ELB backend latency monitor | map | `` | no | -| elb_backend_latency_timeframe | Monitor timeframe for ELB backend latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| elb_backend_latency_warning | latency warning threshold in seconds | string | `1` | no | -| elb_no_healthy_instance_message | Custom message for ELB no healty instance monitor | string | `` | no | -| elb_no_healthy_instance_silenced | Groups to mute for ELB no healty instance monitor | map | `` | no | -| elb_no_healthy_instance_timeframe | Monitor timeframe for ELB no healty 
instance [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| environment | Architecture Environment | string | - | yes | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| message | Message sent when an alert is triggered | string | - | yes | +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| artificial_requests_count | Number of false requests used to mitigate false positive in case of low trafic | string | `5` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| elb_4xx_message | Custom message for ELB 4xx errors monitor | string | `` | no | +| elb_4xx_silenced | Groups to mute for ELB 4xx errors monitor | map | `` | no | +| elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `10` | no | +| elb_4xx_threshold_warning | loadbalancer 4xx warning threshold in percentage | string | `5` | no | +| elb_4xx_timeframe | Monitor timeframe for ELB 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| elb_5xx_message | Custom message for ELB 5xx errors monitor | string | `` | no | +| elb_5xx_silenced | Groups to mute for ELB 5xx errors monitor | map | `` | no | +| elb_5xx_threshold_critical | loadbalancer 5xx critical threshold in percentage | string | `10` | no | +| elb_5xx_threshold_warning | loadbalancer 5xx warning threshold in percentage | string | `5` | no | +| elb_5xx_timeframe | Monitor timeframe for ELB 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| elb_backend_4xx_message | Custom message for ELB backend 4xx errors monitor | string | `` | no | 
+| elb_backend_4xx_silenced | Groups to mute for ELB backend 4xx errors monitor | map | `` | no | +| elb_backend_4xx_threshold_critical | loadbalancer backend 4xx critical threshold in percentage | string | `10` | no | +| elb_backend_4xx_threshold_warning | loadbalancer backend 4xx warning threshold in percentage | string | `5` | no | +| elb_backend_4xx_timeframe | Monitor timeframe for ELB backend 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| elb_backend_5xx_message | Custom message for ELB backend 5xx errors monitor | string | `` | no | +| elb_backend_5xx_silenced | Groups to mute for ELB backend 5xx errors monitor | map | `` | no | +| elb_backend_5xx_threshold_critical | loadbalancer backend 5xx critical threshold in percentage | string | `10` | no | +| elb_backend_5xx_threshold_warning | loadbalancer backend 5xx warning threshold in percentage | string | `5` | no | +| elb_backend_5xx_timeframe | Monitor timeframe for ELB backend 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| elb_backend_latency_critical | latency critical threshold in seconds | string | `5` | no | +| elb_backend_latency_message | Custom message for ELB backend latency monitor | string | `` | no | +| elb_backend_latency_silenced | Groups to mute for ELB backend latency monitor | map | `` | no | +| elb_backend_latency_time_aggregator | Monitor aggregator for ELB backend latency [available values: min, max or avg] | string | `min` | no | +| elb_backend_latency_timeframe | Monitor timeframe for ELB backend latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| elb_backend_latency_warning | latency warning threshold in seconds | string | `1` | no | +| elb_no_healthy_instance_message | Custom message for ELB no healty instance monitor | string | `` | no | +| 
elb_no_healthy_instance_silenced | Groups to mute for ELB no healty instance monitor | map | `` | no | +| elb_no_healthy_instance_time_aggregator | Monitor aggregator for ELB no healty instance [available values: min or max] | string | `min` | no | +| elb_no_healthy_instance_timeframe | Monitor timeframe for ELB no healty instance [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | + diff --git a/cloud/aws/elb/inputs.tf b/cloud/aws/elb/inputs.tf index 4809da0..b0f2e80 100644 --- a/cloud/aws/elb/inputs.tf +++ b/cloud/aws/elb/inputs.tf @@ -37,6 +37,12 @@ variable "elb_no_healthy_instance_message" { default = "" } +variable "elb_no_healthy_instance_time_aggregator" { + description = "Monitor aggregator for ELB no healty instance [available values: min or max]" + type = "string" + default = "min" +} + variable "elb_no_healthy_instance_timeframe" { description = "Monitor timeframe for ELB no healty instance [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -167,6 +173,12 @@ variable "elb_backend_latency_message" { default = "" } +variable "elb_backend_latency_time_aggregator" { + description = "Monitor aggregator for ELB backend latency [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "elb_backend_latency_timeframe" { description = "Monitor timeframe for ELB backend latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/cloud/aws/elb/monitors-elb.tf b/cloud/aws/elb/monitors-elb.tf index 
a36f403..fbb2267 100644 --- a/cloud/aws/elb/monitors-elb.tf +++ b/cloud/aws/elb/monitors-elb.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "ELB_no_healthy_instances" { message = "${coalesce(var.elb_no_healthy_instance_message, var.message)}" query = < ${var.elb_4xx_threshold_critical} EOF @@ -73,10 +73,10 @@ resource "datadog_monitor" "ELB_too_much_5xx" { message = "${coalesce(var.elb_5xx_message, var.message)}" query = < ${var.elb_5xx_threshold_critical} EOF @@ -108,10 +108,10 @@ resource "datadog_monitor" "ELB_too_much_4xx_backend" { message = "${coalesce(var.elb_backend_4xx_message, var.message)}" query = < ${var.elb_backend_4xx_threshold_critical} EOF @@ -143,10 +143,10 @@ resource "datadog_monitor" "ELB_too_much_5xx_backend" { message = "${coalesce(var.elb_backend_5xx_message, var.message)}" query = < ${var.elb_backend_5xx_threshold_critical} EOF @@ -178,8 +178,8 @@ resource "datadog_monitor" "ELB_backend_latency" { message = "${coalesce(var.elb_backend_latency_message, var.message)}" query = < ${var.elb_backend_latency_critical} EOF diff --git a/cloud/aws/kinesis-firehose/README.md b/cloud/aws/kinesis-firehose/README.md index f4bffe6..77f7c43 100644 Binary files a/cloud/aws/kinesis-firehose/README.md and b/cloud/aws/kinesis-firehose/README.md differ diff --git a/cloud/aws/kinesis-firehose/monitors-kinesis-firehose.tf b/cloud/aws/kinesis-firehose/monitors-kinesis-firehose.tf index 1f1a71b..7d73d6d 100644 --- a/cloud/aws/kinesis-firehose/monitors-kinesis-firehose.tf +++ b/cloud/aws/kinesis-firehose/monitors-kinesis-firehose.tf @@ -14,10 +14,10 @@ resource "datadog_monitor" "firehose_incoming_records" { type = "metric alert" query = < ${var.cpu_threshold_critical} EOF @@ -46,7 +46,7 @@ resource "datadog_monitor" "rds_free_space_low" { type = "metric alert" query = <` | no | | apimanagement_failed_requests_threshold_critical | Maximum acceptable percent of failed requests | string | `90` | no | | apimanagement_failed_requests_threshold_warning | Warning 
regarding acceptable percent of failed requests | string | `50` | no | +| apimanagement_failed_requests_timeframe | Monitor timeframe for API Management failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | apimanagement_other_requests_message | Custom message for API Management other requests monitor | string | `` | no | | apimanagement_other_requests_silenced | Groups to mute for API Management other requests monitor | map | `` | no | | apimanagement_other_requests_threshold_critical | Maximum acceptable percent of other requests | string | `90` | no | | apimanagement_other_requests_threshold_warning | Warning regarding acceptable percent of other requests | string | `50` | no | +| apimanagement_other_requests_timeframe | Monitor timeframe for API Management other requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | apimanagement_status_message | Custom message for API Management status monitor | string | `` | no | | apimanagement_status_silenced | Groups to mute for API Management status monitor | map | `` | no | +| apimanagement_status_time_aggregator | Monitor aggregator for API Management status [available values: min, max or avg] | string | `max` | no | +| apimanagement_status_timeframe | Monitor timeframe for API Management status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | apimanagement_successful_requests_message | Custom message for API Management successful requests monitor | string | `` | no | | apimanagement_successful_requests_silenced | Groups to mute for API Management successful requests monitor | map | `` | no | | apimanagement_successful_requests_threshold_critical | Minimum acceptable percent of successful requests | string | `10` | no | | apimanagement_successful_requests_threshold_warning | Warning regarding 
acceptable percent of successful requests | string | `30` | no | +| apimanagement_successful_requests_timeframe | Monitor timeframe for API Management successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | apimanagement_unauthorized_requests_message | Custom message for API Management unauthorized requests monitor | string | `` | no | | apimanagement_unauthorized_requests_silenced | Groups to mute for API Management unauthorized requests monitor | map | `` | no | | apimanagement_unauthorized_requests_threshold_critical | Maximum acceptable percent of unauthorized requests | string | `90` | no | | apimanagement_unauthorized_requests_threshold_warning | Warning regarding acceptable percent of unauthorized requests | string | `50` | no | +| apimanagement_unauthorized_requests_timeframe | Monitor timeframe for API Management unauthorized requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | appservices_http_4xx_requests_message | Custom message for App Services 4xx requests monitor | string | `` | no | | appservices_http_4xx_requests_silenced | Groups to mute for App Services 4xx requests monitor | map | `` | no | | appservices_http_4xx_requests_threshold_critical | Maximum critical acceptable percent of 4xx errors | string | `90` | no | | appservices_http_4xx_requests_threshold_warning | Warning regarding acceptable percent of 4xx errors | string | `50` | no | +| appservices_http_4xx_requests_timeframe | Monitor timeframe for App Services 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | appservices_http_5xx_requests_message | Custom message for App Services 5xx requests monitor | string | `` | no | | appservices_http_5xx_requests_silenced | Groups to mute for App Services 5xx requests monitor | map | `` | no | | 
appservices_http_5xx_requests_threshold_critical | Maximum critical acceptable percent of 5xx errors | string | `90` | no | | appservices_http_5xx_requests_threshold_warning | Warning regarding acceptable percent of 5xx errors | string | `50` | no | +| appservices_http_5xx_requests_timeframe | Monitor timeframe for App Services 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | appservices_http_successful_requests_message | Custom message for App Services successful requests monitor | string | `` | no | | appservices_http_successful_requests_silenced | Groups to mute for App Services successful requests monitor | map | `` | no | | appservices_http_successful_requests_threshold_critical | Minimum critical acceptable percent of 2xx & 3xx requests | string | `10` | no | | appservices_http_successful_requests_threshold_warning | Warning regarding acceptable percent of 2xx & 3xx requests | string | `30` | no | +| appservices_http_successful_requests_timeframe | Monitor timeframe for App Services successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | appservices_memory_usage_message | Custom message for App Services memory usage monitor | string | `` | no | | appservices_memory_usage_silenced | Groups to mute for App Services memory usage monitor | map | `` | no | | appservices_memory_usage_threshold_critical | Alerting threshold in Mib | string | `1073741824` | no | | appservices_memory_usage_threshold_warning | Warning threshold in MiB | string | `536870912` | no | +| appservices_memory_usage_time_aggregator | Monitor aggregator for App Services memory usage [available values: min, max or avg] | string | `min` | no | +| appservices_memory_usage_timeframe | Monitor timeframe for App Services memory usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | 
`last_5m` | no | | appservices_response_time_message | Custom message for App Services response time monitor | string | `` | no | | appservices_response_time_silenced | Groups to mute for App Services response time monitor | map | `` | no | | appservices_response_time_threshold_critical | Alerting threshold for response time in seconds | string | `10` | no | | appservices_response_time_threshold_warning | Warning threshold for response time in seconds | string | `5` | no | +| appservices_response_time_time_aggregator | Monitor aggregator for App Services response time [available values: min, max or avg] | string | `min` | no | +| appservices_response_time_timeframe | Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | | eventhub_errors_rate_message | Custom message for Event Hub errors monitor | string | `` | no | | eventhub_errors_rate_silenced | Groups to mute for Event Hub errors monitor | map | `` | no | | eventhub_errors_rate_thresold_critical | Errors ratio (percentage) to trigger the critical alert | string | `90` | no | | eventhub_errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `50` | no | +| eventhub_errors_rate_timeframe | Monitor timeframe for Event Hub errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | eventhub_failed_requests_rate_message | Custom message for Event Hub failed requests monitor | string | `` | no | | eventhub_failed_requests_rate_silenced | Groups to mute for Event Hub failed requests monitor | map | `` | no | | eventhub_failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `90` | no | | 
eventhub_failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `50` | no | +| eventhub_failed_requests_rate_timeframe | Monitor timeframe for Event Hub failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | eventhub_status_message | Custom message for Event Hub status monitor | string | `` | no | | eventhub_status_silenced | Groups to mute for Event Hub status monitor | map | `` | no | +| eventhub_status_time_aggregator | Monitor aggregator for Event Hub status [available values: min, max or avg] | string | `max` | no | +| eventhub_status_timeframe | Monitor timeframe for Event Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | iothub_dropped_d2c_telemetry_egress_message | Custom message for IoT Hub dropped d2c telemetry monitor | string | `` | no | | iothub_dropped_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Dropped limit (critical threshold) | string | `90` | no | | iothub_dropped_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Dropped limit (warning threshold) | string | `50` | no | | iothub_dropped_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub dropped d2c telemetry monitor | map | `` | no | +| iothub_dropped_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub dropped d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_failed_c2d_methods_rate_message | Custom message for IoT Hub failed c2d method monitor | string | `` | no | | iothub_failed_c2d_methods_rate_silenced | Groups to mute for IoT Hub failed c2d 
methods monitor | map | `` | no | | iothub_failed_c2d_methods_rate_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `90` | no | | iothub_failed_c2d_methods_rate_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `50` | no | +| iothub_failed_c2d_methods_rate_timeframe | Monitor timeframe for IoT Hub failed c2d method [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_failed_c2d_twin_read_rate_message | Custom message for IoT Hub failed c2d twin read monitor | string | `` | no | | iothub_failed_c2d_twin_read_rate_silenced | Groups to mute for IoT Hub failed c2d twin read monitor | map | `` | no | | iothub_failed_c2d_twin_read_rate_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `90` | no | | iothub_failed_c2d_twin_read_rate_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `50` | no | +| iothub_failed_c2d_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_failed_c2d_twin_update_rate_message | Custom message for IoT Hub failed c2d twin update monitor | string | `` | no | | iothub_failed_c2d_twin_update_rate_silenced | Groups to mute for IoT Hub failed c2d twin update monitor | map | `` | no | | iothub_failed_c2d_twin_update_rate_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `90` | no | | iothub_failed_c2d_twin_update_rate_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `50` | no | +| iothub_failed_c2d_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | 
iothub_failed_d2c_twin_read_rate_message | Custom message for IoT Hub failed d2c twin read monitor | string | `` | no | | iothub_failed_d2c_twin_read_rate_silenced | Groups to mute for IoT Hub failed d2c twin read monitor | map | `` | no | | iothub_failed_d2c_twin_read_rate_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `90` | no | | iothub_failed_d2c_twin_read_rate_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `50` | no | +| iothub_failed_d2c_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_failed_d2c_twin_update_rate_message | Custom message for IoT Hub failed d2c twin update monitor | string | `` | no | | iothub_failed_d2c_twin_update_rate_silenced | Groups to mute for IoT Hub failed d2c twin update monitor | map | `` | no | | iothub_failed_d2c_twin_update_rate_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `90` | no | | iothub_failed_d2c_twin_update_rate_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `50` | no | +| iothub_failed_d2c_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_failed_jobs_rate_message | Custom message for IoT Hub failed jobs monitor | string | `` | no | | iothub_failed_jobs_rate_silenced | Groups to mute for IoT Hub failed jobs monitor | map | `` | no | | iothub_failed_jobs_rate_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `90` | no | | iothub_failed_jobs_rate_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `50` | no | +| iothub_failed_jobs_rate_timeframe | Monitor timeframe for IoT Hub failed jobs [available values: 
`last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_failed_listjobs_rate_message | Custom message for IoT Hub failed list jobs monitor | string | `` | no | | iothub_failed_listjobs_rate_silenced | Groups to mute for IoT Hub failed list jobs monitor | map | `` | no | | iothub_failed_listjobs_rate_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `90` | no | | iothub_failed_listjobs_rate_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `50` | no | +| iothub_failed_listjobs_rate_timeframe | Monitor timeframe for IoT Hub failed list jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_failed_queryjobs_rate_message | Custom message for IoT Hub failed query jobs monitor | string | `` | no | | iothub_failed_queryjobs_rate_silenced | Groups to mute for IoT Hub failed query jobs monitor | map | `` | no | | iothub_failed_queryjobs_rate_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `90` | no | | iothub_failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `50` | no | +| iothub_failed_queryjobs_rate_timeframe | Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_invalid_d2c_telemetry_egress_message | Custom message for IoT Hub invalid d2c telemetry monitor | string | `` | no | | iothub_invalid_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `90` | no | | iothub_invalid_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `50` | no | | iothub_invalid_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub invalid d2c telemetry monitor | map | `` | no | +| 
iothub_invalid_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub invalid d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_orphaned_d2c_telemetry_egress_message | Custom message for IoT Hub orphaned d2c telemetry monitor | string | `` | no | | iothub_orphaned_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `90` | no | | iothub_orphaned_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `50` | no | | iothub_orphaned_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub orphaned d2c telemetry monitor | map | `` | no | +| iothub_orphaned_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub orphaned d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_status_message | Custom message for IoT Hub status monitor | string | `` | no | | iothub_status_silenced | Groups to mute for IoT Hub status monitor | map | `` | no | +| iothub_status_time_aggregator | Monitor aggregator for IoT Hub status [available values: min, max or avg] | string | `max` | no | +| iothub_status_timeframe | Monitor timeframe for IoT Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_too_many_d2c_telemetry_ingress_nosent_message | Custom message for IoT Hub unsent d2c telemetry monitor | string | `` | no | | iothub_too_many_d2c_telemetry_ingress_nosent_silenced | Groups to mute for IoT Hub unsent d2c telemetry monitor | map | `` | no | +| iothub_too_many_d2c_telemetry_ingress_nosent_timeframe | Monitor timeframe for IoT Hub unsent d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | iothub_total_devices_message | Custom 
message for IoT Hub total devices monitor | string | `` | no | | iothub_total_devices_silenced | Groups to mute for IoT Hub total devices monitor | map | `` | no | +| iothub_total_devices_time_aggregator | Monitor aggregator for IoT Hub total devices [available values: min, max or avg] | string | `min` | no | +| iothub_total_devices_timeframe | Monitor timeframe for IoT Hub total devices [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | non_taggable_filter_tags | Tags used for filtering for components without tag support | string | `*` | no | | redis_evictedkeys_limit_message | Custom message for Redis evicted keys monitor | string | `` | no | | redis_evictedkeys_limit_silenced | Groups to mute for Redis evicted keys monitor | map | `` | no | | redis_evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no | | redis_evictedkeys_limit_threshold_warning | Evicted keys limit (warning threshold) | string | `0` | no | +| redis_evictedkeys_limit_time_aggregator | Monitor aggregator for Redis evicted keys [available values: min, max or avg] | string | `avg` | no | +| redis_evictedkeys_limit_timeframe | Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | redis_percent_processor_time_message | Custom message for Redis processor monitor | string | `` | no | | redis_percent_processor_time_silenced | Groups to mute for Redis processor monitor | map | `` | no | | redis_percent_processor_time_threshold_critical | Processor time percent (critical threshold) | string | `80` | no | | redis_percent_processor_time_threshold_warning | Processor time percent (warning threshold) | string | `60` | no | +| redis_percent_processor_time_time_aggregator | Monitor aggregator for Redis processor 
[available values: min, max or avg] | string | `min` | no | +| redis_percent_processor_time_timeframe | Monitor timeframe for Redis processor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | redis_server_load_rate_message | Custom message for Redis server load monitor | string | `` | no | | redis_server_load_rate_silenced | Groups to mute for Redis server load monitor | map | `` | no | | redis_server_load_rate_threshold_critical | Server CPU load rate (critical threshold) | string | `90` | no | | redis_server_load_rate_threshold_warning | Server CPU load rate (warning threshold) | string | `70` | no | +| redis_server_load_rate_time_aggregator | Monitor aggregator for Redis server load [available values: min, max or avg] | string | `min` | no | +| redis_server_load_rate_timeframe | Monitor timeframe for Redis server load [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | redis_status_message | Custom message for Redis status monitor | string | `` | no | | redis_status_silenced | Groups to mute for Redis status monitor | map | `` | no | +| redis_status_time_aggregator | Monitor aggregator for Redis status [available values: min, max or avg] | string | `max` | no | +| redis_status_timeframe | Monitor timeframe for Redis status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | servicebus_status_message | Custom message for Service Bus status monitor | string | `` | no | | servicebus_status_silenced | Groups to mute for Service Bus status monitor | map | `` | no | +| servicebus_status_time_aggregator | Monitor aggregator for Service Bus status [available values: min, max or avg] | string | `max` | no | | servicebus_status_timeframe | Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | 
string | `last_15m` | no | -| servicebus_status_aggregator | Monitor timeframe aggregator for Service Bus status [available values: min, max, sum or avg] | string | `min` | no | | sqldatabase_cpu_message | Custom message for SQL CPU monitor | string | `` | no | | sqldatabase_cpu_silenced | Groups to mute for SQL CPU monitor | map | `` | no | | sqldatabase_cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no | | sqldatabase_cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no | +| sqldatabase_cpu_time_aggregator | Monitor aggregator for SQL CPU [available values: min, max or avg] | string | `min` | no | +| sqldatabase_cpu_timeframe | Monitor timeframe for SQL CPU [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | sqldatabase_deadlock_message | Custom message for SQL Deadlock monitor | string | `` | no | | sqldatabase_deadlock_silenced | Groups to mute for SQL Deadlock monitor | map | `` | no | | sqldatabase_deadlock_threshold_critical | Amount of Deadlocks (critical threshold) | string | `1` | no | +| sqldatabase_deadlock_timeframe | Monitor timeframe for SQL Deadlock [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | sqldatabase_diskspace_message | Custom message for SQL disk space monitor | string | `` | no | | sqldatabase_diskspace_silenced | Groups to mute for SQL disk space monitor | map | `` | no | | sqldatabase_diskspace_threshold_critical | Disk space used in percent (critical threshold) | string | `90` | no | | sqldatabase_diskspace_threshold_warning | Disk space used in percent (warning threshold) | string | `80` | no | +| sqldatabase_diskspace_time_aggregator | Monitor aggregator for SQL disk space [available values: min, max or avg] | string | `max` | no | +| sqldatabase_diskspace_timeframe | Monitor timeframe for SQL disk space [available values: 
`last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | sqldatabase_dtu_message | Custom message for SQL DTU monitor | string | `` | no | | sqldatabase_dtu_silenced | Groups to mute for SQL DTU monitor | map | `` | no | | sqldatabase_dtu_threshold_critical | Amount of DTU used (critical threshold) | string | `90` | no | | sqldatabase_dtu_threshold_warning | Amount of DTU used (warning threshold) | string | `85` | no | +| sqldatabase_dtu_time_aggregator | Monitor aggregator for SQL DTU [available values: min, max or avg] | string | `avg` | no | +| sqldatabase_dtu_timeframe | Monitor timeframe for SQL DTU [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | storage_authorization_error_requests_message | Custom message for Storage authorization errors monitor | string | `` | no | | storage_authorization_error_requests_silenced | Groups to mute for Storage authorization errors monitor | map | `` | no | | storage_authorization_error_requests_threshold_critical | Maximum acceptable percent of authorization error requests for a storage | string | `90` | no | | storage_authorization_error_requests_threshold_warning | Warning regarding acceptable percent of authorization error requests for a storage | string | `50` | no | +| storage_authorization_error_requests_time_aggregator | Monitor aggregator for Storage authorization errors [available values: min, max or avg] | string | `min` | no | +| storage_authorization_error_requests_timeframe | Monitor timeframe for Storage authorization errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | storage_availability_message | Custom message for Storage availability monitor | string | `` | no | | storage_availability_silenced | Groups to mute for Storage availability monitor | map | `` | no | | storage_availability_threshold_critical | Minimum 
acceptable percent of availability for a storage | string | `50` | no | | storage_availability_threshold_warning | Warning regarding acceptable percent of availability for a storage | string | `90` | no | +| storage_availability_time_aggregator | Monitor aggregator for Storage availability [available values: min, max or avg] | string | `max` | no | +| storage_availability_timeframe | Monitor timeframe for Storage availability [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | storage_client_other_error_requests_message | Custom message for Storage other errors monitor | string | `` | no | | storage_client_other_error_requests_silenced | Groups to mute for Storage other errors monitor | map | `` | no | | storage_client_other_error_requests_threshold_critical | Maximum acceptable percent of client other error requests for a storage | string | `90` | no | | storage_client_other_error_requests_threshold_warning | Warning regarding acceptable percent of client other error requests for a storage | string | `50` | no | +| storage_client_other_error_requests_time_aggregator | Monitor aggregator for Storage other errors [available values: min, max or avg] | string | `min` | no | +| storage_client_other_error_requests_timeframe | Monitor timeframe for Storage other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | storage_latency_message | Custom message for Storage latency monitor | string | `` | no | | storage_latency_silenced | Groups to mute for Storage latency monitor | map | `` | no | | storage_latency_threshold_critical | Maximum acceptable end to end latency (ms) for a storage | string | `2000` | no | | storage_latency_threshold_warning | Warning regarding acceptable end to end latency (ms) for a storage | string | `1000` | no | +| storage_latency_time_aggregator | Monitor aggregator for Storage latency [available values: 
min, max or avg] | string | `min` | no | +| storage_latency_timeframe | Monitor timeframe for Storage latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | storage_network_error_requests_message | Custom message for Storage network errors monitor | string | `` | no | | storage_network_error_requests_silenced | Groups to mute for Storage network errors monitor | map | `` | no | | storage_network_error_requests_threshold_critical | Maximum acceptable percent of network error requests for a storage | string | `90` | no | | storage_network_error_requests_threshold_warning | Warning regarding acceptable percent of network error requests for a storage | string | `50` | no | +| storage_network_error_requests_time_aggregator | Monitor aggregator for Storage network errors [available values: min, max or avg] | string | `min` | no | +| storage_network_error_requests_timeframe | Monitor timeframe for Storage network errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | storage_server_other_error_requests_message | Custom message for Storage server other errors monitor | string | `` | no | | storage_server_other_error_requests_silenced | Groups to mute for Storage server other errors monitor | map | `` | no | | storage_server_other_error_requests_threshold_critical | Maximum acceptable percent of server other error requests for a storage | string | `90` | no | | storage_server_other_error_requests_threshold_warning | Warning regarding acceptable percent of server other error requests for a storage | string | `50` | no | +| storage_server_other_error_requests_time_aggregator | Monitor aggregator for Storage server other errors [available values: min, max or avg] | string | `min` | no | +| storage_server_other_error_requests_timeframe | Monitor timeframe for Storage server other errors [available values: `last_#m` (1, 5, 10, 15, or 30), 
`last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | storage_successful_requests_message | Custom message for Storage successful requests monitor | string | `` | no | | storage_successful_requests_silenced | Groups to mute for Storage successful requests monitor | map | `` | no | | storage_successful_requests_threshold_critical | Minimum acceptable percent of successful requests for a storage | string | `10` | no | | storage_successful_requests_threshold_warning | Warning regarding acceptable percent of successful requests for a storage | string | `30` | no | +| storage_successful_requests_time_aggregator | Monitor aggregator for Storage successful requests [available values: min, max or avg] | string | `max` | no | +| storage_successful_requests_timeframe | Monitor timeframe for Storage successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | storage_throttling_error_requests_message | Custom message for Storage throttling error monitor | string | `` | no | | storage_throttling_error_requests_silenced | Groups to mute for Storage throttling error monitor | map | `` | no | | storage_throttling_error_requests_threshold_critical | Maximum acceptable percent of throttling error requests for a storage | string | `90` | no | | storage_throttling_error_requests_threshold_warning | Warning regarding acceptable percent of throttling error requests for a storage | string | `50` | no | +| storage_throttling_error_requests_time_aggregator | Monitor aggregator for Storage throttling errors [available values: min, max or avg] | string | `min` | no | +| storage_throttling_error_requests_timeframe | Monitor timeframe for Storage throttling errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | storage_timeout_error_requests_message | Custom message for Storage timeout monitor | string | `` | no | | 
storage_timeout_error_requests_silenced | Groups to mute for Storage timeout monitor | map | `` | no | | storage_timeout_error_requests_threshold_critical | Maximum acceptable percent of timeout error requests for a storage | string | `90` | no | | storage_timeout_error_requests_threshold_warning | Warning regarding acceptable percent of timeout error requests for a storage | string | `50` | no | +| storage_timeout_error_requests_time_aggregator | Monitor aggregator for Storage timeout [available values: min, max or avg] | string | `min` | no | +| storage_timeout_error_requests_timeframe | Monitor timeframe for Storage timeout [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | streamanalytics_conversion_errors_message | Custom message for Stream Analytics conversion errors monitor | string | `` | no | | streamanalytics_conversion_errors_silenced | Groups to mute for Stream Analytics conversion errors monitor | map | `` | no | | streamanalytics_conversion_errors_threshold_critical | Conversion errors limit (critical threshold) | string | `10` | no | | streamanalytics_conversion_errors_threshold_warning | Conversion errors limit (warning threshold) | string | `0` | no | +| streamanalytics_conversion_errors_time_aggregator | Monitor aggregator for Stream Analytics conversion errors [available values: min, max or avg] | string | `min` | no | +| streamanalytics_conversion_errors_timeframe | Monitor timeframe for Stream Analytics conversion errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | streamanalytics_failed_function_requests_message | Custom message for Stream Analytics failed requests monitor | string | `` | no | | streamanalytics_failed_function_requests_silenced | Groups to mute for Stream Analytics failed requests monitor | map | `` | no | | streamanalytics_failed_function_requests_threshold_critical | Failed Function 
Request rate limit (critical threshold) | string | `10` | no | | streamanalytics_failed_function_requests_threshold_warning | Failed Function Request rate limit (warning threshold) | string | `0` | no | +| streamanalytics_failed_function_requests_timeframe | Monitor timeframe for Stream Analytics failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | streamanalytics_runtime_errors_message | Custom message for Stream Analytics runtime errors monitor | string | `` | no | | streamanalytics_runtime_errors_silenced | Groups to mute for Stream Analytics runtime errors monitor | map | `` | no | | streamanalytics_runtime_errors_threshold_critical | Runtime errors limit (critical threshold) | string | `10` | no | | streamanalytics_runtime_errors_threshold_warning | Runtime errors limit (warning threshold) | string | `0` | no | +| streamanalytics_runtime_errors_time_aggregator | Monitor aggregator for Stream Analytics runtime errors [available values: min, max or avg] | string | `min` | no | +| streamanalytics_runtime_errors_timeframe | Monitor timeframe for Stream Analytics runtime errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | streamanalytics_status_message | Custom message for Stream Analytics status monitor | string | `` | no | | streamanalytics_status_silenced | Groups to mute for Stream Analytics status monitor | map | `` | no | +| streamanalytics_status_time_aggregator | Monitor aggregator for Stream Analytics status [available values: min, max or avg] | string | `max` | no | +| streamanalytics_status_timeframe | Monitor timeframe for Stream Analytics status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | streamanalytics_su_utilization_message | Custom message for Stream Analytics utilization monitor | string | `` | no | | 
streamanalytics_su_utilization_silenced | Groups to mute for Stream Analytics utilization monitor | map | `` | no | | streamanalytics_su_utilization_threshold_critical | Streaming Unit utilization rate limit (critical threshold) | string | `80` | no | | streamanalytics_su_utilization_threshold_warning | Streaming Unit utilization rate limit (warning threshold) | string | `60` | no | +| streamanalytics_su_utilization_time_aggregator | Monitor aggregator for Stream Analytics utilization [available values: min, max or avg] | string | `min` | no | +| streamanalytics_su_utilization_timeframe | Monitor timeframe for Stream Analytics utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- @@ -230,3 +305,4 @@ Related documentation DataDog documentation: [https://docs.datadoghq.com/integrations/azure/](https://docs.datadoghq.com/integrations/azure/) Azure metrics documentation: [https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-metrics](https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-overview-metrics) + \ No newline at end of file diff --git a/cloud/azure/apimanagement/README.md b/cloud/azure/apimanagement/README.md index aa1ec85..d47673f 100644 Binary files a/cloud/azure/apimanagement/README.md and b/cloud/azure/apimanagement/README.md differ diff --git a/cloud/azure/apimanagement/inputs.tf b/cloud/azure/apimanagement/inputs.tf index 0a91eea..2a19b46 100644 --- a/cloud/azure/apimanagement/inputs.tf +++ b/cloud/azure/apimanagement/inputs.tf @@ -37,6 +37,12 @@ variable "status_message" { default = "" } +variable "status_time_aggregator" { + description = "Monitor aggregator for API Management status [available values: min, max or avg]" + type = "string" + default = "max" +} + variable "status_timeframe" { description = "Monitor timeframe for API Management status [available values: `last_#m` (1, 
5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/cloud/azure/apimanagement/monitors-azure-apimanagement.tf b/cloud/azure/apimanagement/monitors-azure-apimanagement.tf index 1775ed2..6f2d114 100644 --- a/cloud/azure/apimanagement/monitors-azure-apimanagement.tf +++ b/cloud/azure/apimanagement/monitors-azure-apimanagement.tf @@ -13,8 +13,8 @@ resource "datadog_monitor" "apimgt_status" { message = "${coalesce(var.status_message, var.message)}" query = < ${var.failed_requests_threshold_critical} - EOF + EOF thresholds { critical = "${var.failed_requests_threshold_critical}" @@ -78,7 +78,7 @@ resource "datadog_monitor" "apimgt_other_requests" { avg:azure.apimanagement_service.other_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() / avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100 ) > ${var.other_requests_threshold_critical} - EOF + EOF thresholds { critical = "${var.other_requests_threshold_critical}" @@ -110,7 +110,7 @@ resource "datadog_monitor" "apimgt_unauthorized_requests" { avg:azure.apimanagement_service.unauthorized_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() / avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100 ) > ${var.unauthorized_requests_threshold_critical} - EOF + EOF thresholds { critical = "${var.unauthorized_requests_threshold_critical}" @@ -142,7 +142,7 @@ resource "datadog_monitor" "apimgt_successful_requests" { avg:azure.apimanagement_service.successful_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() / avg:azure.apimanagement_service.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() * 100 ) < ${var.successful_requests_threshold_critical} - EOF + EOF thresholds { critical 
= "${var.successful_requests_threshold_critical}" diff --git a/cloud/azure/app-services/README.md b/cloud/azure/app-services/README.md index 4534cbc..cbfd940 100644 Binary files a/cloud/azure/app-services/README.md and b/cloud/azure/app-services/README.md differ diff --git a/cloud/azure/app-services/inputs.tf b/cloud/azure/app-services/inputs.tf index d41e977..d038d4b 100644 --- a/cloud/azure/app-services/inputs.tf +++ b/cloud/azure/app-services/inputs.tf @@ -35,6 +35,12 @@ variable "response_time_message" { default = "" } +variable "response_time_time_aggregator" { + description = "Monitor aggregator for App Services response time [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "response_time_timeframe" { description = "Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -63,6 +69,12 @@ variable "memory_usage_message" { default = "" } +variable "memory_usage_time_aggregator" { + description = "Monitor aggregator for App Services memory usage [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "memory_usage_timeframe" { description = "Monitor timeframe for App Services memory usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/cloud/azure/app-services/monitors-app_services.tf b/cloud/azure/app-services/monitors-app_services.tf index b8f639b..df635c3 100644 --- a/cloud/azure/app-services/monitors-app_services.tf +++ b/cloud/azure/app-services/monitors-app_services.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "appservices_response_time" { message = "${coalesce(var.response_time_message, var.message)}" query = < ${var.response_time_threshold_critical} EOF @@ -44,7 +44,7 @@ resource "datadog_monitor" "appservices_memory_usage_count" { message = "${coalesce(var.memory_usage_message, var.message)}" query = < 
${var.memory_usage_threshold_critical} EOF diff --git a/cloud/azure/eventhub/README.md b/cloud/azure/eventhub/README.md index dc8e307..53dc2dd 100644 Binary files a/cloud/azure/eventhub/README.md and b/cloud/azure/eventhub/README.md differ diff --git a/cloud/azure/eventhub/inputs.tf b/cloud/azure/eventhub/inputs.tf index 963be45..5229093 100644 --- a/cloud/azure/eventhub/inputs.tf +++ b/cloud/azure/eventhub/inputs.tf @@ -37,6 +37,12 @@ variable "status_message" { default = "" } +variable "status_time_aggregator" { + description = "Monitor aggregator for Event Hub status [available values: min, max or avg]" + type = "string" + default = "max" +} + variable "status_timeframe" { description = "Monitor timeframe for Event Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf index 21e531b..a3d70c3 100644 --- a/cloud/azure/eventhub/monitors-eventhub.tf +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -11,8 +11,10 @@ resource "datadog_monitor" "eventhub_status" { message = "${coalesce(var.status_message, var.message)}" query = < ${var.failed_requests_rate_thresold_critical} - EOF + EOF type = "metric alert" @@ -82,7 +84,7 @@ resource "datadog_monitor" "eventhub_errors" { ), 0) * 100 ) > ${var.errors_rate_thresold_critical} - EOF + EOF type = "metric alert" diff --git a/cloud/azure/inputs.tf b/cloud/azure/inputs.tf index f7b804f..35acc9d 100644 --- a/cloud/azure/inputs.tf +++ b/cloud/azure/inputs.tf @@ -41,6 +41,18 @@ variable "apimanagement_status_message" { default = "" } +variable "apimanagement_status_time_aggregator" { + description = "Monitor aggregator for API Management status [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "apimanagement_status_timeframe" { + description = "Monitor timeframe for API Management status [available values: `last_#m` (1, 5, 10, 15, 
or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "apimanagement_failed_requests_silenced" { description = "Groups to mute for API Management failed requests monitor" type = "map" @@ -53,6 +65,12 @@ variable "apimanagement_failed_requests_message" { default = "" } +variable "apimanagement_failed_requests_timeframe" { + description = "Monitor timeframe for API Management failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "apimanagement_failed_requests_threshold_critical" { description = "Maximum acceptable percent of failed requests" default = 90 @@ -75,6 +93,12 @@ variable "apimanagement_other_requests_message" { default = "" } +variable "apimanagement_other_requests_timeframe" { + description = "Monitor timeframe for API Management other requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "apimanagement_other_requests_threshold_critical" { description = "Maximum acceptable percent of other requests" default = 90 @@ -97,6 +121,12 @@ variable "apimanagement_unauthorized_requests_message" { default = "" } +variable "apimanagement_unauthorized_requests_timeframe" { + description = "Monitor timeframe for API Management unauthorized requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "apimanagement_unauthorized_requests_threshold_critical" { description = "Maximum acceptable percent of unauthorized requests" default = 90 @@ -119,6 +149,12 @@ variable "apimanagement_successful_requests_message" { default = "" } +variable "apimanagement_successful_requests_timeframe" { + description = "Monitor timeframe for API Management successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 
4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "apimanagement_successful_requests_threshold_critical" { description = "Minimum acceptable percent of successful requests" default = 10 @@ -142,6 +178,18 @@ variable "appservices_response_time_message" { default = "" } +variable "appservices_response_time_time_aggregator" { + description = "Monitor aggregator for App Services response time [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "appservices_response_time_timeframe" { + description = "Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "appservices_response_time_threshold_critical" { default = 10 description = "Alerting threshold for response time in seconds" @@ -164,6 +212,18 @@ variable "appservices_memory_usage_message" { default = "" } +variable "appservices_memory_usage_time_aggregator" { + description = "Monitor aggregator for App Services memory usage [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "appservices_memory_usage_timeframe" { + description = "Monitor timeframe for App Services memory usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "appservices_memory_usage_threshold_critical" { default = 1073741824 # 1Gb description = "Alerting threshold in Mib" @@ -186,6 +246,12 @@ variable "appservices_http_4xx_requests_message" { default = "" } +variable "appservices_http_4xx_requests_timeframe" { + description = "Monitor timeframe for App Services 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "appservices_http_4xx_requests_threshold_critical" { default = 90 description = "Maximum critical acceptable 
percent of 4xx errors" @@ -208,6 +274,12 @@ variable "appservices_http_5xx_requests_message" { default = "" } +variable "appservices_http_5xx_requests_timeframe" { + description = "Monitor timeframe for App Services 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "appservices_http_5xx_requests_threshold_critical" { default = 90 description = "Maximum critical acceptable percent of 5xx errors" @@ -230,6 +302,12 @@ variable "appservices_http_successful_requests_message" { default = "" } +variable "appservices_http_successful_requests_timeframe" { + description = "Monitor timeframe for App Services successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "appservices_http_successful_requests_threshold_critical" { default = 10 description = "Minimum critical acceptable percent of 2xx & 3xx requests" @@ -253,6 +331,18 @@ variable "eventhub_status_message" { default = "" } +variable "eventhub_status_time_aggregator" { + description = "Monitor aggregator for Event Hub status [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "eventhub_status_timeframe" { + description = "Monitor timeframe for Event Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "eventhub_failed_requests_rate_silenced" { description = "Groups to mute for Event Hub failed requests monitor" type = "map" @@ -265,6 +355,12 @@ variable "eventhub_failed_requests_rate_message" { default = "" } +variable "eventhub_failed_requests_rate_timeframe" { + description = "Monitor timeframe for Event Hub failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable 
"eventhub_failed_requests_rate_thresold_critical" { description = "Failed requests ratio (percentage) to trigger the critical alert" default = 90 @@ -287,6 +383,12 @@ variable "eventhub_errors_rate_message" { default = "" } +variable "eventhub_errors_rate_timeframe" { + description = "Monitor timeframe for Event Hub errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "eventhub_errors_rate_thresold_critical" { description = "Errors ratio (percentage) to trigger the critical alert" default = 90 @@ -310,6 +412,18 @@ variable "iothub_status_message" { default = "" } +variable "iothub_status_time_aggregator" { + description = "Monitor aggregator for IoT Hub status [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "iothub_status_timeframe" { + description = "Monitor timeframe for IoT Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_total_devices_silenced" { description = "Groups to mute for IoT Hub total devices monitor" type = "map" @@ -322,6 +436,18 @@ variable "iothub_total_devices_message" { default = "" } +variable "iothub_total_devices_time_aggregator" { + description = "Monitor aggregator for IoT Hub total devices [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "iothub_total_devices_timeframe" { + description = "Monitor timeframe for IoT Hub total devices [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_too_many_d2c_telemetry_ingress_nosent_silenced" { description = "Groups to mute for IoT Hub unsent d2c telemetry monitor" type = "map" @@ -334,6 +460,12 @@ variable "iothub_too_many_d2c_telemetry_ingress_nosent_message" { default = "" } +variable 
"iothub_too_many_d2c_telemetry_ingress_nosent_timeframe" { + description = "Monitor timeframe for IoT Hub unsent d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_failed_jobs_rate_silenced" { description = "Groups to mute for IoT Hub failed jobs monitor" type = "map" @@ -346,6 +478,12 @@ variable "iothub_failed_jobs_rate_message" { default = "" } +variable "iothub_failed_jobs_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_failed_jobs_rate_threshold_warning" { description = "Jobs Failed rate limit (warning threshold)" default = 50 @@ -368,6 +506,12 @@ variable "iothub_failed_listjobs_rate_message" { default = "" } +variable "iothub_failed_listjobs_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed list jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_failed_listjobs_rate_threshold_warning" { description = "ListJobs Failed rate limit (warning threshold)" default = 50 @@ -390,6 +534,12 @@ variable "iothub_failed_queryjobs_rate_message" { default = "" } +variable "iothub_failed_queryjobs_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_failed_queryjobs_rate_threshold_warning" { description = "QueryJobs Failed rate limit (warning threshold)" default = 50 @@ -412,6 +562,12 @@ variable "iothub_failed_c2d_methods_rate_message" { default = "" } +variable "iothub_failed_c2d_methods_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed c2d method 
[available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_failed_c2d_methods_rate_threshold_warning" { description = "C2D Methods Failed rate limit (warning threshold)" default = 50 @@ -434,6 +590,12 @@ variable "iothub_failed_c2d_twin_read_rate_message" { default = "" } +variable "iothub_failed_c2d_twin_read_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed c2d twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_failed_c2d_twin_read_rate_threshold_warning" { description = "C2D Twin Read Failed rate limit (warning threshold)" default = 50 @@ -456,6 +618,12 @@ variable "iothub_failed_c2d_twin_update_rate_message" { default = "" } +variable "iothub_failed_c2d_twin_update_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed c2d twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_failed_c2d_twin_update_rate_threshold_warning" { description = "C2D Twin Update Failed rate limit (warning threshold)" default = 50 @@ -478,6 +646,12 @@ variable "iothub_failed_d2c_twin_read_rate_message" { default = "" } +variable "iothub_failed_d2c_twin_read_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed d2c twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_failed_d2c_twin_read_rate_threshold_warning" { description = "D2C Twin Read Failed rate limit (warning threshold)" default = 50 @@ -500,6 +674,12 @@ variable "iothub_failed_d2c_twin_update_rate_message" { default = "" } +variable "iothub_failed_d2c_twin_update_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed d2c twin update 
[available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_failed_d2c_twin_update_rate_threshold_warning" { description = "D2C Twin Update Failed rate limit (warning threshold)" default = 50 @@ -522,6 +702,12 @@ variable "iothub_dropped_d2c_telemetry_egress_message" { default = "" } +variable "iothub_dropped_d2c_telemetry_egress_timeframe" { + description = "Monitor timeframe for IoT Hub dropped d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_dropped_d2c_telemetry_egress_rate_threshold_warning" { description = "D2C Telemetry Dropped limit (warning threshold)" default = 50 @@ -544,6 +730,12 @@ variable "iothub_orphaned_d2c_telemetry_egress_message" { default = "" } +variable "iothub_orphaned_d2c_telemetry_egress_timeframe" { + description = "Monitor timeframe for IoT Hub orphaned d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_orphaned_d2c_telemetry_egress_rate_threshold_warning" { description = "D2C Telemetry Orphaned limit (warning threshold)" default = 50 @@ -566,6 +758,12 @@ variable "iothub_invalid_d2c_telemetry_egress_message" { default = "" } +variable "iothub_invalid_d2c_telemetry_egress_timeframe" { + description = "Monitor timeframe for IoT Hub invalid d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "iothub_invalid_d2c_telemetry_egress_rate_threshold_warning" { description = "D2C Telemetry Invalid limit (warning threshold)" default = 50 @@ -589,6 +787,18 @@ variable "redis_status_message" { default = "" } +variable "redis_status_time_aggregator" { + description = "Monitor aggregator for Redis status [available values: 
min, max or avg]" + type = "string" + default = "max" +} + +variable "redis_status_timeframe" { + description = "Monitor timeframe for Redis status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "redis_evictedkeys_limit_silenced" { description = "Groups to mute for Redis evicted keys monitor" type = "map" @@ -601,6 +811,18 @@ variable "redis_evictedkeys_limit_message" { default = "" } +variable "redis_evictedkeys_limit_time_aggregator" { + description = "Monitor aggregator for Redis evicted keys [available values: min, max or avg]" + type = "string" + default = "avg" +} + +variable "redis_evictedkeys_limit_timeframe" { + description = "Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "redis_evictedkeys_limit_threshold_warning" { description = "Evicted keys limit (warning threshold)" default = 0 @@ -623,6 +845,18 @@ variable "redis_percent_processor_time_message" { default = "" } +variable "redis_percent_processor_time_time_aggregator" { + description = "Monitor aggregator for Redis processor [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "redis_percent_processor_time_timeframe" { + description = "Monitor timeframe for Redis processor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "redis_percent_processor_time_threshold_critical" { description = "Processor time percent (critical threshold)" default = 80 @@ -645,6 +879,18 @@ variable "redis_server_load_rate_message" { default = "" } +variable "redis_server_load_rate_time_aggregator" { + description = "Monitor aggregator for Redis server load [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable 
"redis_server_load_rate_timeframe" { + description = "Monitor timeframe for Redis server load [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "redis_server_load_rate_threshold_critical" { description = "Server CPU load rate (critical threshold)" default = 90 @@ -668,10 +914,10 @@ variable "servicebus_status_message" { default = "" } -variable "servicebus_status_aggregator" { - description = "Monitor aggregator for Service Bus status [available values: min, max, sum or avg]" +variable "servicebus_status_time_aggregator" { + description = "Monitor aggregator for Service Bus status [available values: min, max or avg]" type = "string" - default = "min" + default = "max" } variable "servicebus_status_timeframe" { @@ -692,6 +938,18 @@ variable "sqldatabase_cpu_message" { default = "" } +variable "sqldatabase_cpu_time_aggregator" { + description = "Monitor aggregator for SQL CPU [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "sqldatabase_cpu_timeframe" { + description = "Monitor timeframe for SQL CPU [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "sqldatabase_cpu_threshold_warning" { description = "CPU usage in percent (warning threshold)" default = "80" @@ -714,6 +972,18 @@ variable "sqldatabase_diskspace_message" { default = "" } +variable "sqldatabase_diskspace_time_aggregator" { + description = "Monitor aggregator for SQL disk space [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "sqldatabase_diskspace_timeframe" { + description = "Monitor timeframe for SQL disk space [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "sqldatabase_diskspace_threshold_warning" { description = "Disk space used in percent 
(warning threshold)" default = "80" @@ -736,6 +1006,18 @@ variable "sqldatabase_dtu_message" { default = "" } +variable "sqldatabase_dtu_time_aggregator" { + description = "Monitor aggregator for SQL DTU [available values: min, max or avg]" + type = "string" + default = "avg" +} + +variable "sqldatabase_dtu_timeframe" { + description = "Monitor timeframe for SQL DTU [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "sqldatabase_dtu_threshold_warning" { description = "Amount of DTU used (warning threshold)" default = "85" @@ -758,6 +1040,12 @@ variable "sqldatabase_deadlock_message" { default = "" } +variable "sqldatabase_deadlock_timeframe" { + description = "Monitor timeframe for SQL Deadlock [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "sqldatabase_deadlock_threshold_critical" { description = "Amount of Deadlocks (critical threshold)" default = "1" @@ -776,6 +1064,18 @@ variable "storage_availability_message" { default = "" } +variable "storage_availability_time_aggregator" { + description = "Monitor aggregator for Storage availability [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "storage_availability_timeframe" { + description = "Monitor timeframe for Storage availability [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "storage_availability_threshold_critical" { description = "Minimum acceptable percent of availability for a storage" default = 50 @@ -798,6 +1098,18 @@ variable "storage_successful_requests_message" { default = "" } +variable "storage_successful_requests_time_aggregator" { + description = "Monitor aggregator for Storage sucessful requests [available values: min, max or avg]" + type = "string" + default = "max" +} + 
+variable "storage_successful_requests_timeframe" { + description = "Monitor timeframe for Storage sucessful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "storage_successful_requests_threshold_critical" { description = "Minimum acceptable percent of successful requests for a storage" default = 10 @@ -820,6 +1132,18 @@ variable "storage_latency_message" { default = "" } +variable "storage_latency_time_aggregator" { + description = "Monitor aggregator for Storage latency [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "storage_latency_timeframe" { + description = "Monitor timeframe for Storage latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "storage_latency_threshold_critical" { description = "Maximum acceptable end to end latency (ms) for a storage" default = 2000 @@ -842,6 +1166,18 @@ variable "storage_timeout_error_requests_message" { default = "" } +variable "storage_timeout_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage timeout [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "storage_timeout_error_requests_timeframe" { + description = "Monitor timeframe for Storage timeout [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "storage_timeout_error_requests_threshold_critical" { description = "Maximum acceptable percent of timeout error requests for a storage" default = 90 @@ -864,6 +1200,18 @@ variable "storage_network_error_requests_message" { default = "" } +variable "storage_network_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage network errors [available values: min, max or avg]" + type = "string" + default = 
"min" +} + +variable "storage_network_error_requests_timeframe" { + description = "Monitor timeframe for Storage network errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "storage_network_error_requests_threshold_critical" { description = "Maximum acceptable percent of network error requests for a storage" default = 90 @@ -886,6 +1234,18 @@ variable "storage_throttling_error_requests_message" { default = "" } +variable "storage_throttling_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage throttling errors [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "storage_throttling_error_requests_timeframe" { + description = "Monitor timeframe for Storage throttling errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "storage_throttling_error_requests_threshold_critical" { description = "Maximum acceptable percent of throttling error requests for a storage" default = 90 @@ -908,6 +1268,18 @@ variable "storage_server_other_error_requests_message" { default = "" } +variable "storage_server_other_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage other errors [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "storage_server_other_error_requests_timeframe" { + description = "Monitor timeframe for Storage server other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "storage_server_other_error_requests_threshold_critical" { description = "Maximum acceptable percent of server other error requests for a storage" default = 90 @@ -930,6 +1302,18 @@ variable "storage_client_other_error_requests_message" { default = "" } +variable 
"storage_client_other_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage other errors [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "storage_client_other_error_requests_timeframe" { + description = "Monitor timeframe for Storage other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "storage_client_other_error_requests_threshold_critical" { description = "Maximum acceptable percent of client other error requests for a storage" default = 90 @@ -952,6 +1336,18 @@ variable "storage_authorization_error_requests_message" { default = "" } +variable "storage_authorization_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage authorization errors [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "storage_authorization_error_requests_timeframe" { + description = "Monitor timeframe for Storage authorization errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "storage_authorization_error_requests_threshold_critical" { description = "Maximum acceptable percent of authorization error requests for a storage" default = 90 @@ -975,6 +1371,18 @@ variable "streamanalytics_status_message" { default = "" } +variable "streamanalytics_status_time_aggregator" { + description = "Monitor aggregator for Stream Analytics status [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "streamanalytics_status_timeframe" { + description = "Monitor timeframe for Stream Analytics status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "streamanalytics_su_utilization_silenced" { description = "Groups to mute for Stream Analytics 
utilization monitor" type = "map" @@ -987,6 +1395,18 @@ variable "streamanalytics_su_utilization_message" { default = "" } +variable "streamanalytics_su_utilization_time_aggregator" { + description = "Monitor aggregator for Stream Analytics utilization [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "streamanalytics_su_utilization_timeframe" { + description = "Monitor timeframe for Stream Analytics utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "streamanalytics_su_utilization_threshold_warning" { description = "Streaming Unit utilization rate limit (warning threshold)" default = 60 @@ -1009,6 +1429,12 @@ variable "streamanalytics_failed_function_requests_message" { default = "" } +variable "streamanalytics_failed_function_requests_timeframe" { + description = "Monitor timeframe for Stream Analytics failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "streamanalytics_failed_function_requests_threshold_warning" { description = "Failed Function Request rate limit (warning threshold)" default = 0 @@ -1031,6 +1457,18 @@ variable "streamanalytics_conversion_errors_message" { default = "" } +variable "streamanalytics_conversion_errors_time_aggregator" { + description = "Monitor aggregator for Stream Analytics conversion errors [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "streamanalytics_conversion_errors_timeframe" { + description = "Monitor timeframe for Stream Analytics conversion errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "streamanalytics_conversion_errors_threshold_warning" { description = "Conversion errors limit (warning threshold)" default = 0 @@ -1053,6 
+1491,18 @@ variable "streamanalytics_runtime_errors_message" { default = "" } +variable "streamanalytics_runtime_errors_time_aggregator" { + description = "Monitor aggregator for Stream Analytics runtime errors [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "streamanalytics_runtime_errors_timeframe" { + description = "Monitor timeframe for Stream Analytics runtime errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "streamanalytics_runtime_errors_threshold_warning" { description = "Runtime errors limit (warning threshold)" default = 0 diff --git a/cloud/azure/iothubs/README.md b/cloud/azure/iothubs/README.md index 4e6ecd9..ca9982f 100644 --- a/cloud/azure/iothubs/README.md +++ b/cloud/azure/iothubs/README.md @@ -33,83 +33,86 @@ Creates a DataDog monitors with the following checks : * D2C telemetry egress fallback count check * D2C telemetry ingress no sent count check -Inputs ------- - -| Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| delay | Delay in seconds for the metric evaluation | string | `900` | no | -| dropped_d2c_telemetry_egress_message | Custom message for IoT Hub dropped d2c telemetry monitor | string | `` | no | -| dropped_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Dropped limit (critical threshold) | string | `90` | no | -| dropped_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Dropped limit (warning threshold) | string | `50` | no | -| dropped_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub dropped d2c telemetry monitor | map | `` | no | -| dropped_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub dropped d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| environment | Architecture Environment | string | - | yes | -| 
failed_c2d_methods_rate_message | Custom message for IoT Hub failed c2d method monitor | string | `` | no | -| failed_c2d_methods_rate_silenced | Groups to mute for IoT Hub failed c2d methods monitor | map | `` | no | -| failed_c2d_methods_rate_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `90` | no | -| failed_c2d_methods_rate_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `50` | no | -| failed_c2d_methods_rate_timeframe | Monitor timeframe for IoT Hub failed c2d method [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| failed_c2d_twin_read_rate_message | Custom message for IoT Hub failed c2d twin read monitor | string | `` | no | -| failed_c2d_twin_read_rate_silenced | Groups to mute for IoT Hub failed c2d twin read monitor | map | `` | no | -| failed_c2d_twin_read_rate_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `90` | no | -| failed_c2d_twin_read_rate_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `50` | no | -| failed_c2d_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| failed_c2d_twin_update_rate_message | Custom message for IoT Hub failed c2d twin update monitor | string | `` | no | -| failed_c2d_twin_update_rate_silenced | Groups to mute for IoT Hub failed c2d twin update monitor | map | `` | no | -| failed_c2d_twin_update_rate_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `90` | no | -| failed_c2d_twin_update_rate_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `50` | no | -| failed_c2d_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin update [available values: `last_#m` (1, 5, 10, 15, or 30), 
`last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| failed_d2c_twin_read_rate_message | Custom message for IoT Hub failed d2c twin read monitor | string | `` | no | -| failed_d2c_twin_read_rate_silenced | Groups to mute for IoT Hub failed d2c twin read monitor | map | `` | no | -| failed_d2c_twin_read_rate_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `90` | no | -| failed_d2c_twin_read_rate_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `50` | no | -| failed_d2c_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| failed_d2c_twin_update_rate_message | Custom message for IoT Hub failed d2c twin update monitor | string | `` | no | -| failed_d2c_twin_update_rate_silenced | Groups to mute for IoT Hub failed d2c twin update monitor | map | `` | no | -| failed_d2c_twin_update_rate_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `90` | no | -| failed_d2c_twin_update_rate_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `50` | no | -| failed_d2c_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| failed_jobs_rate_message | Custom message for IoT Hub failed jobs monitor | string | `` | no | -| failed_jobs_rate_silenced | Groups to mute for IoT Hub failed jobs monitor | map | `` | no | -| failed_jobs_rate_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `90` | no | -| failed_jobs_rate_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `50` | no | -| failed_jobs_rate_timeframe | Monitor timeframe for IoT Hub failed jobs [available values: `last_#m` (1, 5, 10, 15, or 30), 
`last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| failed_listjobs_rate_message | Custom message for IoT Hub failed list jobs monitor | string | `` | no | -| failed_listjobs_rate_silenced | Groups to mute for IoT Hub failed list jobs monitor | map | `` | no | -| failed_listjobs_rate_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `90` | no | -| failed_listjobs_rate_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `50` | no | -| failed_listjobs_rate_timeframe | Monitor timeframe for IoT Hub failed list jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| failed_queryjobs_rate_message | Custom message for IoT Hub failed query jobs monitor | string | `` | no | -| failed_queryjobs_rate_silenced | Groups to mute for IoT Hub failed query jobs monitor | map | `` | no | -| failed_queryjobs_rate_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `90` | no | -| failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `50` | no | -| failed_queryjobs_rate_timeframe | Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| filter_tags | Tags used for filtering | string | `*` | no | -| invalid_d2c_telemetry_egress_message | Custom message for IoT Hub invalid d2c telemetry monitor | string | `` | no | -| invalid_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `90` | no | -| invalid_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `50` | no | -| invalid_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub invalid d2c telemetry monitor | map | `` | no | -| invalid_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub 
invalid d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| message | Message sent when an alert is triggered | string | - | yes | -| orphaned_d2c_telemetry_egress_message | Custom message for IoT Hub orphaned d2c telemetry monitor | string | `` | no | -| orphaned_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `90` | no | -| orphaned_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `50` | no | -| orphaned_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub orphaned d2c telemetry monitor | map | `` | no | -| orphaned_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub orphaned d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| status_message | Custom message for IoT Hub status monitor | string | `` | no | -| status_silenced | Groups to mute for IoT Hub status monitor | map | `` | no | -| status_timeframe | Monitor timeframe for IoT Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| too_many_d2c_telemetry_ingress_nosent_message | Custom message for IoT Hub unsent d2c telemetry monitor | string | `` | no | -| too_many_d2c_telemetry_ingress_nosent_silenced | Groups to mute for IoT Hub unsent d2c telemetry monitor | map | `` | no | -| too_many_d2c_telemetry_ingress_nosent_timeframe | Monitor timeframe for IoT Hub unsent d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| total_devices_message | Custom message for IoT Hub total devices monitor | string | `` | no | -| total_devices_silenced | Groups to mute for IoT Hub total devices monitor | map | `` | no | -| total_devices_timeframe | Monitor timeframe for IoT 
Hub total devices [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | - +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| dropped_d2c_telemetry_egress_message | Custom message for IoT Hub dropped d2c telemetry monitor | string | `` | no | +| dropped_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Dropped limit (critical threshold) | string | `90` | no | +| dropped_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Dropped limit (warning threshold) | string | `50` | no | +| dropped_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub dropped d2c telemetry monitor | map | `` | no | +| dropped_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub dropped d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| environment | Architecture Environment | string | - | yes | +| failed_c2d_methods_rate_message | Custom message for IoT Hub failed c2d method monitor | string | `` | no | +| failed_c2d_methods_rate_silenced | Groups to mute for IoT Hub failed c2d methods monitor | map | `` | no | +| failed_c2d_methods_rate_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `90` | no | +| failed_c2d_methods_rate_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `50` | no | +| failed_c2d_methods_rate_timeframe | Monitor timeframe for IoT Hub failed c2d method [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| failed_c2d_twin_read_rate_message | Custom message for IoT Hub failed c2d twin read monitor | string | `` | no | +| failed_c2d_twin_read_rate_silenced | Groups to mute for IoT Hub failed c2d twin read monitor | map | `` | 
no | +| failed_c2d_twin_read_rate_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `90` | no | +| failed_c2d_twin_read_rate_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `50` | no | +| failed_c2d_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| failed_c2d_twin_update_rate_message | Custom message for IoT Hub failed c2d twin update monitor | string | `` | no | +| failed_c2d_twin_update_rate_silenced | Groups to mute for IoT Hub failed c2d twin update monitor | map | `` | no | +| failed_c2d_twin_update_rate_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `90` | no | +| failed_c2d_twin_update_rate_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `50` | no | +| failed_c2d_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| failed_d2c_twin_read_rate_message | Custom message for IoT Hub failed d2c twin read monitor | string | `` | no | +| failed_d2c_twin_read_rate_silenced | Groups to mute for IoT Hub failed d2c twin read monitor | map | `` | no | +| failed_d2c_twin_read_rate_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `90` | no | +| failed_d2c_twin_read_rate_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `50` | no | +| failed_d2c_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| failed_d2c_twin_update_rate_message | Custom message for IoT Hub failed d2c twin update monitor | string | `` | no | +| 
failed_d2c_twin_update_rate_silenced | Groups to mute for IoT Hub failed d2c twin update monitor | map | `` | no | +| failed_d2c_twin_update_rate_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `90` | no | +| failed_d2c_twin_update_rate_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `50` | no | +| failed_d2c_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| failed_jobs_rate_message | Custom message for IoT Hub failed jobs monitor | string | `` | no | +| failed_jobs_rate_silenced | Groups to mute for IoT Hub failed jobs monitor | map | `` | no | +| failed_jobs_rate_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `90` | no | +| failed_jobs_rate_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `50` | no | +| failed_jobs_rate_timeframe | Monitor timeframe for IoT Hub failed jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| failed_listjobs_rate_message | Custom message for IoT Hub failed list jobs monitor | string | `` | no | +| failed_listjobs_rate_silenced | Groups to mute for IoT Hub failed list jobs monitor | map | `` | no | +| failed_listjobs_rate_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `90` | no | +| failed_listjobs_rate_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `50` | no | +| failed_listjobs_rate_timeframe | Monitor timeframe for IoT Hub failed list jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| failed_queryjobs_rate_message | Custom message for IoT Hub failed query jobs monitor | string | `` | no | +| failed_queryjobs_rate_silenced | Groups to mute for IoT 
Hub failed query jobs monitor | map | `` | no | +| failed_queryjobs_rate_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `90` | no | +| failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `50` | no | +| failed_queryjobs_rate_timeframe | Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| filter_tags | Tags used for filtering | string | `*` | no | +| invalid_d2c_telemetry_egress_message | Custom message for IoT Hub invalid d2c telemetry monitor | string | `` | no | +| invalid_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `90` | no | +| invalid_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `50` | no | +| invalid_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub invalid d2c telemetry monitor | map | `` | no | +| invalid_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub invalid d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| orphaned_d2c_telemetry_egress_message | Custom message for IoT Hub orphaned d2c telemetry monitor | string | `` | no | +| orphaned_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `90` | no | +| orphaned_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `50` | no | +| orphaned_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub orphaned d2c telemetry monitor | map | `` | no | +| orphaned_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub orphaned d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 
2, or 4), or `last_1d`] | string | `last_5m` | no | +| status_message | Custom message for IoT Hub status monitor | string | `` | no | +| status_silenced | Groups to mute for IoT Hub status monitor | map | `` | no | +| status_time_aggregator | Monitor aggregator for IoT Hub status [available values: min, max, sum or avg] | string | `max` | no | +| status_timeframe | Monitor timeframe for IoT Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| too_many_d2c_telemetry_ingress_nosent_message | Custom message for IoT Hub unsent d2c telemetry monitor | string | `` | no | +| too_many_d2c_telemetry_ingress_nosent_silenced | Groups to mute for IoT Hub unsent d2c telemetry monitor | map | `` | no | +| too_many_d2c_telemetry_ingress_nosent_timeframe | Monitor timeframe for IoT Hub unsent d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| total_devices_message | Custom message for IoT Hub total devices monitor | string | `` | no | +| total_devices_silenced | Groups to mute for IoT Hub total devices monitor | map | `` | no | +| total_devices_time_aggregator | Monitor aggregator for IoT Hub total devices [available values: min, max, sum or avg] | string | `min` | no | +| total_devices_timeframe | Monitor timeframe for IoT Hub total devices [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + Related documentation --------------------- DataDog documentation: [https://docs.datadoghq.com/integrations/azure_iot_hub](https://docs.datadoghq.com/integrations/azure_iot_hub) Azure IOT Hubs metrics documentation: [https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health](https://docs.microsoft.com/en-us/azure/iot-hub/iot-hub-monitor-resource-health) + \ No newline at end of file diff --git a/cloud/azure/iothubs/inputs.tf 
b/cloud/azure/iothubs/inputs.tf index 47680e2..d50ec27 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -32,6 +32,12 @@ variable "status_message" { default = "" } +variable "status_time_aggregator" { + description = "Monitor aggregator for IoT Hub status [available values: min, max, sum or avg]" + type = "string" + default = "max" +} + variable "status_timeframe" { description = "Monitor timeframe for IoT Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -50,6 +56,12 @@ variable "total_devices_message" { default = "" } +variable "total_devices_time_aggregator" { + description = "Monitor aggregator for IoT Hub total devices [available values: min, max, sum or avg]" + type = "string" + default = "min" +} + variable "total_devices_timeframe" { description = "Monitor timeframe for IoT Hub total devices [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 4a70a02..bb81de9 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -105,7 +105,9 @@ resource "datadog_monitor" "status" { message = "${coalesce(var.status_message, var.message)}" query = < ${var.evictedkeys_limit_threshold_critical} EOF @@ -68,7 +70,7 @@ resource "datadog_monitor" "percent_processor_time" { message = "${coalesce(var.percent_processor_time_message, var.message)}" query = < ${var.percent_processor_time_threshold_critical} EOF @@ -100,7 +102,7 @@ resource "datadog_monitor" "server_load" { message = "${coalesce(var.server_load_rate_message, var.message)}" query = < ${var.server_load_rate_threshold_critical} EOF diff --git a/cloud/azure/servicebus/README.md b/cloud/azure/servicebus/README.md index 461ee40..0b0507a 100644 Binary files a/cloud/azure/servicebus/README.md and 
b/cloud/azure/servicebus/README.md differ diff --git a/cloud/azure/servicebus/inputs.tf b/cloud/azure/servicebus/inputs.tf index 6ea0587..0987acd 100644 --- a/cloud/azure/servicebus/inputs.tf +++ b/cloud/azure/servicebus/inputs.tf @@ -37,10 +37,10 @@ variable "status_message" { default = "" } -variable "status_aggregator" { - description = "Monitor aggregator for Service Bus status [available values: min, max, sum or avg]" +variable "status_time_aggregator" { + description = "Monitor aggregator for Service Bus status [available values: min, max or avg]" type = "string" - default = "min" + default = "max" } variable "status_timeframe" { diff --git a/cloud/azure/servicebus/monitors-service-bus.tf b/cloud/azure/servicebus/monitors-service-bus.tf index 607d99b..a72e27f 100644 --- a/cloud/azure/servicebus/monitors-service-bus.tf +++ b/cloud/azure/servicebus/monitors-service-bus.tf @@ -11,8 +11,8 @@ resource "datadog_monitor" "servicebus_status" { message = "${coalesce(var.status_message, var.message)}" query = < ${var.cpu_threshold_critical} EOF @@ -44,7 +44,7 @@ resource "datadog_monitor" "sql-database_free_space_low" { type = "metric alert" query = < ${var.diskspace_threshold_critical} EOF @@ -76,7 +76,7 @@ resource "datadog_monitor" "sql-database_dtu_consumption_high" { type = "metric alert" query = < ${var.dtu_threshold_critical} EOF diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md index e85f146..1565478 100644 --- a/cloud/azure/storage/README.md +++ b/cloud/azure/storage/README.md @@ -36,16 +36,19 @@ Inputs | authorization_error_requests_silenced | Groups to mute for Storage authorization errors monitor | map | `` | no | | authorization_error_requests_threshold_critical | Maximum acceptable percent of authorization error requests for a storage | string | `90` | no | | authorization_error_requests_threshold_warning | Warning regarding acceptable percent of authorization error requests for a storage | string | `50` | no | +| 
authorization_error_requests_time_aggregator | Monitor aggregator for Storage authorization errors [available values: min, max or avg] | string | `min` | no | | authorization_error_requests_timeframe | Monitor timeframe for Storage authorization errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | availability_message | Custom message for Storage availability monitor | string | `` | no | | availability_silenced | Groups to mute for Storage availability monitor | map | `` | no | | availability_threshold_critical | Minimum acceptable percent of availability for a storage | string | `50` | no | | availability_threshold_warning | Warning regarding acceptable percent of availability for a storage | string | `90` | no | +| availability_time_aggregator | Monitor aggregator for Storage availability [available values: min, max or avg] | string | `max` | no | | availability_timeframe | Monitor timeframe for Storage availability [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | client_other_error_requests_message | Custom message for Storage other errors monitor | string | `` | no | | client_other_error_requests_silenced | Groups to mute for Storage other errors monitor | map | `` | no | | client_other_error_requests_threshold_critical | Maximum acceptable percent of client other error requests for a storage | string | `90` | no | | client_other_error_requests_threshold_warning | Warning regarding acceptable percent of client other error requests for a storage | string | `50` | no | +| client_other_error_requests_time_aggregator | Monitor aggregator for Storage other errors [available values: min, max or avg] | string | `min` | no | | client_other_error_requests_timeframe | Monitor timeframe for Storage other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | 
delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | @@ -55,32 +58,38 @@ Inputs | latency_silenced | Groups to mute for Storage latency monitor | map | `` | no | | latency_threshold_critical | Maximum acceptable end to end latency (ms) for a storage | string | `2000` | no | | latency_threshold_warning | Warning regarding acceptable end to end latency (ms) for a storage | string | `1000` | no | +| latency_time_aggregator | Monitor aggregator for Storage latency [available values: min, max or avg] | string | `min` | no | | latency_timeframe | Monitor timeframe for Storage latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | | network_error_requests_message | Custom message for Storage network errors monitor | string | `` | no | | network_error_requests_silenced | Groups to mute for Storage network errors monitor | map | `` | no | | network_error_requests_threshold_critical | Maximum acceptable percent of network error requests for a storage | string | `90` | no | | network_error_requests_threshold_warning | Warning regarding acceptable percent of network error requests for a storage | string | `50` | no | +| network_error_requests_time_aggregator | Monitor aggregator for Storage network errors [available values: min, max or avg] | string | `min` | no | | network_error_requests_timeframe | Monitor timeframe for Storage network errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | server_other_error_requests_message | Custom message for Storage server other errors monitor | string | `` | no | | server_other_error_requests_silenced | Groups to mute for Storage server other errors monitor | map | `` | no | | server_other_error_requests_threshold_critical | Maximum 
acceptable percent of server other error requests for a storage | string | `90` | no | | server_other_error_requests_threshold_warning | Warning regarding acceptable percent of server other error requests for a storage | string | `50` | no | +| server_other_error_requests_time_aggregator | Monitor aggregator for Storage server other errors [available values: min, max or avg] | string | `min` | no | | server_other_error_requests_timeframe | Monitor timeframe for Storage server other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | successful_requests_message | Custom message for Storage sucessful requests monitor | string | `` | no | | successful_requests_silenced | Groups to mute for Storage sucessful requests monitor | map | `` | no | | successful_requests_threshold_critical | Minimum acceptable percent of successful requests for a storage | string | `10` | no | | successful_requests_threshold_warning | Warning regarding acceptable percent of successful requests for a storage | string | `30` | no | +| successful_requests_time_aggregator | Monitor aggregator for Storage successful requests [available values: min, max or avg] | string | `max` | no | | successful_requests_timeframe | Monitor timeframe for Storage sucessful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | throttling_error_requests_message | Custom message for Storage throttling error monitor | string | `` | no | | throttling_error_requests_silenced | Groups to mute for Storage throttling error monitor | map | `` | no | | throttling_error_requests_threshold_critical | Maximum acceptable percent of throttling error requests for a storage | string | `90` | no | | throttling_error_requests_threshold_warning | Warning regarding acceptable percent of throttling error requests for a storage | string | `50` | no | +| throttling_error_requests_time_aggregator | 
Monitor aggregator for Storage throttling errors [available values: min, max or avg] | string | `min` | no | | throttling_error_requests_timeframe | Monitor timeframe for Storage throttling errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | timeout_error_requests_message | Custom message for Storage timeout monitor | string | `` | no | | timeout_error_requests_silenced | Groups to mute for Storage timeout monitor | map | `` | no | | timeout_error_requests_threshold_critical | Maximum acceptable percent of timeout error requests for a storage | string | `90` | no | | timeout_error_requests_threshold_warning | Warning regarding acceptable percent of timeout error requests for a storage | string | `50` | no | +| timeout_error_requests_time_aggregator | Monitor aggregator for Storage timeout [available values: min, max or avg] | string | `min` | no | | timeout_error_requests_timeframe | Monitor timeframe for Storage timeout [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation @@ -93,3 +102,4 @@ DataDog blog: [https://www.datadoghq.com/blog/monitor-azure-storage-datadog/](ht Azure Storage metrics documentation: [https://docs.microsoft.com/en-us/azure/storage/common/storage-monitor-storage-account](https://docs.microsoft.com/en-us/azure/storage/common/storage-monitor-storage-account) Azure Storage metrics detailed documentation [https://docs.microsoft.com/en-us/rest/api/storageservices/storage-analytics-metrics-table-schema](https://docs.microsoft.com/en-us/rest/api/storageservices/storage-analytics-metrics-table-schema) + \ No newline at end of file diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf index dfd9ae8..a2f527f 100644 --- a/cloud/azure/storage/inputs.tf +++ b/cloud/azure/storage/inputs.tf @@ -37,6 +37,12 @@ variable "availability_message" { default = "" } +variable 
"availability_time_aggregator" { + description = "Monitor aggregator for Storage availability [available values: min, max or avg]" + type = "string" + default = "max" +} + variable "availability_timeframe" { description = "Monitor timeframe for Storage availability [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -65,6 +71,12 @@ variable "successful_requests_message" { default = "" } +variable "successful_requests_time_aggregator" { + description = "Monitor aggregator for Storage sucessful requests [available values: min, max or avg]" + type = "string" + default = "max" +} + variable "successful_requests_timeframe" { description = "Monitor timeframe for Storage sucessful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -93,6 +105,12 @@ variable "latency_message" { default = "" } +variable "latency_time_aggregator" { + description = "Monitor aggregator for Storage latency [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "latency_timeframe" { description = "Monitor timeframe for Storage latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -121,6 +139,12 @@ variable "timeout_error_requests_message" { default = "" } +variable "timeout_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage timeout [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "timeout_error_requests_timeframe" { description = "Monitor timeframe for Storage timeout [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -149,6 +173,12 @@ variable "network_error_requests_message" { default = "" } +variable "network_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage network errors [available values: min, max or avg]" + type = 
"string" + default = "min" +} + variable "network_error_requests_timeframe" { description = "Monitor timeframe for Storage network errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -177,6 +207,12 @@ variable "throttling_error_requests_message" { default = "" } +variable "throttling_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage throttling errors [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "throttling_error_requests_timeframe" { description = "Monitor timeframe for Storage throttling errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -205,6 +241,12 @@ variable "server_other_error_requests_message" { default = "" } +variable "server_other_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage other errors [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "server_other_error_requests_timeframe" { description = "Monitor timeframe for Storage server other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -233,6 +275,12 @@ variable "client_other_error_requests_message" { default = "" } +variable "client_other_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage other errors [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "client_other_error_requests_timeframe" { description = "Monitor timeframe for Storage other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -261,6 +309,12 @@ variable "authorization_error_requests_message" { default = "" } +variable "authorization_error_requests_time_aggregator" { + description = "Monitor aggregator for Storage authorization errors [available values: min, max or 
avg]" + type = "string" + default = "min" +} + variable "authorization_error_requests_timeframe" { description = "Monitor timeframe for Storage authorization errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index 34d5396..71f8870 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "availability" { message = "${coalesce(var.availability_message, var.message)}" query = < ${var.latency_threshold_critical} EOF @@ -104,7 +104,7 @@ resource "datadog_monitor" "timeout_error_requests" { message = "${coalesce(var.timeout_error_requests_message, var.message)}" query = < ${var.timeout_error_requests_threshold_critical} EOF @@ -135,7 +135,7 @@ resource "datadog_monitor" "network_error_requests" { message = "${coalesce(var.network_error_requests_message, var.message)}" query = < ${var.network_error_requests_threshold_critical} EOF @@ -166,7 +166,7 @@ resource "datadog_monitor" "throttling_error_requests" { message = "${coalesce(var.throttling_error_requests_message, var.message)}" query = < ${var.throttling_error_requests_threshold_critical} EOF @@ -197,7 +197,7 @@ resource "datadog_monitor" "server_other_error_requests" { message = "${coalesce(var.server_other_error_requests_message, var.message)}" query = < ${var.server_other_error_requests_threshold_critical} EOF @@ -228,7 +228,7 @@ resource "datadog_monitor" "client_other_error_requests" { message = "${coalesce(var.client_other_error_requests_message, var.message)}" query = < ${var.client_other_error_requests_threshold_critical} EOF @@ -259,7 +259,7 @@ resource "datadog_monitor" "authorization_error_requests" { message = "${coalesce(var.authorization_error_requests_message, var.message)}" query = < ${var.authorization_error_requests_threshold_critical} 
EOF diff --git a/cloud/azure/stream-analytics/README.md b/cloud/azure/stream-analytics/README.md index 32d5b1d..0658fef 100644 Binary files a/cloud/azure/stream-analytics/README.md and b/cloud/azure/stream-analytics/README.md differ diff --git a/cloud/azure/stream-analytics/inputs.tf b/cloud/azure/stream-analytics/inputs.tf index 0c2170c..615783d 100644 --- a/cloud/azure/stream-analytics/inputs.tf +++ b/cloud/azure/stream-analytics/inputs.tf @@ -37,6 +37,12 @@ variable "status_message" { default = "" } +variable "status_time_aggregator" { + description = "Monitor aggregator for Stream Analytics status [available values: min, max or avg]" + type = "string" + default = "max" +} + variable "status_timeframe" { description = "Monitor timeframe for Stream Analytics status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -55,6 +61,12 @@ variable "su_utilization_message" { default = "" } +variable "su_utilization_time_aggregator" { + description = "Monitor aggregator for Stream Analytics utilization [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "su_utilization_timeframe" { description = "Monitor timeframe for Stream Analytics utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -111,6 +123,12 @@ variable "conversion_errors_message" { default = "" } +variable "conversion_errors_time_aggregator" { + description = "Monitor aggregator for Stream Analytics conversion errors [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "conversion_errors_timeframe" { description = "Monitor timeframe for Stream Analytics conversion errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -139,6 +157,12 @@ variable "runtime_errors_message" { default = "" } +variable "runtime_errors_time_aggregator" { + description = "Monitor 
aggregator for Stream Analytics runtime errors [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "runtime_errors_timeframe" { description = "Monitor timeframe for Stream Analytics runtime errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/cloud/azure/stream-analytics/monitors-stream-analytics.tf b/cloud/azure/stream-analytics/monitors-stream-analytics.tf index 51ab80a..b14d31f 100644 --- a/cloud/azure/stream-analytics/monitors-stream-analytics.tf +++ b/cloud/azure/stream-analytics/monitors-stream-analytics.tf @@ -11,7 +11,9 @@ resource "datadog_monitor" "status" { message = "${coalesce(var.status_message, var.message)}" query = < ${var.su_utilization_threshold_critical} EOF @@ -101,7 +103,7 @@ resource "datadog_monitor" "conversion_errors" { message = "${coalesce(var.conversion_errors_message, var.message)}" query = < ${var.conversion_errors_threshold_critical} EOF @@ -133,7 +135,7 @@ resource "datadog_monitor" "runtime_errors" { message = "${coalesce(var.runtime_errors_message, var.message)}" query = < ${var.runtime_errors_threshold_critical} EOF diff --git a/common/alerting-message/README.md b/common/alerting-message/README.md index 067682e..d9883f9 100644 Binary files a/common/alerting-message/README.md and b/common/alerting-message/README.md differ diff --git a/common/alerting-message/output.tf b/common/alerting-message/output.tf index 5166314..8a9d93e 100644 --- a/common/alerting-message/output.tf +++ b/common/alerting-message/output.tf @@ -1,3 +1,4 @@ output "alerting-message" { - value = "${data.template_file.alerting-message.rendered}" + description = "The generated message string" + value = "${data.template_file.alerting-message.rendered}" } diff --git a/databases/mongodb/README.md b/databases/mongodb/README.md index fc521cd..b665243 100644 --- a/databases/mongodb/README.md +++ b/databases/mongodb/README.md @@ -81,4 +81,6 @@ Inputs | message | 
Message sent when an alert is triggered | string | - | yes | | mongodb_replicaset_message | Custom message for Mongodb replicaset monitor | string | `` | no | | mongodb_replicaset_silenced | Groups to mute for Mongodb replicaset monitor | map | `` | no | +| mongodb_replicaset_time_aggregator | Monitor aggregator for Mongodb replicaset [available values: min, max or avg] | string | `max` | no | | mongodb_replicaset_timeframe | Monitor timeframe for Mongodb replicaset [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + diff --git a/databases/mongodb/inputs.tf b/databases/mongodb/inputs.tf index 46cf5ee..0df8a02 100644 --- a/databases/mongodb/inputs.tf +++ b/databases/mongodb/inputs.tf @@ -36,6 +36,12 @@ variable "mongodb_replicaset_message" { default = "" } +variable "mongodb_replicaset_time_aggregator" { + description = "Monitor aggregator for Mongodb replicaset [available values: min, max or avg]" + type = "string" + default = "max" +} + variable "mongodb_replicaset_timeframe" { description = "Monitor timeframe for Mongodb replicaset [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" diff --git a/databases/mongodb/monitors-mongo.tf b/databases/mongodb/monitors-mongo.tf index 5357aeb..aaa2549 100644 --- a/databases/mongodb/monitors-mongo.tf +++ b/databases/mongodb/monitors-mongo.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "mongodb_replicaset_state" { message = "${coalesce(var.mongodb_replicaset_message, var.message)}" query = <` | no | -| delay | Delay in seconds for the metric evaluation | string | `15` | no | -| environment | Architecture Environment | string | - | yes | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| message | Message sent when an alert is triggered | string | - 
| yes | +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| apache_connect_message | Custom message for Apache process monitor | string | `` | no | +| apache_connect_silenced | Groups to mute for Apache process monitor | map | `` | no | +| delay | Delay in seconds for the metric evaluation | string | `15` | no | +| environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | + diff --git a/middleware/nginx/README.md b/middleware/nginx/README.md index 90935cd..09b0386 100644 --- a/middleware/nginx/README.md +++ b/middleware/nginx/README.md @@ -19,15 +19,16 @@ Creates a DataDog monitors with the following checks : * Nginx connect -Inputs ------- - -| Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| environment | Architecture Environment | string | - | yes | -| delay | Delay in seconds for the metric evaluation | string | `15` | no | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| message | Message sent when an alert is triggered | string | - | yes | -| nginx_connect_message | Custom message for Nginx process monitor | string | `` | no | -| nginx_connect_silenced | Groups to mute for Nginx process monitor | map | `` | no | +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | `15` | no | +| environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom 
filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| nginx_connect_message | Custom message for Nginx process monitor | string | `` | no | +| nginx_connect_silenced | Groups to mute for Nginx process monitor | map | `` | no | + diff --git a/middleware/php-fpm/README.md b/middleware/php-fpm/README.md index 85b6e3e..7da65aa 100644 --- a/middleware/php-fpm/README.md +++ b/middleware/php-fpm/README.md @@ -20,20 +20,22 @@ Creates a DataDog monitors with the following checks : * PHP FPM connect * PHP FPM load -Inputs ------- - -| Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| delay | Delay in seconds for the metric evaluation | string | `15` | no | -| environment | Architecture Environment | string | - | yes | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| message | Message sent when an alert is triggered | string | - | yes | -| php_fpm_busy_message | Custom message for PHP FPM busy worker monitor | string | `` | no | -| php_fpm_busy_silenced | Groups to mute for PHP FPM busy worker monitor | map | `` | no | -| php_fpm_busy_threshold_critical | php fpm busy critical threshold | string | `0.9` | no | -| php_fpm_busy_threshold_warning | php fpm busy warning threshold | string | `0.8` | no | -| php_fpm_busy_timeframe | Monitor timeframe for PHP FPM busy worker [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_10m` | no | -| php_fpm_connect_message | Custom message for PHP FPM process monitor | string | `` | no | -| php_fpm_connect_silenced | Groups to mute for PHP FPM process monitor | map | `` | no | +Inputs +------ + +| Name 
| Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | `15` | no | +| environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when an alert is triggered | string | - | yes | +| php_fpm_busy_message | Custom message for PHP FPM busy worker monitor | string | `` | no | +| php_fpm_busy_silenced | Groups to mute for PHP FPM busy worker monitor | map | `` | no | +| php_fpm_busy_threshold_critical | php fpm busy critical threshold | string | `0.9` | no | +| php_fpm_busy_threshold_warning | php fpm busy warning threshold | string | `0.8` | no | +| php_fpm_busy_time_aggregator | Monitor aggregator for PHP FPM busy worker [available values: min, max or avg] | string | `avg` | no | +| php_fpm_busy_timeframe | Monitor timeframe for PHP FPM busy worker [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_10m` | no | +| php_fpm_connect_message | Custom message for PHP FPM process monitor | string | `` | no | +| php_fpm_connect_silenced | Groups to mute for PHP FPM process monitor | map | `` | no | + diff --git a/middleware/php-fpm/inputs.tf b/middleware/php-fpm/inputs.tf index b88d5f5..c35f5dc 100644 --- a/middleware/php-fpm/inputs.tf +++ b/middleware/php-fpm/inputs.tf @@ -38,6 +38,12 @@ variable "php_fpm_busy_message" { default = "" } +variable "php_fpm_busy_time_aggregator" { + description = "Monitor aggregator for PHP FPM busy worker [available values: min, max or avg]" + type = "string" + default = "avg" +} + variable "php_fpm_busy_timeframe" { description = "Monitor timeframe for PHP FPM busy worker [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" 
type = "string" diff --git a/middleware/php-fpm/monitors-fpm.tf b/middleware/php-fpm/monitors-fpm.tf index f0d9e90..8273490 100644 --- a/middleware/php-fpm/monitors-fpm.tf +++ b/middleware/php-fpm/monitors-fpm.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "datadog_php_fpm_connect_idle" { type = "metric alert" query = <` | no | -| cpu_high_threshold_critical | CPU high critical threshold | string | `95` | no | -| cpu_high_threshold_warning | CPU high warning threshold | string | `80` | no | -| cpu_high_timeframe | Monitor timeframe for CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| cpu_load_message | Custom message for CPU load ratio monitor | string | `` | no | -| cpu_load_silenced | Groups to mute for CPU load ratio monitor | map | `` | no | -| cpu_load_threshold_critical | CPU load ratio critical threshold | string | `4` | no | -| cpu_load_threshold_warning | CPU load ratio warning threshold | string | `3` | no | -| cpu_load_timeframe | Monitor timeframe for CPU load ratio [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| delay | Delay in seconds for the metric evaluation | string | `15` | no | -| environment | Architecture Environment | string | - | yes | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| free_disk_inodes_message | Custom message for Free disk inodes monitor | string | `` | no | -| free_disk_inodes_silenced | Groups to mute for Free disk inodes monitor | map | `` | no | -| free_disk_inodes_threshold_critical | Free disk space critical threshold | string | `5` | no | -| free_disk_inodes_threshold_warning | Free disk space warning threshold | string | `10` | no | -| free_disk_inodes_timeframe | Monitor timeframe for Free disk inodes 
[available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| free_disk_space_message | Custom message for Free diskspace monitor | string | `` | no | -| free_disk_space_silenced | Groups to mute for Free diskspace monitor | map | `` | no | -| free_disk_space_threshold_critical | Free disk space critical threshold | string | `5` | no | -| free_disk_space_threshold_warning | Free disk space warning threshold | string | `10` | no | -| free_disk_space_timeframe | Monitor timeframe for Free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| free_memory_message | Custom message for Free memory monitor | string | - | yes | -| free_memory_silenced | Groups to mute for Free memory monitor | map | `` | no | -| free_memory_threshold_critical | Free disk space critical threshold | string | `5` | no | -| free_memory_threshold_warning | Free disk space warning threshold | string | `10` | no | -| free_memory_timeframe | Monitor timeframe for Free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1m` | no | -| message | Message sent when an alert is triggered | string | - | yes | +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cpu_high_message | Custom message for CPU high monitor | string | `` | no | +| cpu_high_silenced | Groups to mute for CPU high monitor | map | `` | no | +| cpu_high_threshold_critical | CPU high critical threshold | string | `90` | no | +| cpu_high_threshold_warning | CPU high warning threshold | string | `85` | no | +| cpu_high_time_aggregator | Monitor aggregator for CPU high [available values: min, max or avg] | string | `min` | no | +| cpu_high_timeframe | Monitor timeframe for CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string 
| `last_10m` | no | +| cpu_load_message | Custom message for CPU load ratio monitor | string | `` | no | +| cpu_load_silenced | Groups to mute for CPU load ratio monitor | map | `` | no | +| cpu_load_threshold_critical | CPU load ratio critical threshold | string | `2.5` | no | +| cpu_load_threshold_warning | CPU load ratio warning threshold | string | `2` | no | +| cpu_load_time_aggregator | Monitor aggregator for CPU load ratio [available values: min, max or avg] | string | `min` | no | +| cpu_load_timeframe | Monitor timeframe for CPU load ratio [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| delay | Delay in seconds for the metric evaluation | string | `15` | no | +| environment | Architecture Environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| free_disk_inodes_message | Custom message for Free disk inodes monitor | string | `` | no | +| free_disk_inodes_silenced | Groups to mute for Free disk inodes monitor | map | `` | no | +| free_disk_inodes_threshold_critical | Free disk inodes critical threshold | string | `5` | no | +| free_disk_inodes_threshold_warning | Free disk inodes warning threshold | string | `10` | no | +| free_disk_inodes_time_aggregator | Monitor aggregator for Free disk inodes [available values: min, max or avg] | string | `min` | no | +| free_disk_inodes_timeframe | Monitor timeframe for Free disk inodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| free_disk_space_message | Custom message for Free diskspace monitor | string | `` | no | +| free_disk_space_silenced | Groups to mute for Free diskspace monitor | map | `` | no | +| free_disk_space_threshold_critical | Free disk space critical threshold | string |
`10` | no | +| free_disk_space_threshold_warning | Free disk space warning threshold | string | `20` | no | +| free_disk_space_time_aggregator | Monitor aggregator for Free diskspace [available values: min, max or avg] | string | `min` | no | +| free_disk_space_timeframe | Monitor timeframe for Free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| free_memory_message | Custom message for Free memory monitor | string | - | yes | +| free_memory_silenced | Groups to mute for Free memory monitor | map | `` | no | +| free_memory_threshold_critical | Free memory critical threshold | string | `5` | no | +| free_memory_threshold_warning | Free memory warning threshold | string | `10` | no | +| free_memory_time_aggregator | Monitor aggregator for Free memory [available values: min, max or avg] | string | `max` | no | +| free_memory_timeframe | Monitor timeframe for Free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| message | Message sent when an alert is triggered | string | - | yes | + diff --git a/system/generic/inputs.tf b/system/generic/inputs.tf index 6c66909..2d522ee 100644 --- a/system/generic/inputs.tf +++ b/system/generic/inputs.tf @@ -38,20 +38,26 @@ variable "cpu_high_message" { default = "" } +variable "cpu_high_time_aggregator" { + description = "Monitor aggregator for CPU high [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "cpu_high_timeframe" { description = "Monitor timeframe for CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_5m" + default = "last_10m" } variable "cpu_high_threshold_warning" { description = "CPU high warning threshold" - default = 80 + default = 85 } variable "cpu_high_threshold_critical" { description = "CPU high critical threshold" - default = 95
+ default = 90 } variable "cpu_load_silenced" { @@ -66,20 +72,26 @@ variable "cpu_load_message" { default = "" } +variable "cpu_load_time_aggregator" { + description = "Monitor aggregator for CPU load ratio [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "cpu_load_timeframe" { description = "Monitor timeframe for CPU load ratio [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_5m" + default = "last_15m" } variable "cpu_load_threshold_warning" { description = "CPU load ratio warning threshold" - default = 3 + default = 2 } variable "cpu_load_threshold_critical" { description = "CPU load ratio critical threshold" - default = 4 + default = 2.5 } variable "free_disk_space_silenced" { @@ -94,6 +106,12 @@ variable "free_disk_space_message" { default = "" } +variable "free_disk_space_time_aggregator" { + description = "Monitor aggregator for Free diskspace [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "free_disk_space_timeframe" { description = "Monitor timeframe for Free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -102,12 +120,12 @@ variable "free_disk_space_timeframe" { variable "free_disk_space_threshold_warning" { description = "Free disk space warning threshold" - default = 10 + default = 20 } variable "free_disk_space_threshold_critical" { description = "Free disk space critical threshold" - default = 5 + default = 10 } variable "free_disk_inodes_silenced" { @@ -122,6 +140,12 @@ variable "free_disk_inodes_message" { default = "" } +variable "free_disk_inodes_time_aggregator" { + description = "Monitor aggregator for Free disk inodes [available values: min, max or avg]" + type = "string" + default = "min" +} + variable "free_disk_inodes_timeframe" { description = "Monitor timeframe for Free disk inodes [available values: `last_#m` (1, 5, 
10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -149,10 +173,16 @@ variable "free_memory_message" { type = "string" } +variable "free_memory_time_aggregator" { + description = "Monitor aggregator for Free memory [available values: min, max or avg]" + type = "string" + default = "max" +} + variable "free_memory_timeframe" { description = "Monitor timeframe for Free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_1m" + default = "last_5m" } variable "free_memory_threshold_warning" { diff --git a/system/generic/monitors-system.tf b/system/generic/monitors-system.tf index d737f01..e6c2428 100644 --- a/system/generic/monitors-system.tf +++ b/system/generic/monitors-system.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "datadog_cpu_too_high" { message = "${coalesce(var.cpu_high_message, var.message)}" query = < ${var.cpu_high_threshold_critical} EOF @@ -42,7 +42,7 @@ resource "datadog_monitor" "datadog_load_too_high" { message = "${coalesce(var.cpu_load_message, var.message)}" query = < ${var.cpu_load_threshold_critical} @@ -74,7 +74,7 @@ resource "datadog_monitor" "datadog_free_disk_space_too_low" { message = "${coalesce(var.free_disk_space_message, var.message)}" query = <