From 3f17c0215cf2a4310f5f5acf8c5e2b098b1b508a Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Thu, 26 Apr 2018 17:11:58 +0200 Subject: [PATCH 1/7] MON-160 - ALB monitors updated --- cloud/aws/alb/README.md | 6 ++++++ cloud/aws/alb/inputs.tf | 36 +++++++++++++++++++++++++++++++++++ cloud/aws/alb/monitors-alb.tf | 12 ++++++------ 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/cloud/aws/alb/README.md b/cloud/aws/alb/README.md index 25aa0ca..027e1fd 100644 --- a/cloud/aws/alb/README.md +++ b/cloud/aws/alb/README.md @@ -32,6 +32,7 @@ Inputs |------|-------------|:----:|:-----:|:-----:| | alb_no_healthy_instances_message | Custom message for ALB no healthy instances monitor | string | `` | no | | alb_no_healthy_instances_silenced | Groups to mute for ALB no healthy instances monitor | map | `` | no | +| alb_no_healthy_instances_timeframe | Monitor timeframe for ALB no healthy instances [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1m` | no | | artificial_requests_count | Number of false requests used to mitigate false positive in case of low trafic | string | `5` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | @@ -41,22 +42,27 @@ Inputs | httpcode_elb_4xx_silenced | Groups to mute for ALB httpcode 4xx monitor | map | `` | no | | httpcode_elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `80` | no | | httpcode_elb_4xx_threshold_warning | loadbalancer 4xx warning threshold in percentage | string | `60` | no | +| httpcode_elb_4xx_timeframe | Monitor timeframe for ALB httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | httpcode_elb_5xx_message | Custom message for ALB httpcode 5xx monitor | string | `` | no | | httpcode_elb_5xx_silenced | Groups to mute for ALB httpcode 5xx 
monitor | map | `` | no | | httpcode_elb_5xx_threshold_critical | loadbalancer 5xxcritical threshold in percentage | string | `80` | no | | httpcode_elb_5xx_threshold_warning | loadbalancer 5xx warning threshold in percentage | string | `60` | no | +| httpcode_elb_5xx_timeframe | Monitor timeframe for ALB httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | httpcode_target_4xx_message | Custom message for ALB target httpcode 4xx monitor | string | `` | no | | httpcode_target_4xx_silenced | Groups to mute for ALB target httpcode 4xx monitor | map | `` | no | | httpcode_target_4xx_threshold_critical | target 4xx critical threshold in percentage | string | `80` | no | | httpcode_target_4xx_threshold_warning | target 4xx warning threshold in percentage | string | `60` | no | +| httpcode_target_4xx_timeframe | Monitor timeframe for ALB target httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | httpcode_target_5xx_message | Custom message for ALB target httpcode 5xx monitor | string | `` | no | | httpcode_target_5xx_silenced | Groups to mute for ALB target httpcode 5xx monitor | map | `` | no | | httpcode_target_5xx_threshold_critical | target 5xx critical threshold in percentage | string | `80` | no | | httpcode_target_5xx_threshold_warning | target 5xx warning threshold in percentage | string | `60` | no | +| httpcode_target_5xx_timeframe | Monitor timeframe for ALB target httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | latency_message | Custom message for ALB latency monitor | string | `` | no | | latency_silenced | Groups to mute for ALB latency monitor | map | `` | no | | latency_threshold_critical | latency critical threshold in milliseconds | string | `1000` | no | | latency_threshold_warning | latency warning threshold in 
milliseconds | string | `500` | no | +| latency_timeframe | Monitor timeframe for ALB latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | Related documentation diff --git a/cloud/aws/alb/inputs.tf b/cloud/aws/alb/inputs.tf index e6d71dd..585740e 100644 --- a/cloud/aws/alb/inputs.tf +++ b/cloud/aws/alb/inputs.tf @@ -38,6 +38,12 @@ variable "alb_no_healthy_instances_message" { default = "" } +variable "alb_no_healthy_instances_timeframe" { + description = "Monitor timeframe for ALB no healthy instances [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_1m" +} + variable "latency_silenced" { description = "Groups to mute for ALB latency monitor" type = "map" @@ -50,6 +56,12 @@ variable "latency_message" { default = "" } +variable "latency_timeframe" { + description = "Monitor timeframe for ALB latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "latency_threshold_critical" { default = 1000 description = "latency critical threshold in milliseconds" @@ -72,6 +84,12 @@ variable "httpcode_elb_4xx_message" { default = "" } +variable "httpcode_elb_4xx_timeframe" { + description = "Monitor timeframe for ALB httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "httpcode_elb_4xx_threshold_critical" { default = 80 description = "loadbalancer 4xx critical threshold in percentage" @@ -94,6 +112,12 @@ variable "httpcode_target_4xx_message" { default = "" } +variable "httpcode_target_4xx_timeframe" { + description = "Monitor timeframe for ALB target httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = 
"string" + default = "last_5m" +} + variable "httpcode_target_4xx_threshold_critical" { default = 80 description = "target 4xx critical threshold in percentage" @@ -116,6 +140,12 @@ variable "httpcode_elb_5xx_message" { default = "" } +variable "httpcode_elb_5xx_timeframe" { + description = "Monitor timeframe for ALB httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "httpcode_elb_5xx_threshold_critical" { default = 80 description = "loadbalancer 5xxcritical threshold in percentage" @@ -138,6 +168,12 @@ variable "httpcode_target_5xx_message" { default = "" } +variable "httpcode_target_5xx_timeframe" { + description = "Monitor timeframe for ALB target httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "httpcode_target_5xx_threshold_critical" { default = 80 description = "target 5xx critical threshold in percentage" diff --git a/cloud/aws/alb/monitors-alb.tf b/cloud/aws/alb/monitors-alb.tf index b7079f0..51d2d5b 100644 --- a/cloud/aws/alb/monitors-alb.tf +++ b/cloud/aws/alb/monitors-alb.tf @@ -14,7 +14,7 @@ resource "datadog_monitor" "ALB_no_healthy_instances" { message = "${coalesce(var.alb_no_healthy_instances_message, var.message)}" query = < ${var.latency_threshold_critical} EOF @@ -73,7 +73,7 @@ resource "datadog_monitor" "ALB_httpcode_elb_5xx" { message = "${coalesce(var.httpcode_elb_5xx_message, var.message)}" query = < Date: Thu, 26 Apr 2018 17:47:47 +0200 Subject: [PATCH 2/7] MON-160 - AWS monitors updated --- cloud/aws/apigateway/README.md | 3 ++ cloud/aws/apigateway/inputs.tf | 18 +++++++++ cloud/aws/apigateway/monitors-api.tf | 6 +-- cloud/aws/elasticsearch/README.md | 3 ++ cloud/aws/elasticsearch/inputs.tf | 18 +++++++++ .../elasticsearch/monitors-elasticsearch.tf | 6 +-- cloud/aws/elb/README.md | 9 ++++- cloud/aws/elb/inputs.tf | 40 
+++++++++++++++++-- cloud/aws/elb/monitors-elb.tf | 12 +++--- cloud/aws/rds/README.md | 4 +- cloud/aws/rds/inputs.tf | 12 ++++++ cloud/aws/rds/monitors-rds.tf | 4 +- cloud/aws/vpn/README.md | 3 +- cloud/aws/vpn/inputs.tf | 6 +++ cloud/aws/vpn/monitors-vpn.tf | 2 +- 15 files changed, 123 insertions(+), 23 deletions(-) diff --git a/cloud/aws/apigateway/README.md b/cloud/aws/apigateway/README.md index d20226b..755cfce 100644 --- a/cloud/aws/apigateway/README.md +++ b/cloud/aws/apigateway/README.md @@ -35,14 +35,17 @@ Inputs | http_4xx_requests_silenced | Groups to mute for API Gateway HTTP 4xx requests monitor | map | `` | no | | http_4xx_requests_threshold_critical | Maximum critical acceptable percent of 4xx errors | string | `30` | no | | http_4xx_requests_threshold_warning | Maximum warning acceptable percent of 4xx errors | string | `15` | no | +| http_4xx_requests_timeframe | Monitor timeframe for API HTTP 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | http_5xx_requests_message | Custom message for API Gateway HTTP 5xx requests monitor | string | `` | no | | http_5xx_requests_silenced | Groups to mute for API Gateway HTTP 5xx requests monitor | map | `` | no | | http_5xx_requests_threshold_critical | Maximum critical acceptable percent of 5xx errors | string | `20` | no | | http_5xx_requests_threshold_warning | Maximum warning acceptable percent of 5xx errors | string | `10` | no | +| http_5xx_requests_timeframe | Monitor timeframe for API HTTP 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | latency_message | Custom message for API Gateway latency monitor | string | `` | no | | latency_silenced | Groups to mute for API Gateway latency monitor | map | `` | no | | latency_threshold_critical | Alerting threshold in milliseconds | string | `800` | no | | latency_threshold_warning | Warning 
threshold in milliseconds | string | `400` | no | +| latency_timeframe | Monitor timeframe for API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | Related documentation diff --git a/cloud/aws/apigateway/inputs.tf b/cloud/aws/apigateway/inputs.tf index 917c7ed..83acd7c 100644 --- a/cloud/aws/apigateway/inputs.tf +++ b/cloud/aws/apigateway/inputs.tf @@ -33,6 +33,12 @@ variable "latency_message" { default = "" } +variable "latency_timeframe" { + description = "Monitor timeframe for API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "latency_threshold_critical" { default = 800 description = "Alerting threshold in milliseconds" @@ -59,6 +65,12 @@ variable "http_5xx_requests_message" { default = "" } +variable "http_5xx_requests_timeframe" { + description = "Monitor timeframe for API HTTP 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "http_5xx_requests_threshold_critical" { default = 20 description = "Maximum critical acceptable percent of 5xx errors" @@ -85,6 +97,12 @@ variable "http_4xx_requests_message" { default = "" } +variable "http_4xx_requests_timeframe" { + description = "Monitor timeframe for API HTTP 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "http_4xx_requests_threshold_critical" { default = 30 description = "Maximum critical acceptable percent of 4xx errors" diff --git a/cloud/aws/apigateway/monitors-api.tf b/cloud/aws/apigateway/monitors-api.tf index 07ed72e..8fd52a7 100644 --- a/cloud/aws/apigateway/monitors-api.tf +++ b/cloud/aws/apigateway/monitors-api.tf @@ -5,7 +5,7 @@ 
resource "datadog_monitor" "API_Gateway_latency" { message = "${coalesce(var.latency_message, var.message)}" query = < ${var.latency_threshold_critical} EOF @@ -36,7 +36,7 @@ resource "datadog_monitor" "API_http_5xx_errors_count" { message = "${coalesce(var.http_5xx_requests_message, var.message)}" query = <` | no | | cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no | | cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no | +| cpu_timeframe | Monitor timeframe for ES cluster cpu [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | diskspace_message | Custom message for ES cluster diskspace monitor | string | `` | no | | diskspace_silenced | Groups to mute for ES cluster diskspace monitor | map | `` | no | | diskspace_threshold_critical | Disk free space in percent (critical threshold) | string | `10` | no | | diskspace_threshold_warning | Disk free space in percent (warning threshold) | string | `20` | no | +| diskspace_timeframe | Monitor timeframe for ES cluster diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | environment | Architecture Environment | string | - | yes | | es_cluster_status_message | Custom message for ES cluster status monitor | string | `` | no | | es_cluster_status_silenced | Groups to mute for ES cluster status monitor | map | `` | no | +| es_cluster_status_timeframe | Monitor timeframe for ES cluster status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_30m` | no | | es_cluster_volume_size | ElasticSearch Domain volume size (in GB) | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults 
| Use default filter tags convention | string | `true` | no | diff --git a/cloud/aws/elasticsearch/inputs.tf b/cloud/aws/elasticsearch/inputs.tf index 55b26c5..4d3fb2c 100644 --- a/cloud/aws/elasticsearch/inputs.tf +++ b/cloud/aws/elasticsearch/inputs.tf @@ -38,6 +38,12 @@ variable "es_cluster_status_message" { default = "" } +variable "es_cluster_status_timeframe" { + description = "Monitor timeframe for ES cluster status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_30m" +} + variable "es_cluster_volume_size" { description = "ElasticSearch Domain volume size (in GB)" } @@ -54,6 +60,12 @@ variable "diskspace_message" { default = "" } +variable "diskspace_timeframe" { + description = "Monitor timeframe for ES cluster diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "diskspace_threshold_warning" { description = "Disk free space in percent (warning threshold)" default = "20" @@ -76,6 +88,12 @@ variable "cpu_message" { default = "" } +variable "cpu_timeframe" { + description = "Monitor timeframe for ES cluster cpu [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "cpu_threshold_warning" { description = "CPU usage in percent (warning threshold)" default = "80" diff --git a/cloud/aws/elasticsearch/monitors-elasticsearch.tf b/cloud/aws/elasticsearch/monitors-elasticsearch.tf index 0db5ae1..47dc1e2 100644 --- a/cloud/aws/elasticsearch/monitors-elasticsearch.tf +++ b/cloud/aws/elasticsearch/monitors-elasticsearch.tf @@ -18,7 +18,7 @@ resource "datadog_monitor" "es_cluster_status" { type = "metric alert" query = <= 2 @@ -52,7 +52,7 @@ resource "datadog_monitor" "es_free_space_low" { type = "metric alert" query = < ${var.cpu_threshold_critical} EOF diff --git a/cloud/aws/elb/README.md 
b/cloud/aws/elb/README.md index 424b8c7..ec188f8 100644 --- a/cloud/aws/elb/README.md +++ b/cloud/aws/elb/README.md @@ -30,32 +30,37 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| dd_aws_elb | # ELB | string | `disable` | no | | artificial_requests_count | Number of false requests used to mitigate false positive in case of low trafic | string | `5` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | | elb_4xx_message | Custom message for ELB 4xx errors monitor | string | `` | no | | elb_4xx_silenced | Groups to mute for ELB 4xx errors monitor | map | `` | no | | elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `10` | no | | elb_4xx_threshold_warning | loadbalancer 4xx warning threshold in percentage | string | `5` | no | +| elb_4xx_timeframe | Monitor timeframe for ELB 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | elb_5xx_message | Custom message for ELB 5xx errors monitor | string | `` | no | | elb_5xx_silenced | Groups to mute for ELB 5xx errors monitor | map | `` | no | | elb_5xx_threshold_critical | loadbalancer 5xx critical threshold in percentage | string | `10` | no | | elb_5xx_threshold_warning | loadbalancer 5xx warning threshold in percentage | string | `5` | no | +| elb_5xx_timeframe | Monitor timeframe for ELB 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | elb_backend_4xx_message | Custom message for ELB backend 4xx errors monitor | string | `` | no | | elb_backend_4xx_silenced | Groups to mute for ELB backend 4xx errors monitor | map | `` | no | | elb_backend_4xx_threshold_critical | loadbalancer backend 4xx critical threshold in percentage | string | `10` | no | | elb_backend_4xx_threshold_warning | loadbalancer backend 4xx warning threshold in 
percentage | string | `5` | no | +| elb_backend_4xx_timeframe | Monitor timeframe for ELB backend 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | elb_backend_5xx_message | Custom message for ELB backend 5xx errors monitor | string | `` | no | | elb_backend_5xx_silenced | Groups to mute for ELB backend 5xx errors monitor | map | `` | no | | elb_backend_5xx_threshold_critical | loadbalancer backend 5xx critical threshold in percentage | string | `10` | no | | elb_backend_5xx_threshold_warning | loadbalancer backend 5xx warning threshold in percentage | string | `5` | no | +| elb_backend_5xx_timeframe | Monitor timeframe for ELB backend 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | elb_backend_latency_critical | latency critical threshold in seconds | string | `5` | no | | elb_backend_latency_message | Custom message for ELB backend latency monitor | string | `` | no | | elb_backend_latency_silenced | Groups to mute for ELB backend latency monitor | map | `` | no | +| elb_backend_latency_timeframe | Monitor timeframe for ELB backend latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | elb_backend_latency_warning | latency warning threshold in seconds | string | `1` | no | | elb_no_healthy_instance_message | Custom message for ELB no healty instance monitor | string | `` | no | | elb_no_healthy_instance_silenced | Groups to mute for ELB no healty instance monitor | map | `` | no | +| elb_no_healthy_instance_timeframe | Monitor timeframe for ELB no healthy instance [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | environment | Architecture Environment | string | - | yes | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | |
filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | diff --git a/cloud/aws/elb/inputs.tf b/cloud/aws/elb/inputs.tf index 6cd1338..4809da0 100644 --- a/cloud/aws/elb/inputs.tf +++ b/cloud/aws/elb/inputs.tf @@ -25,10 +25,6 @@ variable "filter_tags_custom" { } ## ELB -variable "dd_aws_elb" { - default = "disable" -} - variable "elb_no_healthy_instance_silenced" { description = "Groups to mute for ELB no healty instance monitor" type = "map" @@ -41,6 +37,12 @@ variable "elb_no_healthy_instance_message" { default = "" } +variable "elb_no_healthy_instance_timeframe" { + description = "Monitor timeframe for ELB no healthy instance [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "elb_4xx_silenced" { description = "Groups to mute for ELB 4xx errors monitor" type = "map" @@ -53,6 +55,12 @@ variable "elb_4xx_message" { default = "" } +variable "elb_4xx_timeframe" { + description = "Monitor timeframe for ELB 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "elb_4xx_threshold_warning" { description = "loadbalancer 4xx warning threshold in percentage" default = 5 @@ -75,6 +83,12 @@ variable "elb_5xx_message" { default = "" } +variable "elb_5xx_timeframe" { + description = "Monitor timeframe for ELB 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "elb_5xx_threshold_warning" { description = "loadbalancer 5xx warning threshold in percentage" default = 5 @@ -97,6 +111,12 @@ variable "elb_backend_4xx_message" { default = "" } +variable
"elb_backend_4xx_timeframe" { + description = "Monitor timeframe for ELB backend 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "elb_backend_4xx_threshold_warning" { description = "loadbalancer backend 4xx warning threshold in percentage" default = 5 @@ -119,6 +139,12 @@ variable "elb_backend_5xx_message" { default = "" } +variable "elb_backend_5xx_timeframe" { + description = "Monitor timeframe for ELB backend 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "elb_backend_5xx_threshold_warning" { description = "loadbalancer backend 5xx warning threshold in percentage" default = 5 @@ -141,6 +167,12 @@ variable "elb_backend_latency_message" { default = "" } +variable "elb_backend_latency_timeframe" { + description = "Monitor timeframe for ELB backend latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "elb_backend_latency_warning" { description = "latency warning threshold in seconds" default = 1 diff --git a/cloud/aws/elb/monitors-elb.tf b/cloud/aws/elb/monitors-elb.tf index 1977a08..a36f403 100644 --- a/cloud/aws/elb/monitors-elb.tf +++ b/cloud/aws/elb/monitors-elb.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "ELB_no_healthy_instances" { message = "${coalesce(var.elb_no_healthy_instance_message, var.message)}" query = < ${var.elb_backend_latency_critical} EOF diff --git a/cloud/aws/rds/README.md b/cloud/aws/rds/README.md index c3eaa1d..f921ee0 100644 --- a/cloud/aws/rds/README.md +++ b/cloud/aws/rds/README.md @@ -29,12 +29,14 @@ Inputs | cpu_silenced | Groups to mute for RDS CPU usage monitor | map | `` | no | | cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no | | cpu_threshold_warning | CPU usage in percent 
(warning threshold) | string | `80` | no | +| cpu_timeframe | Monitor timeframe for RDS CPU usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | | diskspace_message | Custom message for RDS free diskspace monitor | string | `` | no | | diskspace_silenced | Groups to mute for RDS free diskspace monitor | map | `` | no | | diskspace_threshold_critical | Disk free space in percent (critical threshold) | string | `10` | no | | diskspace_threshold_warning | Disk free space in percent (warning threshold) | string | `20` | no | +| diskspace_timeframe | Monitor timeframe for RDS free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | environment | Architecture Environment | string | - | yes | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | diff --git a/cloud/aws/rds/inputs.tf b/cloud/aws/rds/inputs.tf index d651b09..37be0e2 100644 --- a/cloud/aws/rds/inputs.tf +++ b/cloud/aws/rds/inputs.tf @@ -38,6 +38,12 @@ variable "cpu_message" { default = "" } +variable "cpu_timeframe" { + description = "Monitor timeframe for RDS CPU usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "cpu_threshold_warning" { description = "CPU usage in percent (warning threshold)" default = "80" @@ -60,6 +66,12 @@ variable "diskspace_message" { default = "" } +variable "diskspace_timeframe" { + description = "Monitor timeframe for RDS free diskspace [available values: 
`last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "diskspace_threshold_warning" { description = "Disk free space in percent (warning threshold)" default = "20" diff --git a/cloud/aws/rds/monitors-rds.tf b/cloud/aws/rds/monitors-rds.tf index 79b1670..2717d1d 100644 --- a/cloud/aws/rds/monitors-rds.tf +++ b/cloud/aws/rds/monitors-rds.tf @@ -14,7 +14,7 @@ resource "datadog_monitor" "rds_cpu_90_15min" { type = "metric alert" query = < ${var.cpu_threshold_critical} EOF @@ -46,7 +46,7 @@ resource "datadog_monitor" "rds_free_space_low" { type = "metric alert" query = <` | no | +| vpn_status_timeframe | Monitor timeframe for VPN status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | diff --git a/cloud/aws/vpn/inputs.tf b/cloud/aws/vpn/inputs.tf index 7e9f468..13314b6 100644 --- a/cloud/aws/vpn/inputs.tf +++ b/cloud/aws/vpn/inputs.tf @@ -35,3 +35,9 @@ variable "vpn_status_message" { type = "string" default = "" } + +variable "vpn_status_timeframe" { + description = "Monitor timeframe for VPN status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} diff --git a/cloud/aws/vpn/monitors-vpn.tf b/cloud/aws/vpn/monitors-vpn.tf index 8a3cddb..1f94ebf 100644 --- a/cloud/aws/vpn/monitors-vpn.tf +++ b/cloud/aws/vpn/monitors-vpn.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "VPN_status" { message = "${coalesce(var.vpn_status_message, var.message)}" query = < Date: Thu, 26 Apr 2018 18:27:49 +0200 Subject: [PATCH 3/7] MON-160 - Azure monitors updated --- cloud/azure/apimanagement/README.md | 5 ++ cloud/azure/apimanagement/inputs.tf | 30 +++++++ .../monitors-azure-apimanagement.tf | 10 +-- cloud/azure/app-services/README.md | 5 ++ cloud/azure/app-services/inputs.tf | 30 +++++++ .../app-services/monitors-app_services.tf | 8 +- cloud/azure/eventhub/README.md | 
3 + cloud/azure/eventhub/inputs.tf | 18 ++++ cloud/azure/eventhub/monitors-eventhub.tf | 6 +- cloud/azure/iothubs/README.md | 14 ++++ cloud/azure/iothubs/inputs.tf | 84 +++++++++++++++++++ cloud/azure/iothubs/monitors-iothubs.tf | 28 +++---- cloud/azure/redis/README.md | 4 + cloud/azure/redis/inputs.tf | 24 ++++++ cloud/azure/redis/monitors-azure-redis.tf | 8 +- cloud/azure/sql-database/README.md | 4 + cloud/azure/sql-database/inputs.tf | 24 ++++++ .../monitors-sql-database-basics.tf | 8 +- 18 files changed, 279 insertions(+), 34 deletions(-) diff --git a/cloud/azure/apimanagement/README.md b/cloud/azure/apimanagement/README.md index f08ce97..aa1ec85 100644 --- a/cloud/azure/apimanagement/README.md +++ b/cloud/azure/apimanagement/README.md @@ -33,6 +33,7 @@ Inputs | failed_requests_silenced | Groups to mute for API Management failed requests monitor | map | `` | no | | failed_requests_threshold_critical | Maximum acceptable percent of failed requests | string | `90` | no | | failed_requests_threshold_warning | Warning regarding acceptable percent of failed requests | string | `50` | no | +| failed_requests_timeframe | Monitor timeframe for API Management failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | @@ -40,16 +41,20 @@ Inputs | other_requests_silenced | Groups to mute for API Management other requests monitor | map | `` | no | | other_requests_threshold_critical | Maximum acceptable percent of other requests | string | `90` | no | | other_requests_threshold_warning | Warning regarding acceptable percent of other requests | string | `50` | no | +| other_requests_timeframe | Monitor timeframe for API 
Management other requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | status_message | Custom message for API Management status monitor | string | `` | no | | status_silenced | Groups to mute for API Management status monitor | map | `` | no | +| status_timeframe | Monitor timeframe for API Management status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | successful_requests_message | Custom message for API Management successful requests monitor | string | `` | no | | successful_requests_silenced | Groups to mute for API Management successful requests monitor | map | `` | no | | successful_requests_threshold_critical | Minimum acceptable percent of successful requests | string | `10` | no | | successful_requests_threshold_warning | Warning regarding acceptable percent of successful requests | string | `30` | no | +| successful_requests_timeframe | Monitor timeframe for API Management successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | unauthorized_requests_message | Custom message for API Management unauthorized requests monitor | string | `` | no | | unauthorized_requests_silenced | Groups to mute for API Management unauthorized requests monitor | map | `` | no | | unauthorized_requests_threshold_critical | Maximum acceptable percent of unauthorized requests | string | `90` | no | | unauthorized_requests_threshold_warning | Warning regarding acceptable percent of unauthorized requests | string | `50` | no | +| unauthorized_requests_timeframe | Monitor timeframe for API Management unauthorized requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/azure/apimanagement/inputs.tf 
b/cloud/azure/apimanagement/inputs.tf index b006f1e..0a91eea 100644 --- a/cloud/azure/apimanagement/inputs.tf +++ b/cloud/azure/apimanagement/inputs.tf @@ -37,6 +37,12 @@ variable "status_message" { default = "" } +variable "status_timeframe" { + description = "Monitor timeframe for API Management status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_requests_silenced" { description = "Groups to mute for API Management failed requests monitor" type = "map" @@ -49,6 +55,12 @@ variable "failed_requests_message" { default = "" } +variable "failed_requests_timeframe" { + description = "Monitor timeframe for API Management failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_requests_threshold_critical" { description = "Maximum acceptable percent of failed requests" default = 90 @@ -71,6 +83,12 @@ variable "other_requests_message" { default = "" } +variable "other_requests_timeframe" { + description = "Monitor timeframe for API Management other requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "other_requests_threshold_critical" { description = "Maximum acceptable percent of other requests" default = 90 @@ -93,6 +111,12 @@ variable "unauthorized_requests_message" { default = "" } +variable "unauthorized_requests_timeframe" { + description = "Monitor timeframe for API Management unauthorized requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "unauthorized_requests_threshold_critical" { description = "Maximum acceptable percent of unauthorized requests" default = 90 @@ -115,6 +139,12 @@ variable "successful_requests_message" { default = "" } +variable 
"successful_requests_timeframe" { + description = "Monitor timeframe for API Management successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "successful_requests_threshold_critical" { description = "Minimum acceptable percent of successful requests" default = 10 diff --git a/cloud/azure/apimanagement/monitors-azure-apimanagement.tf b/cloud/azure/apimanagement/monitors-azure-apimanagement.tf index 573ebb7..1775ed2 100644 --- a/cloud/azure/apimanagement/monitors-azure-apimanagement.tf +++ b/cloud/azure/apimanagement/monitors-azure-apimanagement.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "apimgt_status" { message = "${coalesce(var.status_message, var.message)}" query = < ${var.failed_requests_threshold_critical} @@ -74,7 +74,7 @@ resource "datadog_monitor" "apimgt_other_requests" { message = "${coalesce(var.other_requests_message, var.message)}" query = < ${var.other_requests_threshold_critical} @@ -106,7 +106,7 @@ resource "datadog_monitor" "apimgt_unauthorized_requests" { message = "${coalesce(var.unauthorized_requests_message, var.message)}" query = < ${var.unauthorized_requests_threshold_critical} @@ -138,7 +138,7 @@ resource "datadog_monitor" "apimgt_successful_requests" { message = "${coalesce(var.successful_requests_message, var.message)}" query = <` | no | | http_4xx_requests_threshold_critical | Maximum critical acceptable percent of 4xx errors | string | `90` | no | | http_4xx_requests_threshold_warning | Warning regarding acceptable percent of 4xx errors | string | `50` | no | +| http_4xx_requests_timeframe | Monitor timeframe for App Services 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | http_5xx_requests_message | Custom message for App Services 5xx requests monitor | string | `` | no | | http_5xx_requests_silenced | Groups to mute for App Services 5xx 
requests monitor | map | `` | no | | http_5xx_requests_threshold_critical | Maximum critical acceptable percent of 5xx errors | string | `90` | no | | http_5xx_requests_threshold_warning | Warning regarding acceptable percent of 5xx errors | string | `50` | no | +| http_5xx_requests_timeframe | Monitor timeframe for App Services 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | http_successful_requests_message | Custom message for App Services successful requests monitor | string | `` | no | | http_successful_requests_silenced | Groups to mute for App Services successful requests monitor | map | `` | no | | http_successful_requests_threshold_critical | Minimum critical acceptable percent of 2xx & 3xx requests | string | `10` | no | | http_successful_requests_threshold_warning | Warning regarding acceptable percent of 2xx & 3xx requests | string | `30` | no | +| http_successful_requests_timeframe | Monitor timeframe for App Services successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | memory_usage_message | Custom message for App Services memory usage monitor | string | `` | no | | memory_usage_silenced | Groups to mute for App Services memory usage monitor | map | `` | no | | memory_usage_threshold_critical | Alerting threshold in Mib | string | `1073741824` | no | | memory_usage_threshold_warning | Warning threshold in MiB | string | `536870912` | no | +| memory_usage_timeframe | Monitor timeframe for App Services memory usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | response_time_message | Custom message for App Services response time monitor | string | `` | no | | response_time_silenced | Groups to mute for App Services response time monitor | 
map | `` | no | | response_time_threshold_critical | Alerting threshold for response time in seconds | string | `10` | no | | response_time_threshold_warning | Warning threshold for response time in seconds | string | `5` | no | +| response_time_timeframe | Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/azure/app-services/inputs.tf b/cloud/azure/app-services/inputs.tf index dd2a9fa..d41e977 100644 --- a/cloud/azure/app-services/inputs.tf +++ b/cloud/azure/app-services/inputs.tf @@ -35,6 +35,12 @@ variable "response_time_message" { default = "" } +variable "response_time_timeframe" { + description = "Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "response_time_threshold_critical" { default = 10 description = "Alerting threshold for response time in seconds" @@ -57,6 +63,12 @@ variable "memory_usage_message" { default = "" } +variable "memory_usage_timeframe" { + description = "Monitor timeframe for App Services memory usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "memory_usage_threshold_critical" { default = 1073741824 # 1Gb description = "Alerting threshold in Mib" @@ -79,6 +91,12 @@ variable "http_4xx_requests_message" { default = "" } +variable "http_4xx_requests_timeframe" { + description = "Monitor timeframe for App Services 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "http_4xx_requests_threshold_critical" { default = 90 description = "Maximum critical acceptable percent of 4xx errors" @@ -101,6 +119,12 @@ variable 
"http_5xx_requests_message" { default = "" } +variable "http_5xx_requests_timeframe" { + description = "Monitor timeframe for App Services 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "http_5xx_requests_threshold_critical" { default = 90 description = "Maximum critical acceptable percent of 5xx errors" @@ -123,6 +147,12 @@ variable "http_successful_requests_message" { default = "" } +variable "http_successful_requests_timeframe" { + description = "Monitor timeframe for App Services successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "http_successful_requests_threshold_critical" { default = 10 description = "Minimum critical acceptable percent of 2xx & 3xx requests" diff --git a/cloud/azure/app-services/monitors-app_services.tf b/cloud/azure/app-services/monitors-app_services.tf index fab2097..b8f639b 100644 --- a/cloud/azure/app-services/monitors-app_services.tf +++ b/cloud/azure/app-services/monitors-app_services.tf @@ -44,7 +44,7 @@ resource "datadog_monitor" "appservices_memory_usage_count" { message = "${coalesce(var.memory_usage_message, var.message)}" query = < ${var.memory_usage_threshold_critical} EOF @@ -75,7 +75,7 @@ resource "datadog_monitor" "appservices_http_5xx_errors_count" { message = "${coalesce(var.http_5xx_requests_message, var.message)}" query = < ${var.http_5xx_requests_threshold_critical} @@ -107,7 +107,7 @@ resource "datadog_monitor" "appservices_http_4xx_errors_count" { message = "${coalesce(var.http_4xx_requests_message, var.message)}" query = < ${var.http_4xx_requests_threshold_critical} @@ -139,7 +139,7 @@ resource "datadog_monitor" "appservices_http_success_status_rate" { message = "${coalesce(var.http_successful_requests_message, var.message)}" query = <` | no | | errors_rate_thresold_critical | Errors ratio 
(percentage) to trigger the critical alert | string | `90` | no | | errors_rate_thresold_warning | Errors ratio (percentage) to trigger a warning alert | string | `50` | no | +| errors_rate_timeframe | Monitor timeframe for Event Hub errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | failed_requests_rate_message | Custom message for Event Hub failed requests monitor | string | `` | no | | failed_requests_rate_silenced | Groups to mute for Event Hub failed requests monitor | map | `` | no | | failed_requests_rate_thresold_critical | Failed requests ratio (percentage) to trigger the critical alert | string | `90` | no | | failed_requests_rate_thresold_warning | Failed requests ratio (percentage) to trigger a warning alert | string | `50` | no | +| failed_requests_rate_timeframe | Monitor timeframe for Event Hub failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | | status_message | Custom message for Event Hub status monitor | string | `` | no | | status_silenced | Groups to mute for Event Hub status monitor | map | `` | no | +| status_timeframe | Monitor timeframe for Event Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/azure/eventhub/inputs.tf b/cloud/azure/eventhub/inputs.tf index 707a293..963be45 100644 --- a/cloud/azure/eventhub/inputs.tf +++ b/cloud/azure/eventhub/inputs.tf @@ -37,6 +37,12 @@ variable "status_message" { default = "" } +variable "status_timeframe" { + description = "Monitor 
timeframe for Event Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_requests_rate_silenced" { description = "Groups to mute for Event Hub failed requests monitor" type = "map" @@ -49,6 +55,12 @@ variable "failed_requests_rate_message" { default = "" } +variable "failed_requests_rate_timeframe" { + description = "Monitor timeframe for Event Hub failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_requests_rate_thresold_critical" { description = "Failed requests ratio (percentage) to trigger the critical alert" default = 90 @@ -71,6 +83,12 @@ variable "errors_rate_message" { default = "" } +variable "errors_rate_timeframe" { + description = "Monitor timeframe for Event Hub errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "errors_rate_thresold_critical" { description = "Errors ratio (percentage) to trigger the critical alert" default = 90 diff --git a/cloud/azure/eventhub/monitors-eventhub.tf b/cloud/azure/eventhub/monitors-eventhub.tf index b68ccf0..21e531b 100644 --- a/cloud/azure/eventhub/monitors-eventhub.tf +++ b/cloud/azure/eventhub/monitors-eventhub.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "eventhub_status" { message = "${coalesce(var.status_message, var.message)}" query = <` | no | +| dropped_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub dropped d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | environment | Architecture Environment | string | - | yes | | failed_c2d_methods_rate_message | Custom message for IoT Hub failed c2d method monitor | string | `` | no | | failed_c2d_methods_rate_silenced | Groups to mute for IoT Hub 
failed c2d methods monitor | map | `` | no | | failed_c2d_methods_rate_threshold_critical | C2D Methods Failed rate limit (critical threshold) | string | `90` | no | | failed_c2d_methods_rate_threshold_warning | C2D Methods Failed rate limit (warning threshold) | string | `50` | no | +| failed_c2d_methods_rate_timeframe | Monitor timeframe for IoT Hub failed c2d method [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | failed_c2d_twin_read_rate_message | Custom message for IoT Hub failed c2d twin read monitor | string | `` | no | | failed_c2d_twin_read_rate_silenced | Groups to mute for IoT Hub failed c2d twin read monitor | map | `` | no | | failed_c2d_twin_read_rate_threshold_critical | C2D Twin Read Failed rate limit (critical threshold) | string | `90` | no | | failed_c2d_twin_read_rate_threshold_warning | C2D Twin Read Failed rate limit (warning threshold) | string | `50` | no | +| failed_c2d_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | failed_c2d_twin_update_rate_message | Custom message for IoT Hub failed c2d twin update monitor | string | `` | no | | failed_c2d_twin_update_rate_silenced | Groups to mute for IoT Hub failed c2d twin update monitor | map | `` | no | | failed_c2d_twin_update_rate_threshold_critical | C2D Twin Update Failed rate limit (critical threshold) | string | `90` | no | | failed_c2d_twin_update_rate_threshold_warning | C2D Twin Update Failed rate limit (warning threshold) | string | `50` | no | +| failed_c2d_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed c2d twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | failed_d2c_twin_read_rate_message | Custom message for IoT Hub failed d2c twin read monitor | string | `` | no | | 
failed_d2c_twin_read_rate_silenced | Groups to mute for IoT Hub failed d2c twin read monitor | map | `` | no | | failed_d2c_twin_read_rate_threshold_critical | D2C Twin Read Failed rate limit (critical threshold) | string | `90` | no | | failed_d2c_twin_read_rate_threshold_warning | D2C Twin Read Failed rate limit (warning threshold) | string | `50` | no | +| failed_d2c_twin_read_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | failed_d2c_twin_update_rate_message | Custom message for IoT Hub failed d2c twin update monitor | string | `` | no | | failed_d2c_twin_update_rate_silenced | Groups to mute for IoT Hub failed d2c twin update monitor | map | `` | no | | failed_d2c_twin_update_rate_threshold_critical | D2C Twin Update Failed rate limit (critical threshold) | string | `90` | no | | failed_d2c_twin_update_rate_threshold_warning | D2C Twin Update Failed rate limit (warning threshold) | string | `50` | no | +| failed_d2c_twin_update_rate_timeframe | Monitor timeframe for IoT Hub failed d2c twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | failed_jobs_rate_message | Custom message for IoT Hub failed jobs monitor | string | `` | no | | failed_jobs_rate_silenced | Groups to mute for IoT Hub failed jobs monitor | map | `` | no | | failed_jobs_rate_threshold_critical | Jobs Failed rate limit (critical threshold) | string | `90` | no | | failed_jobs_rate_threshold_warning | Jobs Failed rate limit (warning threshold) | string | `50` | no | +| failed_jobs_rate_timeframe | Monitor timeframe for IoT Hub failed jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | failed_listjobs_rate_message | Custom message for IoT Hub failed list jobs monitor | string | `` | no | | 
failed_listjobs_rate_silenced | Groups to mute for IoT Hub failed list jobs monitor | map | `` | no | | failed_listjobs_rate_threshold_critical | ListJobs Failed rate limit (critical threshold) | string | `90` | no | | failed_listjobs_rate_threshold_warning | ListJobs Failed rate limit (warning threshold) | string | `50` | no | +| failed_listjobs_rate_timeframe | Monitor timeframe for IoT Hub failed list jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | failed_queryjobs_rate_message | Custom message for IoT Hub failed query jobs monitor | string | `` | no | | failed_queryjobs_rate_silenced | Groups to mute for IoT Hub failed query jobs monitor | map | `` | no | | failed_queryjobs_rate_threshold_critical | QueryJobs Failed rate limit (critical threshold) | string | `90` | no | | failed_queryjobs_rate_threshold_warning | QueryJobs Failed rate limit (warning threshold) | string | `50` | no | +| failed_queryjobs_rate_timeframe | Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags | Tags used for filtering | string | `*` | no | | invalid_d2c_telemetry_egress_message | Custom message for IoT Hub invalid d2c telemetry monitor | string | `` | no | | invalid_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Invalid limit (critical threshold) | string | `90` | no | | invalid_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Invalid limit (warning threshold) | string | `50` | no | | invalid_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub invalid d2c telemetry monitor | map | `` | no | +| invalid_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub invalid d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when an alert is triggered 
| string | - | yes | | orphaned_d2c_telemetry_egress_message | Custom message for IoT Hub orphaned d2c telemetry monitor | string | `` | no | | orphaned_d2c_telemetry_egress_rate_threshold_critical | D2C Telemetry Orphaned limit (critical threshold) | string | `90` | no | | orphaned_d2c_telemetry_egress_rate_threshold_warning | D2C Telemetry Orphaned limit (warning threshold) | string | `50` | no | | orphaned_d2c_telemetry_egress_silenced | Groups to mute for IoT Hub orphaned d2c telemetry monitor | map | `` | no | +| orphaned_d2c_telemetry_egress_timeframe | Monitor timeframe for IoT Hub orphaned d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | status_message | Custom message for IoT Hub status monitor | string | `` | no | | status_silenced | Groups to mute for IoT Hub status monitor | map | `` | no | +| status_timeframe | Monitor timeframe for IoT Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | too_many_d2c_telemetry_ingress_nosent_message | Custom message for IoT Hub unsent d2c telemetry monitor | string | `` | no | | too_many_d2c_telemetry_ingress_nosent_silenced | Groups to mute for IoT Hub unsent d2c telemetry monitor | map | `` | no | +| too_many_d2c_telemetry_ingress_nosent_timeframe | Monitor timeframe for IoT Hub unsent d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | total_devices_message | Custom message for IoT Hub total devices monitor | string | `` | no | | total_devices_silenced | Groups to mute for IoT Hub total devices monitor | map | `` | no | +| total_devices_timeframe | Monitor timeframe for IoT Hub total devices [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git 
a/cloud/azure/iothubs/inputs.tf b/cloud/azure/iothubs/inputs.tf index e58f862..47680e2 100644 --- a/cloud/azure/iothubs/inputs.tf +++ b/cloud/azure/iothubs/inputs.tf @@ -32,6 +32,12 @@ variable "status_message" { default = "" } +variable "status_timeframe" { + description = "Monitor timeframe for IoT Hub status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "total_devices_silenced" { description = "Groups to mute for IoT Hub total devices monitor" type = "map" @@ -44,6 +50,12 @@ variable "total_devices_message" { default = "" } +variable "total_devices_timeframe" { + description = "Monitor timeframe for IoT Hub total devices [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "too_many_d2c_telemetry_ingress_nosent_silenced" { description = "Groups to mute for IoT Hub unsent d2c telemetry monitor" type = "map" @@ -56,6 +68,12 @@ variable "too_many_d2c_telemetry_ingress_nosent_message" { default = "" } +variable "too_many_d2c_telemetry_ingress_nosent_timeframe" { + description = "Monitor timeframe for IoT Hub unsent d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_jobs_rate_silenced" { description = "Groups to mute for IoT Hub failed jobs monitor" type = "map" @@ -68,6 +86,12 @@ variable "failed_jobs_rate_message" { default = "" } +variable "failed_jobs_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_jobs_rate_threshold_warning" { description = "Jobs Failed rate limit (warning threshold)" default = 50 @@ -90,6 +114,12 @@ variable "failed_listjobs_rate_message" { default = "" } 
+variable "failed_listjobs_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed list jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_listjobs_rate_threshold_warning" { description = "ListJobs Failed rate limit (warning threshold)" default = 50 @@ -112,6 +142,12 @@ variable "failed_queryjobs_rate_message" { default = "" } +variable "failed_queryjobs_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed query jobs [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_queryjobs_rate_threshold_warning" { description = "QueryJobs Failed rate limit (warning threshold)" default = 50 @@ -134,6 +170,12 @@ variable "failed_c2d_methods_rate_message" { default = "" } +variable "failed_c2d_methods_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed c2d method [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_c2d_methods_rate_threshold_warning" { description = "C2D Methods Failed rate limit (warning threshold)" default = 50 @@ -156,6 +198,12 @@ variable "failed_c2d_twin_read_rate_message" { default = "" } +variable "failed_c2d_twin_read_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed c2d twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_c2d_twin_read_rate_threshold_warning" { description = "C2D Twin Read Failed rate limit (warning threshold)" default = 50 @@ -178,6 +226,12 @@ variable "failed_c2d_twin_update_rate_message" { default = "" } +variable "failed_c2d_twin_update_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed c2d twin update [available values: 
`last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_c2d_twin_update_rate_threshold_warning" { description = "C2D Twin Update Failed rate limit (warning threshold)" default = 50 @@ -200,6 +254,12 @@ variable "failed_d2c_twin_read_rate_message" { default = "" } +variable "failed_d2c_twin_read_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed d2c twin read [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_d2c_twin_read_rate_threshold_warning" { description = "D2C Twin Read Failed rate limit (warning threshold)" default = 50 @@ -222,6 +282,12 @@ variable "failed_d2c_twin_update_rate_message" { default = "" } +variable "failed_d2c_twin_update_rate_timeframe" { + description = "Monitor timeframe for IoT Hub failed d2c twin update [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_d2c_twin_update_rate_threshold_warning" { description = "D2C Twin Update Failed rate limit (warning threshold)" default = 50 @@ -244,6 +310,12 @@ variable "dropped_d2c_telemetry_egress_message" { default = "" } +variable "dropped_d2c_telemetry_egress_timeframe" { + description = "Monitor timeframe for IoT Hub dropped d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "dropped_d2c_telemetry_egress_rate_threshold_warning" { description = "D2C Telemetry Dropped limit (warning threshold)" default = 50 @@ -266,6 +338,12 @@ variable "orphaned_d2c_telemetry_egress_message" { default = "" } +variable "orphaned_d2c_telemetry_egress_timeframe" { + description = "Monitor timeframe for IoT Hub orphaned d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or 
`last_1d`]" + type = "string" + default = "last_5m" +} + variable "orphaned_d2c_telemetry_egress_rate_threshold_warning" { description = "D2C Telemetry Orphaned limit (warning threshold)" default = 50 @@ -288,6 +366,12 @@ variable "invalid_d2c_telemetry_egress_message" { default = "" } +variable "invalid_d2c_telemetry_egress_timeframe" { + description = "Monitor timeframe for IoT Hub invalid d2c telemetry [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "invalid_d2c_telemetry_egress_rate_threshold_warning" { description = "D2C Telemetry Invalid limit (warning threshold)" default = 50 diff --git a/cloud/azure/iothubs/monitors-iothubs.tf b/cloud/azure/iothubs/monitors-iothubs.tf index 6c5f53f..4a70a02 100644 --- a/cloud/azure/iothubs/monitors-iothubs.tf +++ b/cloud/azure/iothubs/monitors-iothubs.tf @@ -3,7 +3,7 @@ resource "datadog_monitor" "too_many_jobs_failed" { message = "${coalesce(var.failed_jobs_rate_message, var.message)}" query = < 0 diff --git a/cloud/azure/redis/README.md b/cloud/azure/redis/README.md index a6b0883..b8ebdcc 100644 --- a/cloud/azure/redis/README.md +++ b/cloud/azure/redis/README.md @@ -33,6 +33,7 @@ Inputs | evictedkeys_limit_silenced | Groups to mute for Redis evicted keys monitor | map | `` | no | | evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no | | evictedkeys_limit_threshold_warning | Evicted keys limit (warning threshold) | string | `0` | no | +| evictedkeys_limit_timeframe | Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a Redis monitor is 
triggered | string | - | yes | @@ -40,12 +41,15 @@ Inputs | percent_processor_time_silenced | Groups to mute for Redis processor monitor | map | `` | no | | percent_processor_time_threshold_critical | Processor time percent (critical threshold) | string | `80` | no | | percent_processor_time_threshold_warning | Processor time percent (warning threshold) | string | `60` | no | +| percent_processor_time_timeframe | Monitor timeframe for Redis processor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | server_load_rate_message | Custom message for Redis server load monitor | string | `` | no | | server_load_rate_silenced | Groups to mute for Redis server load monitor | map | `` | no | | server_load_rate_threshold_critical | Server CPU load rate (critical threshold) | string | `90` | no | | server_load_rate_threshold_warning | Server CPU load rate (warning threshold) | string | `70` | no | +| server_load_rate_timeframe | Monitor timeframe for Redis server load [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | status_message | Custom message for Redis status monitor | string | `` | no | | status_silenced | Groups to mute for Redis status monitor | map | `` | no | +| status_timeframe | Monitor timeframe for Redis status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf index 11725bc..e3c0887 100644 --- a/cloud/azure/redis/inputs.tf +++ b/cloud/azure/redis/inputs.tf @@ -37,6 +37,12 @@ variable "status_message" { default = "" } +variable "status_timeframe" { + description = "Monitor timeframe for Redis status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable 
"evictedkeys_limit_silenced" { description = "Groups to mute for Redis evicted keys monitor" type = "map" @@ -49,6 +55,12 @@ variable "evictedkeys_limit_message" { default = "" } +variable "evictedkeys_limit_timeframe" { + description = "Monitor timeframe for Redis evicted keys [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "evictedkeys_limit_threshold_warning" { description = "Evicted keys limit (warning threshold)" default = 0 @@ -71,6 +83,12 @@ variable "percent_processor_time_message" { default = "" } +variable "percent_processor_time_timeframe" { + description = "Monitor timeframe for Redis processor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "percent_processor_time_threshold_critical" { description = "Processor time percent (critical threshold)" default = 80 @@ -93,6 +111,12 @@ variable "server_load_rate_message" { default = "" } +variable "server_load_rate_timeframe" { + description = "Monitor timeframe for Redis server load [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "server_load_rate_threshold_critical" { description = "Server CPU load rate (critical threshold)" default = 90 diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf index 0002d96..a61b651 100644 --- a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "status" { message = "${coalesce(var.status_message, var.message)}" query = < ${var.evictedkeys_limit_threshold_critical} EOF @@ -68,7 +68,7 @@ resource "datadog_monitor" "percent_processor_time" { message = "${coalesce(var.percent_processor_time_message, var.message)}" query = < 
${var.percent_processor_time_threshold_critical} EOF @@ -100,7 +100,7 @@ resource "datadog_monitor" "server_load" { message = "${coalesce(var.server_load_rate_message, var.message)}" query = < ${var.server_load_rate_threshold_critical} EOF diff --git a/cloud/azure/sql-database/README.md b/cloud/azure/sql-database/README.md index 1291389..8b993bb 100644 --- a/cloud/azure/sql-database/README.md +++ b/cloud/azure/sql-database/README.md @@ -31,18 +31,22 @@ Inputs | cpu_silenced | Groups to mute for SQL CPU monitor | map | `` | no | | cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no | | cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no | +| cpu_timeframe | Monitor timeframe for SQL CPU [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | deadlock_message | Custom message for SQL Deadlock monitor | string | `` | no | | deadlock_silenced | Groups to mute for SQL Deadlock monitor | map | `` | no | | deadlock_threshold_critical | Amount of Deadlocks (critical threshold) | string | `1` | no | +| deadlock_timeframe | Monitor timeframe for SQL Deadlock [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | diskspace_message | Custom message for SQL disk space monitor | string | `` | no | | diskspace_silenced | Groups to mute for SQL disk space monitor | map | `` | no | | diskspace_threshold_critical | Disk space used in percent (critical threshold) | string | `90` | no | | diskspace_threshold_warning | Disk space used in percent (warning threshold) | string | `80` | no | +| diskspace_timeframe | Monitor timeframe for SQL disk space [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | dtu_message | Custom message for SQL DTU 
monitor | string | `` | no | | dtu_silenced | Groups to mute for SQL DTU monitor | map | `` | no | | dtu_threshold_critical | Amount of DTU used (critical threshold) | string | `90` | no | | dtu_threshold_warning | Amount of DTU used (warning threshold) | string | `85` | no | +| dtu_timeframe | Monitor timeframe for SQL DTU [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | diff --git a/cloud/azure/sql-database/inputs.tf b/cloud/azure/sql-database/inputs.tf index 9f6d6b8..386de15 100644 --- a/cloud/azure/sql-database/inputs.tf +++ b/cloud/azure/sql-database/inputs.tf @@ -37,6 +37,12 @@ variable "cpu_message" { default = "" } +variable "cpu_timeframe" { + description = "Monitor timeframe for SQL CPU [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "cpu_threshold_warning" { description = "CPU usage in percent (warning threshold)" default = "80" @@ -59,6 +65,12 @@ variable "diskspace_message" { default = "" } +variable "diskspace_timeframe" { + description = "Monitor timeframe for SQL disk space [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "diskspace_threshold_warning" { description = "Disk space used in percent (warning threshold)" default = "80" @@ -81,6 +93,12 @@ variable "dtu_message" { default = "" } +variable "dtu_timeframe" { + description = "Monitor timeframe for SQL DTU [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + variable "dtu_threshold_warning" 
{ description = "Amount of DTU used (warning threshold)" default = "85" @@ -103,6 +121,12 @@ variable "deadlock_message" { default = "" } +variable "deadlock_timeframe" { + description = "Monitor timeframe for SQL Deadlock [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "deadlock_threshold_critical" { description = "Amount of Deadlocks (critical threshold)" default = "1" diff --git a/cloud/azure/sql-database/monitors-sql-database-basics.tf b/cloud/azure/sql-database/monitors-sql-database-basics.tf index 678e8c0..391db76 100644 --- a/cloud/azure/sql-database/monitors-sql-database-basics.tf +++ b/cloud/azure/sql-database/monitors-sql-database-basics.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "sql-database_cpu_90_15min" { message = "${coalesce(var.cpu_message, var.message)}" query = < ${var.cpu_threshold_critical} EOF @@ -44,7 +44,7 @@ resource "datadog_monitor" "sql-database_free_space_low" { type = "metric alert" query = < ${var.diskspace_threshold_critical} EOF @@ -76,7 +76,7 @@ resource "datadog_monitor" "sql-database_dtu_consumption_high" { type = "metric alert" query = < ${var.dtu_threshold_critical} EOF @@ -108,7 +108,7 @@ resource "datadog_monitor" "sql-database_deadlocks_count" { type = "metric alert" query = < ${var.deadlock_threshold_critical} EOF From 6acd9c5671d1b0ef529ec55dd8af39e3514d9e3c Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Fri, 27 Apr 2018 09:58:57 +0200 Subject: [PATCH 4/7] MON-160 - Azure Storage & Stream Analytics updated --- cloud/azure/storage/README.md | 9 ++++ cloud/azure/storage/inputs.tf | 54 +++++++++++++++++++ cloud/azure/storage/monitors-azure-storage.tf | 18 +++---- cloud/azure/stream-analytics/README.md | 5 ++ cloud/azure/stream-analytics/inputs.tf | 30 +++++++++++ .../monitors-stream-analytics.tf | 10 ++-- 6 files changed, 112 insertions(+), 14 deletions(-) diff --git a/cloud/azure/storage/README.md 
b/cloud/azure/storage/README.md index 65942eb..e85f146 100644 --- a/cloud/azure/storage/README.md +++ b/cloud/azure/storage/README.md @@ -36,14 +36,17 @@ Inputs | authorization_error_requests_silenced | Groups to mute for Storage authorization errors monitor | map | `` | no | | authorization_error_requests_threshold_critical | Maximum acceptable percent of authorization error requests for a storage | string | `90` | no | | authorization_error_requests_threshold_warning | Warning regarding acceptable percent of authorization error requests for a storage | string | `50` | no | +| authorization_error_requests_timeframe | Monitor timeframe for Storage authorization errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | availability_message | Custom message for Storage availability monitor | string | `` | no | | availability_silenced | Groups to mute for Storage availability monitor | map | `` | no | | availability_threshold_critical | Minimum acceptable percent of availability for a storage | string | `50` | no | | availability_threshold_warning | Warning regarding acceptable percent of availability for a storage | string | `90` | no | +| availability_timeframe | Monitor timeframe for Storage availability [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | client_other_error_requests_message | Custom message for Storage other errors monitor | string | `` | no | | client_other_error_requests_silenced | Groups to mute for Storage other errors monitor | map | `` | no | | client_other_error_requests_threshold_critical | Maximum acceptable percent of client other error requests for a storage | string | `90` | no | | client_other_error_requests_threshold_warning | Warning regarding acceptable percent of client other error requests for a storage | string | `50` | no | +| client_other_error_requests_timeframe | Monitor timeframe for 
Storage other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | @@ -52,27 +55,33 @@ Inputs | latency_silenced | Groups to mute for Storage latency monitor | map | `` | no | | latency_threshold_critical | Maximum acceptable end to end latency (ms) for a storage | string | `2000` | no | | latency_threshold_warning | Warning regarding acceptable end to end latency (ms) for a storage | string | `1000` | no | +| latency_timeframe | Monitor timeframe for Storage latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | | network_error_requests_message | Custom message for Storage network errors monitor | string | `` | no | | network_error_requests_silenced | Groups to mute for Storage network errors monitor | map | `` | no | | network_error_requests_threshold_critical | Maximum acceptable percent of network error requests for a storage | string | `90` | no | | network_error_requests_threshold_warning | Warning regarding acceptable percent of network error requests for a storage | string | `50` | no | +| network_error_requests_timeframe | Monitor timeframe for Storage network errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | server_other_error_requests_message | Custom message for Storage server other errors monitor | string | `` | no | | server_other_error_requests_silenced | Groups to mute for Storage server other errors monitor | map | `` | no | | server_other_error_requests_threshold_critical | Maximum acceptable percent of 
server other error requests for a storage | string | `90` | no | | server_other_error_requests_threshold_warning | Warning regarding acceptable percent of server other error requests for a storage | string | `50` | no | +| server_other_error_requests_timeframe | Monitor timeframe for Storage server other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | successful_requests_message | Custom message for Storage sucessful requests monitor | string | `` | no | | successful_requests_silenced | Groups to mute for Storage sucessful requests monitor | map | `` | no | | successful_requests_threshold_critical | Minimum acceptable percent of successful requests for a storage | string | `10` | no | | successful_requests_threshold_warning | Warning regarding acceptable percent of successful requests for a storage | string | `30` | no | +| successful_requests_timeframe | Monitor timeframe for Storage successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | throttling_error_requests_message | Custom message for Storage throttling error monitor | string | `` | no | | throttling_error_requests_silenced | Groups to mute for Storage throttling error monitor | map | `` | no | | throttling_error_requests_threshold_critical | Maximum acceptable percent of throttling error requests for a storage | string | `90` | no | | throttling_error_requests_threshold_warning | Warning regarding acceptable percent of throttling error requests for a storage | string | `50` | no | +| throttling_error_requests_timeframe | Monitor timeframe for Storage throttling errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | timeout_error_requests_message | Custom message for Storage timeout monitor | string | `` | no | | timeout_error_requests_silenced | Groups to mute for Storage
timeout monitor | map | `` | no | | timeout_error_requests_threshold_critical | Maximum acceptable percent of timeout error requests for a storage | string | `90` | no | | timeout_error_requests_threshold_warning | Warning regarding acceptable percent of timeout error requests for a storage | string | `50` | no | +| timeout_error_requests_timeframe | Monitor timeframe for Storage timeout [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf index bcfd4d9..dfd9ae8 100644 --- a/cloud/azure/storage/inputs.tf +++ b/cloud/azure/storage/inputs.tf @@ -37,6 +37,12 @@ variable "availability_message" { default = "" } +variable "availability_timeframe" { + description = "Monitor timeframe for Storage availability [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "availability_threshold_critical" { description = "Minimum acceptable percent of availability for a storage" default = 50 @@ -59,6 +65,12 @@ variable "successful_requests_message" { default = "" } +variable "successful_requests_timeframe" { + description = "Monitor timeframe for Storage successful requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "successful_requests_threshold_critical" { description = "Minimum acceptable percent of successful requests for a storage" default = 10 @@ -81,6 +93,12 @@ variable "latency_message" { default = "" } +variable "latency_timeframe" { + description = "Monitor timeframe for Storage latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "latency_threshold_critical" { description = "Maximum acceptable end to end
latency (ms) for a storage" default = 2000 @@ -103,6 +121,12 @@ variable "timeout_error_requests_message" { default = "" } +variable "timeout_error_requests_timeframe" { + description = "Monitor timeframe for Storage timeout [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "timeout_error_requests_threshold_critical" { description = "Maximum acceptable percent of timeout error requests for a storage" default = 90 @@ -125,6 +149,12 @@ variable "network_error_requests_message" { default = "" } +variable "network_error_requests_timeframe" { + description = "Monitor timeframe for Storage network errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "network_error_requests_threshold_critical" { description = "Maximum acceptable percent of network error requests for a storage" default = 90 @@ -147,6 +177,12 @@ variable "throttling_error_requests_message" { default = "" } +variable "throttling_error_requests_timeframe" { + description = "Monitor timeframe for Storage throttling errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "throttling_error_requests_threshold_critical" { description = "Maximum acceptable percent of throttling error requests for a storage" default = 90 @@ -169,6 +205,12 @@ variable "server_other_error_requests_message" { default = "" } +variable "server_other_error_requests_timeframe" { + description = "Monitor timeframe for Storage server other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "server_other_error_requests_threshold_critical" { description = "Maximum acceptable percent of server other error requests for a storage" default = 90 @@ -191,6 +233,12 
@@ variable "client_other_error_requests_message" { default = "" } +variable "client_other_error_requests_timeframe" { + description = "Monitor timeframe for Storage other errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "client_other_error_requests_threshold_critical" { description = "Maximum acceptable percent of client other error requests for a storage" default = 90 @@ -213,6 +261,12 @@ variable "authorization_error_requests_message" { default = "" } +variable "authorization_error_requests_timeframe" { + description = "Monitor timeframe for Storage authorization errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "authorization_error_requests_threshold_critical" { description = "Maximum acceptable percent of authorization error requests for a storage" default = 90 diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index f71b3b0..34d5396 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "availability" { message = "${coalesce(var.availability_message, var.message)}" query = < ${var.latency_threshold_critical} EOF @@ -104,7 +104,7 @@ resource "datadog_monitor" "timeout_error_requests" { message = "${coalesce(var.timeout_error_requests_message, var.message)}" query = < ${var.timeout_error_requests_threshold_critical} EOF @@ -135,7 +135,7 @@ resource "datadog_monitor" "network_error_requests" { message = "${coalesce(var.network_error_requests_message, var.message)}" query = < ${var.network_error_requests_threshold_critical} EOF @@ -166,7 +166,7 @@ resource "datadog_monitor" "throttling_error_requests" { message = "${coalesce(var.throttling_error_requests_message, var.message)}" query = < 
${var.throttling_error_requests_threshold_critical} EOF @@ -197,7 +197,7 @@ resource "datadog_monitor" "server_other_error_requests" { message = "${coalesce(var.server_other_error_requests_message, var.message)}" query = < ${var.server_other_error_requests_threshold_critical} EOF @@ -228,7 +228,7 @@ resource "datadog_monitor" "client_other_error_requests" { message = "${coalesce(var.client_other_error_requests_message, var.message)}" query = < ${var.client_other_error_requests_threshold_critical} EOF @@ -259,7 +259,7 @@ resource "datadog_monitor" "authorization_error_requests" { message = "${coalesce(var.authorization_error_requests_message, var.message)}" query = < ${var.authorization_error_requests_threshold_critical} EOF diff --git a/cloud/azure/stream-analytics/README.md b/cloud/azure/stream-analytics/README.md index 51f7f8d..32d5b1d 100644 --- a/cloud/azure/stream-analytics/README.md +++ b/cloud/azure/stream-analytics/README.md @@ -22,12 +22,14 @@ Inputs | conversion_errors_silenced | Groups to mute for Stream Analytics conversion errors monitor | map | `` | no | | conversion_errors_threshold_critical | Conversion errors limit (critical threshold) | string | `10` | no | | conversion_errors_threshold_warning | Conversion errors limit (warning threshold) | string | `0` | no | +| conversion_errors_timeframe | Monitor timeframe for Stream Analytics conversion errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | | failed_function_requests_message | Custom message for Stream Analytics failed requests monitor | string | `` | no | | failed_function_requests_silenced | Groups to mute for Stream Analytics failed requests monitor | map | `` | no | | failed_function_requests_threshold_critical | Failed Function Request rate limit (critical threshold) | string | 
`10` | no | | failed_function_requests_threshold_warning | Failed Function Request rate limit (warning threshold) | string | `0` | no | +| failed_function_requests_timeframe | Monitor timeframe for Stream Analytics failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | @@ -35,12 +37,15 @@ Inputs | runtime_errors_silenced | Groups to mute for Stream Analytics runtime errors monitor | map | `` | no | | runtime_errors_threshold_critical | Runtime errors limit (critical threshold) | string | `10` | no | | runtime_errors_threshold_warning | Runtime errors limit (warning threshold) | string | `0` | no | +| runtime_errors_timeframe | Monitor timeframe for Stream Analytics runtime errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | status_message | Custom message for Stream Analytics status monitor | string | `` | no | | status_silenced | Groups to mute for Stream Analytics status monitor | map | `` | no | +| status_timeframe | Monitor timeframe for Stream Analytics status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | su_utilization_message | Custom message for Stream Analytics utilization monitor | string | `` | no | | su_utilization_silenced | Groups to mute for Stream Analytics utilization monitor | map | `` | no | | su_utilization_threshold_critical | Streaming Unit utilization rate limit (critical threshold) | string | `80` | no | | su_utilization_threshold_warning | Streaming Unit utilization rate limit (warning threshold) | string | `60` | no | +| 
su_utilization_timeframe | Monitor timeframe for Stream Analytics utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | Related documentation --------------------- diff --git a/cloud/azure/stream-analytics/inputs.tf b/cloud/azure/stream-analytics/inputs.tf index 4f4cdce..0c2170c 100644 --- a/cloud/azure/stream-analytics/inputs.tf +++ b/cloud/azure/stream-analytics/inputs.tf @@ -37,6 +37,12 @@ variable "status_message" { default = "" } +variable "status_timeframe" { + description = "Monitor timeframe for Stream Analytics status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "su_utilization_silenced" { description = "Groups to mute for Stream Analytics utilization monitor" type = "map" @@ -49,6 +55,12 @@ variable "su_utilization_message" { default = "" } +variable "su_utilization_timeframe" { + description = "Monitor timeframe for Stream Analytics utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "su_utilization_threshold_warning" { description = "Streaming Unit utilization rate limit (warning threshold)" default = 60 @@ -71,6 +83,12 @@ variable "failed_function_requests_message" { default = "" } +variable "failed_function_requests_timeframe" { + description = "Monitor timeframe for Stream Analytics failed requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "failed_function_requests_threshold_warning" { description = "Failed Function Request rate limit (warning threshold)" default = 0 @@ -93,6 +111,12 @@ variable "conversion_errors_message" { default = "" } +variable "conversion_errors_timeframe" { + description = "Monitor timeframe for Stream Analytics conversion errors [available values: 
`last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "conversion_errors_threshold_warning" { description = "Conversion errors limit (warning threshold)" default = 0 @@ -115,6 +139,12 @@ variable "runtime_errors_message" { default = "" } +variable "runtime_errors_timeframe" { + description = "Monitor timeframe for Stream Analytics runtime errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "runtime_errors_threshold_warning" { description = "Runtime errors limit (warning threshold)" default = 0 diff --git a/cloud/azure/stream-analytics/monitors-stream-analytics.tf b/cloud/azure/stream-analytics/monitors-stream-analytics.tf index aab5e7c..51ab80a 100644 --- a/cloud/azure/stream-analytics/monitors-stream-analytics.tf +++ b/cloud/azure/stream-analytics/monitors-stream-analytics.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" "status" { message = "${coalesce(var.status_message, var.message)}" query = < ${var.su_utilization_threshold_critical} EOF @@ -68,7 +68,7 @@ resource "datadog_monitor" "failed_function_requests" { message = "${coalesce(var.failed_function_requests_message, var.message)}" query = < ${var.failed_function_requests_threshold_critical} @@ -101,7 +101,7 @@ resource "datadog_monitor" "conversion_errors" { message = "${coalesce(var.conversion_errors_message, var.message)}" query = < ${var.conversion_errors_threshold_critical} EOF @@ -133,7 +133,7 @@ resource "datadog_monitor" "runtime_errors" { message = "${coalesce(var.runtime_errors_message, var.message)}" query = < ${var.runtime_errors_threshold_critical} EOF From 6276d9364c8f7c2fb31a1d083eb799c96038eea8 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Fri, 27 Apr 2018 10:03:21 +0200 Subject: [PATCH 5/7] MON-160 - Mongodb monitors updated --- databases/mongodb/README.md | 3 ++- databases/mongodb/inputs.tf | 6 ++++++ 
databases/mongodb/monitors-mongo.tf | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/databases/mongodb/README.md b/databases/mongodb/README.md index f71378f..fc521cd 100644 --- a/databases/mongodb/README.md +++ b/databases/mongodb/README.md @@ -74,10 +74,11 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| environment | Architecture Environment | string | - | yes | | delay | Delay in seconds for the metric evaluation | string | `15` | no | +| environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | | mongodb_replicaset_message | Custom message for Mongodb replicaset monitor | string | `` | no | | mongodb_replicaset_silenced | Groups to mute for Mongodb replicaset monitor | map | `` | no | +| mongodb_replicaset_timeframe | Monitor timeframe for Mongodb replicaset [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | diff --git a/databases/mongodb/inputs.tf b/databases/mongodb/inputs.tf index 091d746..46cf5ee 100644 --- a/databases/mongodb/inputs.tf +++ b/databases/mongodb/inputs.tf @@ -35,3 +35,9 @@ variable "mongodb_replicaset_message" { type = "string" default = "" } + +variable "mongodb_replicaset_timeframe" { + description = "Monitor timeframe for Mongodb replicaset [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} diff --git a/databases/mongodb/monitors-mongo.tf b/databases/mongodb/monitors-mongo.tf index 6e4fd1a..5357aeb 100644 --- a/databases/mongodb/monitors-mongo.tf +++ b/databases/mongodb/monitors-mongo.tf @@ -11,7 +11,7 @@ resource "datadog_monitor" 
"mongodb_replicaset_state" { message = "${coalesce(var.mongodb_replicaset_message, var.message)}" query = < Date: Fri, 27 Apr 2018 10:34:15 +0200 Subject: [PATCH 6/7] MON-160 - Php-fpm monitors updated --- middleware/php-fpm/README.md | 3 ++- middleware/php-fpm/inputs.tf | 6 ++++++ middleware/php-fpm/monitors-fpm.tf | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/middleware/php-fpm/README.md b/middleware/php-fpm/README.md index 0782c56..85b6e3e 100644 --- a/middleware/php-fpm/README.md +++ b/middleware/php-fpm/README.md @@ -25,8 +25,8 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| environment | Architecture Environment | string | - | yes | | delay | Delay in seconds for the metric evaluation | string | `15` | no | +| environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | @@ -34,5 +34,6 @@ Inputs | php_fpm_busy_silenced | Groups to mute for PHP FPM busy worker monitor | map | `` | no | | php_fpm_busy_threshold_critical | php fpm busy critical threshold | string | `0.9` | no | | php_fpm_busy_threshold_warning | php fpm busy warning threshold | string | `0.8` | no | +| php_fpm_busy_timeframe | Monitor timeframe for PHP FPM busy worker [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_10m` | no | | php_fpm_connect_message | Custom message for PHP FPM process monitor | string | `` | no | | php_fpm_connect_silenced | Groups to mute for PHP FPM process monitor | map | `` | no | diff --git a/middleware/php-fpm/inputs.tf b/middleware/php-fpm/inputs.tf index 858574b..b88d5f5 100644 --- a/middleware/php-fpm/inputs.tf +++ b/middleware/php-fpm/inputs.tf @@ -38,6 
+38,12 @@ variable "php_fpm_busy_message" { default = "" } +variable "php_fpm_busy_timeframe" { + description = "Monitor timeframe for PHP FPM busy worker [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_10m" +} + variable "php_fpm_busy_threshold_warning" { description = "php fpm busy warning threshold" default = 0.8 diff --git a/middleware/php-fpm/monitors-fpm.tf b/middleware/php-fpm/monitors-fpm.tf index a939c95..f0d9e90 100644 --- a/middleware/php-fpm/monitors-fpm.tf +++ b/middleware/php-fpm/monitors-fpm.tf @@ -13,7 +13,7 @@ resource "datadog_monitor" "datadog_php_fpm_connect_idle" { type = "metric alert" query = < Date: Fri, 27 Apr 2018 10:49:51 +0200 Subject: [PATCH 7/7] MON-160 - System monitors updated --- system/generic/README.md | 9 ++++++--- system/generic/inputs.tf | 24 ++++++++++++++++++++++-- system/generic/monitors-system.tf | 6 +++--- 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/system/generic/README.md b/system/generic/README.md index e56af90..ac23407 100644 --- a/system/generic/README.md +++ b/system/generic/README.md @@ -32,26 +32,29 @@ Inputs | cpu_high_silenced | Groups to mute for CPU high monitor | map | `` | no | | cpu_high_threshold_critical | CPU high critical threshold | string | `95` | no | | cpu_high_threshold_warning | CPU high warning threshold | string | `80` | no | -| cpu_high_timeframe | CPU high timeframe | string | `last_5m` | no | +| cpu_high_timeframe | Monitor timeframe for CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | cpu_load_message | Custom message for CPU load ratio monitor | string | `` | no | | cpu_load_silenced | Groups to mute for CPU load ratio monitor | map | `` | no | | cpu_load_threshold_critical | CPU load ratio critical threshold | string | `4` | no | | cpu_load_threshold_warning | CPU load ratio warning threshold | string | `3` | no | 
-| cpu_load_timeframe | CPU load timeframe | string | `last_5m` | no | -| environment | Architecture Environment | string | - | yes | +| cpu_load_timeframe | Monitor timeframe for CPU load ratio [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | delay | Delay in seconds for the metric evaluation | string | `15` | no | +| environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | free_disk_inodes_message | Custom message for Free disk inodes monitor | string | `` | no | | free_disk_inodes_silenced | Groups to mute for Free disk inodes monitor | map | `` | no | | free_disk_inodes_threshold_critical | Free disk space critical threshold | string | `5` | no | | free_disk_inodes_threshold_warning | Free disk space warning threshold | string | `10` | no | +| free_disk_inodes_timeframe | Monitor timeframe for Free disk inodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | free_disk_space_message | Custom message for Free diskspace monitor | string | `` | no | | free_disk_space_silenced | Groups to mute for Free diskspace monitor | map | `` | no | | free_disk_space_threshold_critical | Free disk space critical threshold | string | `5` | no | | free_disk_space_threshold_warning | Free disk space warning threshold | string | `10` | no | +| free_disk_space_timeframe | Monitor timeframe for Free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | free_memory_message | Custom message for Free memory monitor | string | - | yes | | free_memory_silenced | Groups to mute for Free memory monitor | map | `` | no | | free_memory_threshold_critical | Free disk space 
critical threshold | string | `5` | no | | free_memory_threshold_warning | Free disk space warning threshold | string | `10` | no | +| free_memory_timeframe | Monitor timeframe for Free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1m` | no | | message | Message sent when an alert is triggered | string | - | yes | diff --git a/system/generic/inputs.tf b/system/generic/inputs.tf index 8a77608..6c66909 100644 --- a/system/generic/inputs.tf +++ b/system/generic/inputs.tf @@ -39,7 +39,8 @@ variable "cpu_high_message" { } variable "cpu_high_timeframe" { - description = "CPU high timeframe" + description = "Monitor timeframe for CPU high [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" default = "last_5m" } @@ -66,7 +67,8 @@ variable "cpu_load_message" { } variable "cpu_load_timeframe" { - description = "CPU load ratio timeframe" + description = "Monitor timeframe for CPU load ratio [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" default = "last_5m" } @@ -92,6 +94,12 @@ variable "free_disk_space_message" { default = "" } +variable "free_disk_space_timeframe" { + description = "Monitor timeframe for Free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "free_disk_space_threshold_warning" { description = "Free disk space warning threshold" default = 10 @@ -114,6 +122,12 @@ variable "free_disk_inodes_message" { default = "" } +variable "free_disk_inodes_timeframe" { + description = "Monitor timeframe for Free disk inodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "free_disk_inodes_threshold_warning" { description = "Free disk space warning threshold" default = 10 @@ -135,6 +149,12 @@ 
variable "free_memory_message" { type = "string" } +variable "free_memory_timeframe" { + description = "Monitor timeframe for Free memory [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_1m" +} + variable "free_memory_threshold_warning" { description = "Free disk space warning threshold" default = 10 diff --git a/system/generic/monitors-system.tf b/system/generic/monitors-system.tf index fe8b394..d737f01 100644 --- a/system/generic/monitors-system.tf +++ b/system/generic/monitors-system.tf @@ -74,7 +74,7 @@ resource "datadog_monitor" "datadog_free_disk_space_too_low" { message = "${coalesce(var.free_disk_space_message, var.message)}" query = <