MON-191 - AWS monitors updated with customizable aggregator
This commit is contained in:
parent
a3ecf61909
commit
966a9b5f54
@ -30,6 +30,7 @@ Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| alb_no_healthy_instances_aggregator | Monitor aggregator for ALB no healthy instances [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| alb_no_healthy_instances_message | Custom message for ALB no healthy instances monitor | string | `` | no |
|
||||
| alb_no_healthy_instances_silenced | Groups to mute for ALB no healthy instances monitor | map | `<map>` | no |
|
||||
| alb_no_healthy_instances_timeframe | Monitor timeframe for ALB no healthy instances [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1m` | no |
|
||||
@ -38,26 +39,31 @@ Inputs
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| httpcode_elb_4xx_aggregator | Monitor aggregator for ALB httpcode 4xx [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| httpcode_elb_4xx_message | Custom message for ALB httpcode 4xx monitor | string | `` | no |
|
||||
| httpcode_elb_4xx_silenced | Groups to mute for ALB httpcode 4xx monitor | map | `<map>` | no |
|
||||
| httpcode_elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `80` | no |
|
||||
| httpcode_elb_4xx_threshold_warning | loadbalancer 4xx warning threshold in percentage | string | `60` | no |
|
||||
| httpcode_elb_4xx_timeframe | Monitor timeframe for ALB httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| httpcode_elb_5xx_aggregator | Monitor aggregator for ALB httpcode 5xx [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| httpcode_elb_5xx_message | Custom message for ALB httpcode 5xx monitor | string | `` | no |
|
||||
| httpcode_elb_5xx_silenced | Groups to mute for ALB httpcode 5xx monitor | map | `<map>` | no |
|
||||
| httpcode_elb_5xx_threshold_critical | loadbalancer 5xx critical threshold in percentage | string | `80` | no |
|
||||
| httpcode_elb_5xx_threshold_warning | loadbalancer 5xx warning threshold in percentage | string | `60` | no |
|
||||
| httpcode_elb_5xx_timeframe | Monitor timeframe for ALB httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| httpcode_target_4xx_aggregator | Monitor aggregator for ALB target httpcode 4xx [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| httpcode_target_4xx_message | Custom message for ALB target httpcode 4xx monitor | string | `` | no |
|
||||
| httpcode_target_4xx_silenced | Groups to mute for ALB target httpcode 4xx monitor | map | `<map>` | no |
|
||||
| httpcode_target_4xx_threshold_critical | target 4xx critical threshold in percentage | string | `80` | no |
|
||||
| httpcode_target_4xx_threshold_warning | target 4xx warning threshold in percentage | string | `60` | no |
|
||||
| httpcode_target_4xx_timeframe | Monitor timeframe for ALB target httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| httpcode_target_5xx_aggregator | Monitor aggregator for ALB target httpcode 5xx [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| httpcode_target_5xx_message | Custom message for ALB target httpcode 5xx monitor | string | `` | no |
|
||||
| httpcode_target_5xx_silenced | Groups to mute for ALB target httpcode 5xx monitor | map | `<map>` | no |
|
||||
| httpcode_target_5xx_threshold_critical | target 5xx critical threshold in percentage | string | `80` | no |
|
||||
| httpcode_target_5xx_threshold_warning | target 5xx warning threshold in percentage | string | `60` | no |
|
||||
| httpcode_target_5xx_timeframe | Monitor timeframe for ALB target httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| latency_aggregator | Monitor aggregator for ALB latency [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| latency_message | Custom message for ALB latency monitor | string | `` | no |
|
||||
| latency_silenced | Groups to mute for ALB latency monitor | map | `<map>` | no |
|
||||
| latency_threshold_critical | latency critical threshold in milliseconds | string | `1000` | no |
|
||||
|
||||
@ -38,6 +38,12 @@ variable "alb_no_healthy_instances_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "alb_no_healthy_instances_aggregator" {
|
||||
description = "Monitor aggregator for ALB no healthy instances [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "alb_no_healthy_instances_timeframe" {
|
||||
description = "Monitor timeframe for ALB no healthy instances [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -56,6 +62,12 @@ variable "latency_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "latency_aggregator" {
|
||||
description = "Monitor aggregator for ALB latency [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "latency_timeframe" {
|
||||
description = "Monitor timeframe for ALB latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -84,6 +96,12 @@ variable "httpcode_elb_4xx_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "httpcode_elb_4xx_aggregator" {
|
||||
description = "Monitor aggregator for ALB httpcode 4xx [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "httpcode_elb_4xx_timeframe" {
|
||||
description = "Monitor timeframe for ALB httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -112,6 +130,12 @@ variable "httpcode_target_4xx_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "httpcode_target_4xx_aggregator" {
|
||||
description = "Monitor aggregator for ALB target httpcode 4xx [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "httpcode_target_4xx_timeframe" {
|
||||
description = "Monitor timeframe for ALB target httpcode 4xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -140,6 +164,12 @@ variable "httpcode_elb_5xx_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "httpcode_elb_5xx_aggregator" {
|
||||
description = "Monitor aggregator for ALB httpcode 5xx [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "httpcode_elb_5xx_timeframe" {
|
||||
description = "Monitor timeframe for ALB httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -168,6 +198,12 @@ variable "httpcode_target_5xx_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "httpcode_target_5xx_aggregator" {
|
||||
description = "Monitor aggregator for ALB target httpcode 5xx [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "httpcode_target_5xx_timeframe" {
|
||||
description = "Monitor timeframe for ALB target httpcode 5xx [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -14,8 +14,8 @@ resource "datadog_monitor" "ALB_no_healthy_instances" {
|
||||
message = "${coalesce(var.alb_no_healthy_instances_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.alb_no_healthy_instances_timeframe}): (
|
||||
min:aws.applicationelb.healthy_host_count{${data.template_file.filter.rendered}} by {region,loadbalancer}
|
||||
${var.alb_no_healthy_instances_aggregator}(${var.alb_no_healthy_instances_timeframe}): (
|
||||
${var.alb_no_healthy_instances_aggregator}:aws.applicationelb.healthy_host_count{${data.template_file.filter.rendered}} by {region,loadbalancer}
|
||||
) <= 0
|
||||
EOF
|
||||
|
||||
@ -43,8 +43,8 @@ resource "datadog_monitor" "ALB_latency" {
|
||||
message = "${coalesce(var.latency_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.latency_timeframe}): (
|
||||
min:aws.applicationelb.target_response_time.average{${data.template_file.filter.rendered}} by {region,loadbalancer}
|
||||
${var.latency_aggregator}(${var.latency_timeframe}): (
|
||||
${var.latency_aggregator}:aws.applicationelb.target_response_time.average{${data.template_file.filter.rendered}} by {region,loadbalancer}
|
||||
) > ${var.latency_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -73,10 +73,10 @@ resource "datadog_monitor" "ALB_httpcode_elb_5xx" {
|
||||
message = "${coalesce(var.httpcode_elb_5xx_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.httpcode_elb_5xx_timeframe}): (
|
||||
${var.httpcode_elb_5xx_aggregator}(${var.httpcode_elb_5xx_timeframe}): (
|
||||
default(
|
||||
min:aws.applicationelb.httpcode_elb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
|
||||
(min:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
|
||||
${var.httpcode_elb_5xx_aggregator}:aws.applicationelb.httpcode_elb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
|
||||
(${var.httpcode_elb_5xx_aggregator}:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.httpcode_elb_5xx_threshold_critical}
|
||||
EOF
|
||||
@ -106,10 +106,10 @@ resource "datadog_monitor" "ALB_httpcode_elb_4xx" {
|
||||
message = "${coalesce(var.httpcode_elb_4xx_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.httpcode_elb_4xx_timeframe}): (
|
||||
${var.httpcode_elb_4xx_aggregator}(${var.httpcode_elb_4xx_timeframe}): (
|
||||
default(
|
||||
min:aws.applicationelb.httpcode_elb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
|
||||
(min:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
|
||||
${var.httpcode_elb_4xx_aggregator}:aws.applicationelb.httpcode_elb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
|
||||
(${var.httpcode_elb_4xx_aggregator}:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.httpcode_elb_4xx_threshold_critical}
|
||||
EOF
|
||||
@ -139,10 +139,10 @@ resource "datadog_monitor" "ALB_httpcode_target_5xx" {
|
||||
message = "${coalesce(var.httpcode_target_5xx_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.httpcode_target_5xx_timeframe}): (
|
||||
${var.httpcode_target_5xx_aggregator}(${var.httpcode_target_5xx_timeframe}): (
|
||||
default(
|
||||
min:aws.applicationelb.httpcode_target_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
|
||||
(min:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
|
||||
${var.httpcode_target_5xx_aggregator}:aws.applicationelb.httpcode_target_5xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
|
||||
(${var.httpcode_target_5xx_aggregator}:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.httpcode_target_5xx_threshold_critical}
|
||||
EOF
|
||||
@ -172,10 +172,10 @@ resource "datadog_monitor" "ALB_httpcode_target_4xx" {
|
||||
message = "${coalesce(var.httpcode_target_4xx_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.httpcode_target_4xx_timeframe}): (
|
||||
${var.httpcode_target_4xx_aggregator}(${var.httpcode_target_4xx_timeframe}): (
|
||||
default(
|
||||
min:aws.applicationelb.httpcode_target_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
|
||||
(min:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
|
||||
${var.httpcode_target_4xx_aggregator}:aws.applicationelb.httpcode_target_4xx{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() /
|
||||
(${var.httpcode_target_4xx_aggregator}:aws.applicationelb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancer}.as_count() + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.httpcode_target_4xx_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -31,23 +31,25 @@ Inputs
|
||||
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| environment | Environment | string | - | yes |
|
||||
| filter_tags | Tags used for filtering | string | `*` | no |
|
||||
| http_4xx_requests_aggregator | Monitor aggregator for API Gateway HTTP 4xx requests [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| http_4xx_requests_message | Custom message for API Gateway HTTP 4xx requests monitor | string | `` | no |
|
||||
| http_4xx_requests_silenced | Groups to mute for API Gateway HTTP 4xx requests monitor | map | `<map>` | no |
|
||||
| http_4xx_requests_threshold_critical | Maximum critical acceptable percent of 4xx errors | string | `30` | no |
|
||||
| http_4xx_requests_threshold_warning | Maximum warning acceptable percent of 4xx errors | string | `15` | no |
|
||||
| http_4xx_requests_timeframe | Monitor timeframe for API HTTP 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| http_5xx_requests_aggregator | Monitor aggregator for API Gateway HTTP 5xx requests [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| http_5xx_requests_message | Custom message for API Gateway HTTP 5xx requests monitor | string | `` | no |
|
||||
| http_5xx_requests_silenced | Groups to mute for API Gateway HTTP 5xx requests monitor | map | `<map>` | no |
|
||||
| http_5xx_requests_threshold_critical | Maximum critical acceptable percent of 5xx errors | string | `20` | no |
|
||||
| http_5xx_requests_threshold_warning | Maximum warning acceptable percent of 5xx errors | string | `10` | no |
|
||||
| http_5xx_requests_timeframe | Monitor timeframe for API HTTP 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| latency_aggregator | Monitor aggregator for API Gateway latency [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| latency_message | Custom message for API Gateway latency monitor | string | `` | no |
|
||||
| latency_silenced | Groups to mute for API Gateway latency monitor | map | `<map>` | no |
|
||||
| latency_threshold_critical | Alerting threshold in milliseconds | string | `800` | no |
|
||||
| latency_threshold_warning | Warning threshold in milliseconds | string | `400` | no |
|
||||
| latency_timeframe | Monitor timeframe for API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
|
||||
Related documentation
|
||||
---------------------
|
||||
|
||||
|
||||
@ -33,6 +33,12 @@ variable "latency_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "latency_aggregator" {
|
||||
description = "Monitor aggregator for API Gateway latency [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "latency_timeframe" {
|
||||
description = "Monitor timeframe for API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -65,6 +71,12 @@ variable "http_5xx_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_aggregator" {
|
||||
description = "Monitor aggregator for API Gateway HTTP 5xx requests [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "http_5xx_requests_timeframe" {
|
||||
description = "Monitor timeframe for API HTTP 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -97,6 +109,12 @@ variable "http_4xx_requests_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_aggregator" {
|
||||
description = "Monitor aggregator for API Gateway HTTP 4xx requests [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "http_4xx_requests_timeframe" {
|
||||
description = "Monitor timeframe for API HTTP 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -5,8 +5,8 @@ resource "datadog_monitor" "API_Gateway_latency" {
|
||||
message = "${coalesce(var.latency_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.latency_timeframe}): (
|
||||
min:aws.apigateway.latency{${var.filter_tags}} by {region,apiname}
|
||||
${var.latency_aggregator}(${var.latency_timeframe}): (
|
||||
${var.latency_aggregator}:aws.apigateway.latency{${var.filter_tags}} by {region,apiname}
|
||||
) > ${var.latency_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -36,10 +36,10 @@ resource "datadog_monitor" "API_http_5xx_errors_count" {
|
||||
message = "${coalesce(var.http_5xx_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.http_5xx_requests_timeframe}): (
|
||||
${var.http_5xx_requests_aggregator}(${var.http_5xx_requests_timeframe}): (
|
||||
default(
|
||||
min:aws.apigateway.5xxerror{${var.filter_tags}} by {region,apiname}.as_count() /
|
||||
(min:aws.apigateway.count{${var.filter_tags}} by {region,apiname}.as_count() + ${var.artificial_requests_count}),
|
||||
${var.http_5xx_requests_aggregator}:aws.apigateway.5xxerror{${var.filter_tags}} by {region,apiname}.as_count() /
|
||||
(${var.http_5xx_requests_aggregator}:aws.apigateway.count{${var.filter_tags}} by {region,apiname}.as_count() + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.http_5xx_requests_threshold_critical}
|
||||
EOF
|
||||
@ -70,10 +70,10 @@ resource "datadog_monitor" "API_http_4xx_errors_count" {
|
||||
message = "${coalesce(var.http_4xx_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.http_4xx_requests_timeframe}): (
|
||||
${var.http_4xx_requests_aggregator}(${var.http_4xx_requests_timeframe}): (
|
||||
default(
|
||||
min:aws.apigateway.4xxerror{${var.filter_tags}} by {region,apiname}.as_count() /
|
||||
(min:aws.apigateway.count{${var.filter_tags}} by {region,apiname}.as_count() + ${var.artificial_requests_count}),
|
||||
${var.http_4xx_requests_aggregator}:aws.apigateway.4xxerror{${var.filter_tags}} by {region,apiname}.as_count() /
|
||||
(${var.http_4xx_requests_aggregator}:aws.apigateway.count{${var.filter_tags}} by {region,apiname}.as_count() + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.http_4xx_requests_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -29,18 +29,21 @@ Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| cpu_aggregator | Monitor aggregator for ES cluster cpu [available values: min, max, sum or avg] | string | `avg` | no |
|
||||
| cpu_message | Custom message for ES cluster cpu monitor | string | `` | no |
|
||||
| cpu_silenced | Groups to mute for ES cluster cpu monitor | map | `<map>` | no |
|
||||
| cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no |
|
||||
| cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no |
|
||||
| cpu_timeframe | Monitor timeframe for ES cluster cpu [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| diskspace_aggregator | Monitor aggregator for ES cluster diskspace [available values: min, max, sum or avg] | string | `avg` | no |
|
||||
| diskspace_message | Custom message for ES cluster diskspace monitor | string | `` | no |
|
||||
| diskspace_silenced | Groups to mute for ES cluster diskspace monitor | map | `<map>` | no |
|
||||
| diskspace_threshold_critical | Disk free space in percent (critical threshold) | string | `10` | no |
|
||||
| diskspace_threshold_warning | Disk free space in percent (warning threshold) | string | `20` | no |
|
||||
| diskspace_timeframe | Monitor timeframe for ES cluster diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| environment | Architecture Environment | string | - | yes |
|
||||
| es_cluster_status_aggregator | Monitor aggregator for ES cluster status [available values: min, max, sum or avg] | string | `max` | no |
|
||||
| es_cluster_status_message | Custom message for ES cluster status monitor | string | `` | no |
|
||||
| es_cluster_status_silenced | Groups to mute for ES cluster status monitor | map | `<map>` | no |
|
||||
| es_cluster_status_timeframe | Monitor timeframe for ES cluster status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_30m` | no |
|
||||
|
||||
@ -38,6 +38,12 @@ variable "es_cluster_status_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "es_cluster_status_aggregator" {
|
||||
description = "Monitor aggregator for ES cluster status [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "es_cluster_status_timeframe" {
|
||||
description = "Monitor timeframe for ES cluster status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -60,6 +66,12 @@ variable "diskspace_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "diskspace_aggregator" {
|
||||
description = "Monitor aggregator for ES cluster diskspace [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "diskspace_timeframe" {
|
||||
description = "Monitor timeframe for ES cluster diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -88,6 +100,12 @@ variable "cpu_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cpu_aggregator" {
|
||||
description = "Monitor aggregator for ES cluster cpu [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "cpu_timeframe" {
|
||||
description = "Monitor timeframe for ES cluster cpu [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -18,9 +18,9 @@ resource "datadog_monitor" "es_cluster_status" {
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
max(${var.es_cluster_status_timeframe}): (
|
||||
avg:aws.es.cluster_statusred{${data.template_file.filter.rendered}} by {region,name} * 2 +
|
||||
(avg:aws.es.cluster_statusyellow{${data.template_file.filter.rendered}} by {region,name} + 0.1)
|
||||
${var.es_cluster_status_aggregator}(${var.es_cluster_status_timeframe}): (
|
||||
${var.es_cluster_status_aggregator}:aws.es.cluster_statusred{${data.template_file.filter.rendered}} by {region,name} * 2 +
|
||||
(${var.es_cluster_status_aggregator}:aws.es.cluster_statusyellow{${data.template_file.filter.rendered}} by {region,name} + 0.1)
|
||||
) >= 2
|
||||
EOF
|
||||
|
||||
@ -52,8 +52,8 @@ resource "datadog_monitor" "es_free_space_low" {
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.diskspace_timeframe}): (
|
||||
avg:aws.es.free_storage_space{${data.template_file.filter.rendered}} by {region,name} /
|
||||
${var.diskspace_aggregator}(${var.diskspace_timeframe}): (
|
||||
${var.diskspace_aggregator}:aws.es.free_storage_space{${data.template_file.filter.rendered}} by {region,name} /
|
||||
(${var.es_cluster_volume_size}*1000) * 100
|
||||
) < ${var.diskspace_threshold_critical}
|
||||
EOF
|
||||
@ -86,8 +86,8 @@ resource "datadog_monitor" "es_cpu_90_15min" {
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.cpu_timeframe}): (
|
||||
avg:aws.es.cpuutilization{${data.template_file.filter.rendered}} by {region,name}
|
||||
${var.cpu_aggregator}(${var.cpu_timeframe}): (
|
||||
${var.cpu_aggregator}:aws.es.cpuutilization{${data.template_file.filter.rendered}} by {region,name}
|
||||
) > ${var.cpu_threshold_critical}
|
||||
EOF
|
||||
|
||||
|
||||
@ -32,31 +32,37 @@ Inputs
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| artificial_requests_count | Number of false requests used to mitigate false positives in case of low traffic | string | `5` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| elb_4xx_aggregator | Monitor aggregator for ELB 4xx errors [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| elb_4xx_message | Custom message for ELB 4xx errors monitor | string | `` | no |
|
||||
| elb_4xx_silenced | Groups to mute for ELB 4xx errors monitor | map | `<map>` | no |
|
||||
| elb_4xx_threshold_critical | loadbalancer 4xx critical threshold in percentage | string | `10` | no |
|
||||
| elb_4xx_threshold_warning | loadbalancer 4xx warning threshold in percentage | string | `5` | no |
|
||||
| elb_4xx_timeframe | Monitor timeframe for ELB 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| elb_5xx_aggregator | Monitor aggregator for ELB 5xx errors [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| elb_5xx_message | Custom message for ELB 5xx errors monitor | string | `` | no |
|
||||
| elb_5xx_silenced | Groups to mute for ELB 5xx errors monitor | map | `<map>` | no |
|
||||
| elb_5xx_threshold_critical | loadbalancer 5xx critical threshold in percentage | string | `10` | no |
|
||||
| elb_5xx_threshold_warning | loadbalancer 5xx warning threshold in percentage | string | `5` | no |
|
||||
| elb_5xx_timeframe | Monitor timeframe for ELB 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| elb_backend_4xx_aggregator | Monitor aggregator for ELB backend 4xx errors [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| elb_backend_4xx_message | Custom message for ELB backend 4xx errors monitor | string | `` | no |
|
||||
| elb_backend_4xx_silenced | Groups to mute for ELB backend 4xx errors monitor | map | `<map>` | no |
|
||||
| elb_backend_4xx_threshold_critical | loadbalancer backend 4xx critical threshold in percentage | string | `10` | no |
|
||||
| elb_backend_4xx_threshold_warning | loadbalancer backend 4xx warning threshold in percentage | string | `5` | no |
|
||||
| elb_backend_4xx_timeframe | Monitor timeframe for ELB backend 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| elb_backend_5xx_aggregator | Monitor aggregator for ELB backend 5xx errors [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| elb_backend_5xx_message | Custom message for ELB backend 5xx errors monitor | string | `` | no |
|
||||
| elb_backend_5xx_silenced | Groups to mute for ELB backend 5xx errors monitor | map | `<map>` | no |
|
||||
| elb_backend_5xx_threshold_critical | loadbalancer backend 5xx critical threshold in percentage | string | `10` | no |
|
||||
| elb_backend_5xx_threshold_warning | loadbalancer backend 5xx warning threshold in percentage | string | `5` | no |
|
||||
| elb_backend_5xx_timeframe | Monitor timeframe for ELB backend 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| elb_backend_latency_aggregator | Monitor aggregator for ELB backend latency [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| elb_backend_latency_critical | latency critical threshold in seconds | string | `5` | no |
|
||||
| elb_backend_latency_message | Custom message for ELB backend latency monitor | string | `` | no |
|
||||
| elb_backend_latency_silenced | Groups to mute for ELB backend latency monitor | map | `<map>` | no |
|
||||
| elb_backend_latency_timeframe | Monitor timeframe for ELB backend latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| elb_backend_latency_warning | latency warning threshold in seconds | string | `1` | no |
|
||||
| elb_no_healthy_instance_aggregator | Monitor aggregator for ELB no healthy instance [available values: min, max, sum or avg] | string | `min` | no |
|
||||
| elb_no_healthy_instance_message | Custom message for ELB no healthy instance monitor | string | `` | no |
|
||||
| elb_no_healthy_instance_silenced | Groups to mute for ELB no healthy instance monitor | map | `<map>` | no |
|
||||
| elb_no_healthy_instance_timeframe | Monitor timeframe for ELB no healthy instance [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
|
||||
@ -37,6 +37,12 @@ variable "elb_no_healthy_instance_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_no_healthy_instance_aggregator" {
|
||||
description = "Monitor aggregator for ELB no healthy instance [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "elb_no_healthy_instance_timeframe" {
|
||||
description = "Monitor timeframe for ELB no healthy instance [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -55,6 +61,12 @@ variable "elb_4xx_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_4xx_aggregator" {
|
||||
description = "Monitor aggregator for ELB 4xx errors [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "elb_4xx_timeframe" {
|
||||
description = "Monitor timeframe for ELB 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -83,6 +95,12 @@ variable "elb_5xx_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_5xx_aggregator" {
|
||||
description = "Monitor aggregator for ELB 5xx errors [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "elb_5xx_timeframe" {
|
||||
description = "Monitor timeframe for ELB 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -111,6 +129,12 @@ variable "elb_backend_4xx_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_backend_4xx_aggregator" {
|
||||
description = "Monitor aggregator for ELB backend 4xx errors [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "elb_backend_4xx_timeframe" {
|
||||
description = "Monitor timeframe for ELB backend 4xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -139,6 +163,12 @@ variable "elb_backend_5xx_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_backend_5xx_aggregator" {
|
||||
description = "Monitor aggregator for ELB backend 5xx errors [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "elb_backend_5xx_timeframe" {
|
||||
description = "Monitor timeframe for ELB backend 5xx errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -167,6 +197,12 @@ variable "elb_backend_latency_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "elb_backend_latency_aggregator" {
|
||||
description = "Monitor aggregator for ELB backend latency [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "elb_backend_latency_timeframe" {
|
||||
description = "Monitor timeframe for ELB backend latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -11,8 +11,8 @@ resource "datadog_monitor" "ELB_no_healthy_instances" {
|
||||
message = "${coalesce(var.elb_no_healthy_instance_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.elb_no_healthy_instance_timeframe}): (
|
||||
min:aws.elb.healthy_host_count{${data.template_file.filter.rendered}} by {region,loadbalancername}
|
||||
${var.elb_no_healthy_instance_aggregator}(${var.elb_no_healthy_instance_timeframe}): (
|
||||
${var.elb_no_healthy_instance_aggregator}:aws.elb.healthy_host_count{${data.template_file.filter.rendered}} by {region,loadbalancername}
|
||||
) < 1
|
||||
EOF
|
||||
|
||||
@ -38,10 +38,10 @@ resource "datadog_monitor" "ELB_too_much_4xx" {
|
||||
message = "${coalesce(var.elb_4xx_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.elb_4xx_timeframe}): (
|
||||
${var.elb_4xx_aggregator}(${var.elb_4xx_timeframe}): (
|
||||
default(
|
||||
min:aws.elb.httpcode_elb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername}.as_count() /
|
||||
(min:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername}.as_count() + ${var.artificial_requests_count}),
|
||||
${var.elb_4xx_aggregator}:aws.elb.httpcode_elb_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername}.as_count() /
|
||||
(${var.elb_4xx_aggregator}:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername}.as_count() + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.elb_4xx_threshold_critical}
|
||||
EOF
|
||||
@ -73,10 +73,10 @@ resource "datadog_monitor" "ELB_too_much_5xx" {
|
||||
message = "${coalesce(var.elb_5xx_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.elb_5xx_timeframe}): (
|
||||
${var.elb_5xx_aggregator}(${var.elb_5xx_timeframe}): (
|
||||
default(
|
||||
min:aws.elb.httpcode_elb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
|
||||
(min:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
|
||||
${var.elb_5xx_aggregator}:aws.elb.httpcode_elb_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
|
||||
(${var.elb_5xx_aggregator}:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.elb_5xx_threshold_critical}
|
||||
EOF
|
||||
@ -108,10 +108,10 @@ resource "datadog_monitor" "ELB_too_much_4xx_backend" {
|
||||
message = "${coalesce(var.elb_backend_4xx_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.elb_backend_4xx_timeframe}): (
|
||||
${var.elb_backend_4xx_aggregator}(${var.elb_backend_4xx_timeframe}): (
|
||||
default(
|
||||
min:aws.elb.httpcode_backend_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
|
||||
(min:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
|
||||
${var.elb_backend_4xx_aggregator}:aws.elb.httpcode_backend_4xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
|
||||
(${var.elb_backend_4xx_aggregator}:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.elb_backend_4xx_threshold_critical}
|
||||
EOF
|
||||
@ -143,10 +143,10 @@ resource "datadog_monitor" "ELB_too_much_5xx_backend" {
|
||||
message = "${coalesce(var.elb_backend_5xx_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.elb_backend_5xx_timeframe}): (
|
||||
${var.elb_backend_5xx_aggregator}(${var.elb_backend_5xx_timeframe}): (
|
||||
default(
|
||||
min:aws.elb.httpcode_backend_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
|
||||
(min:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
|
||||
${var.elb_backend_5xx_aggregator}:aws.elb.httpcode_backend_5xx{${data.template_file.filter.rendered}} by {region,loadbalancername} /
|
||||
(${var.elb_backend_5xx_aggregator}:aws.elb.request_count{${data.template_file.filter.rendered}} by {region,loadbalancername} + ${var.artificial_requests_count}),
|
||||
0) * 100
|
||||
) > ${var.elb_backend_5xx_threshold_critical}
|
||||
EOF
|
||||
@ -178,8 +178,8 @@ resource "datadog_monitor" "ELB_backend_latency" {
|
||||
message = "${coalesce(var.elb_backend_latency_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
min(${var.elb_backend_latency_warning}): (
|
||||
min:aws.elb.latency{${data.template_file.filter.rendered}} by {region,loadbalancername}
|
||||
${var.elb_backend_latency_aggregator}(${var.elb_backend_latency_warning}): (
|
||||
${var.elb_backend_latency_aggregator}:aws.elb.latency{${data.template_file.filter.rendered}} by {region,loadbalancername}
|
||||
) > ${var.elb_backend_latency_critical}
|
||||
EOF
|
||||
|
||||
|
||||
@ -22,12 +22,14 @@ Creates DataDog monitors with the following checks :
|
||||
|
||||
Inputs
|
||||
------
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| environment | Environment | string | - | yes |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| incoming_records_aggregator | Monitor aggregator for Kinesis Firehose incoming records [available values: min, max, sum or avg] | string | `sum` | no |
|
||||
| incoming_records_message | Custom message for Kinesis Firehose incoming records monitor | string | `` | no |
|
||||
| incoming_records_silenced | Groups to mute for Kinesis Firehose incoming records monitor | map | `<map>` | no |
|
||||
| incoming_records_timeframe | Monitor timeframe for incoming records metrics evaluation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
|
||||
@ -38,6 +38,12 @@ variable "incoming_records_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "incoming_records_aggregator" {
|
||||
description = "Monitor aggregator for Kinesis Firehorse incoming records [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "sum"
|
||||
}
|
||||
|
||||
variable "incoming_records_timeframe" {
|
||||
description = "Monitor timeframe for incoming records metrics evaluation [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_15m"
|
||||
|
||||
@ -14,8 +14,8 @@ resource "datadog_monitor" "firehose_incoming_records" {
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
sum(${var.incoming_records_timeframe}): (
|
||||
avg:aws.firehose.incoming_records{${data.template_file.filter.rendered}} by {region,deliverystreamname}
|
||||
${var.incoming_records_aggregator}(${var.incoming_records_timeframe}): (
|
||||
${var.incoming_records_aggregator}:aws.firehose.incoming_records{${data.template_file.filter.rendered}} by {region,deliverystreamname}
|
||||
) <= 0
|
||||
EOF
|
||||
|
||||
|
||||
@ -25,12 +25,14 @@ Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| cpu_aggregator | Monitor aggregator for RDS CPU usage [available values: min, max, sum or avg] | string | `avg` | no |
|
||||
| cpu_message | Custom message for RDS CPU usage monitor | string | `` | no |
|
||||
| cpu_silenced | Groups to mute for RDS CPU usage monitor | map | `<map>` | no |
|
||||
| cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no |
|
||||
| cpu_threshold_warning | CPU usage in percent (warning threshold) | string | `80` | no |
|
||||
| cpu_timeframe | Monitor timeframe for RDS CPU usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| diskspace_aggregator | Monitor aggregator for RDS free diskspace [available values: min, max, sum or avg] | string | `avg` | no |
|
||||
| diskspace_message | Custom message for RDS free diskspace monitor | string | `` | no |
|
||||
| diskspace_silenced | Groups to mute for RDS free diskspace monitor | map | `<map>` | no |
|
||||
| diskspace_threshold_critical | Disk free space in percent (critical threshold) | string | `10` | no |
|
||||
|
||||
@ -38,6 +38,12 @@ variable "cpu_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cpu_aggregator" {
|
||||
description = "Monitor aggregator for RDS CPU usage [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "cpu_timeframe" {
|
||||
description = "Monitor timeframe for RDS CPU usage [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
@ -66,6 +72,12 @@ variable "diskspace_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "diskspace_aggregator" {
|
||||
description = "Monitor aggregator for RDS free diskspace [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "diskspace_timeframe" {
|
||||
description = "Monitor timeframe for RDS free diskspace [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -14,8 +14,8 @@ resource "datadog_monitor" "rds_cpu_90_15min" {
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.cpu_timeframe}): (
|
||||
avg:aws.rds.cpuutilization{${data.template_file.filter.rendered}} by {region,name}
|
||||
${var.cpu_aggregator}(${var.cpu_timeframe}): (
|
||||
${var.cpu_aggregator}:aws.rds.cpuutilization{${data.template_file.filter.rendered}} by {region,name}
|
||||
) > ${var.cpu_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -46,9 +46,9 @@ resource "datadog_monitor" "rds_free_space_low" {
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.diskspace_timeframe}): (
|
||||
avg:aws.rds.free_storage_space{${data.template_file.filter.rendered}} by {region,name} /
|
||||
avg:aws.rds.total_storage_space{${data.template_file.filter.rendered}} by {region,name} * 100
|
||||
${var.diskspace_aggregator}(${var.diskspace_timeframe}): (
|
||||
${var.diskspace_aggregator}:aws.rds.free_storage_space{${data.template_file.filter.rendered}} by {region,name} /
|
||||
${var.diskspace_aggregator}:aws.rds.total_storage_space{${data.template_file.filter.rendered}} by {region,name} * 100
|
||||
) < ${var.diskspace_threshold_critical}
|
||||
EOF
|
||||
|
||||
|
||||
@ -29,6 +29,7 @@ Inputs
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| message | Message sent when an alert is triggered | string | - | yes |
|
||||
| vpn_status_aggregator | Monitor aggregator for VPN status [available values: min, max, sum or avg] | string | `avg` | no |
|
||||
| vpn_status_message | Custom message for VPN status monitor | string | `` | no |
|
||||
| vpn_status_silenced | Groups to mute for VPN status monitor | map | `<map>` | no |
|
||||
| vpn_status_timeframe | Monitor timeframe for VPN status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
|
||||
@ -36,6 +36,12 @@ variable "vpn_status_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "vpn_status_aggregator" {
|
||||
description = "Monitor aggregator for VPN status [available values: min, max, sum or avg]"
|
||||
type = "string"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "vpn_status_timeframe" {
|
||||
description = "Monitor timeframe for VPN status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
|
||||
@ -11,8 +11,8 @@ resource "datadog_monitor" "VPN_status" {
|
||||
message = "${coalesce(var.vpn_status_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.vpn_status_timeframe}): (
|
||||
avg:aws.vpn.tunnel_state{${data.template_file.filter.rendered}} by {region,name}
|
||||
${var.vpn_status_aggregator}(${var.vpn_status_timeframe}): (
|
||||
${var.vpn_status_aggregator}:aws.vpn.tunnel_state{${data.template_file.filter.rendered}} by {region,name}
|
||||
) < 1
|
||||
EOF
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user