diff --git a/cloud/gcp/lb/README.md b/cloud/gcp/lb/README.md index e2aaaa0..8a987f0 100644 --- a/cloud/gcp/lb/README.md +++ b/cloud/gcp/lb/README.md @@ -35,12 +35,14 @@ Creates DataDog monitors with the following checks: | backend_latency_timeframe | Timeframe for the GCP LB Backend Latency monitor | string | `last_10m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | +| error_rate_4xx_artificial_request | Artificial request count added to the divisor to avoid division by zero for the GCP LB 4XX Errors monitor | string | `5` | no | | error_rate_4xx_extra_tags | Extra tags for GCP LB 4XX Errors monitor | list | `` | no | | error_rate_4xx_message | Custom message for the GCP LB 4XX Errors monitor | string | `` | no | | error_rate_4xx_silenced | Groups to mute for GCP LB 4XX Errors monitor | map | `` | no | | error_rate_4xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no | | error_rate_4xx_time_aggregator | Timeframe for the GCP LB 4XX Errors monitor | string | `sum` | no | | error_rate_4xx_timeframe | Timeframe for the GCP LB 4XX Errors monitor | string | `last_5m` | no | +| error_rate_5xx_artificial_request | Artificial request count added to the divisor to avoid division by zero for the GCP LB 5XX Errors monitor | string | `5` | no | | error_rate_5xx_extra_tags | Extra tags for GCP LB 5XX Errors monitor | list | `` | no | | error_rate_5xx_message | Custom message for the GCP LB 5XX Errors monitor | string | `` | no | | error_rate_5xx_silenced | Groups to mute for GCP LB 5XX Errors monitor | map | `` | no | diff --git a/cloud/gcp/lb/inputs.tf b/cloud/gcp/lb/inputs.tf index 8581b67..88d8f0f 100644 --- a/cloud/gcp/lb/inputs.tf +++ b/cloud/gcp/lb/inputs.tf @@ -54,6 +54,12 @@ variable "error_rate_4xx_timeframe" { default = "last_5m" } +variable "error_rate_4xx_artificial_request" { + description = "Artificial request count added to the divisor to avoid division by zero for the GCP LB 4XX Errors monitor" + type = "string" + default = 5 +} + variable "error_rate_4xx_threshold_critical" { description = "Rate 
error in percentage (critical threshold)" type = "string" @@ -93,6 +99,12 @@ variable "error_rate_5xx_timeframe" { default = "last_5m" } +variable "error_rate_5xx_artificial_request" { + description = "Artificial request count added to the divisor to avoid division by zero for the GCP LB 5XX Errors monitor" + type = "string" + default = 5 +} + variable "error_rate_5xx_threshold_critical" { description = "Rate error in percentage (critical threshold)" type = "string" diff --git a/cloud/gcp/lb/monitors-lb.tf b/cloud/gcp/lb/monitors-lb.tf index b16fb6b..a2601d3 100644 --- a/cloud/gcp/lb/monitors-lb.tf +++ b/cloud/gcp/lb/monitors-lb.tf @@ -24,7 +24,7 @@ resource "datadog_monitor" "error_rate_4xx" { ${var.error_rate_4xx_time_aggregator}(${var.error_rate_4xx_timeframe}): avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered},response_code_class:400} by {backend_target_name}.as_count().fill(zero) / - (avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + 5 ) * 100 + (avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_4xx_artificial_request} ) * 100 > ${var.error_rate_4xx_threshold_critical} EOF @@ -68,7 +68,7 @@ resource "datadog_monitor" "error_rate_5xx" { ${var.error_rate_5xx_time_aggregator}(${var.error_rate_5xx_timeframe}): - avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered},response_code_class:400} by {backend_target_name}.as_count().fill(zero) / - (avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + 5 ) * 100 + avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered},response_code_class:500} by {backend_target_name}.as_count().fill(zero) / + (avg:gcp.loadbalancing.https.request_count{${data.template_file.filter.rendered}} by {backend_target_name}.as_count().fill(zero) + ${var.error_rate_5xx_artificial_request} ) * 100 > ${var.error_rate_5xx_threshold_critical} EOF @@ -189,7 +189,7 @@ EOF # Request Count # resource "datadog_monitor" "request_count" { - name 
= "[${var.environment}] GCP LB Requests count increased abruptly" + name = "[${var.environment}] GCP LB Requests count increased abruptly {{#is_alert}}{{value}}%{{/is_alert}}{{#is_warning}}{{value}}%{{/is_warning}}" message = "${coalesce(var.request_count_message, var.message)}" type = "query alert"