Merged in MON-300-gcp-lb-add-warning-thresholds (pull request #154)

MON-300 Warning added and values adapted for the monitors of GCP LB 4XX and 5XX errors

Approved-by: Rafael Romero Carmona <rafael.romero.carmona@fr.clara.net>
Approved-by: Quentin Manfroi <quentin.manfroi@yahoo.fr>
Approved-by: Jean-Philippe LAINÉ <jean-philippe.laine@fr.clara.net>
This commit is contained in:
Rafael Romero Carmona 2018-09-11 14:57:38 +00:00 committed by Quentin Manfroi
commit b94f3fbe85
3 changed files with 23 additions and 7 deletions

View File

@ -45,14 +45,16 @@ Creates DataDog monitors with the following checks:
| error_rate_4xx_extra_tags | Extra tags for GCP LB 4XX Errors monitor | list | `<list>` | no |
| error_rate_4xx_message | Custom message for the GCP LB 4XX Errors monitor | string | `` | no |
| error_rate_4xx_silenced | Groups to mute for GCP LB 4XX Errors monitor | map | `<map>` | no |
| error_rate_4xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no |
| error_rate_4xx_threshold_critical | Rate error in percentage (critical threshold) | string | `60` | no |
| error_rate_4xx_threshold_warning | Rate error in percentage (warning threshold) | string | `50` | no |
| error_rate_4xx_time_aggregator | Time aggregator for the GCP LB 4XX Errors monitor | string | `sum` | no |
| error_rate_4xx_timeframe | Timeframe for the GCP LB 4XX Errors monitor | string | `last_5m` | no |
| error_rate_5xx_artificial_request | Divisor Delta for the GCP LB 5XX Errors monitor | string | `5` | no |
| error_rate_5xx_extra_tags | Extra tags for GCP LB 5XX Errors monitor | list | `<list>` | no |
| error_rate_5xx_message | Custom message for the GCP LB 5XX Errors monitor | string | `` | no |
| error_rate_5xx_silenced | Groups to mute for GCP LB 5XX Errors monitor | map | `<map>` | no |
| error_rate_5xx_threshold_critical | Rate error in percentage (critical threshold) | string | `50` | no |
| error_rate_5xx_threshold_critical | Rate error in percentage (critical threshold) | string | `40` | no |
| error_rate_5xx_threshold_warning | Rate error in percentage (warning threshold) | string | `30` | no |
| error_rate_5xx_time_aggregator | Time aggregator for the GCP LB 5XX Errors monitor | string | `sum` | no |
| error_rate_5xx_timeframe | Timeframe for the GCP LB 5XX Errors monitor | string | `last_5m` | no |
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |

View File

@ -52,10 +52,16 @@ variable "error_rate_4xx_artificial_request" {
default = 5
}
# Warning level for the GCP LB 4XX error-rate monitor, expressed as a percentage.
variable "error_rate_4xx_threshold_warning" {
  type        = "string"
  description = "Rate error in percentage (warning threshold)"
  default     = 50
}
# Critical level for the GCP LB 4XX error-rate monitor, expressed as a percentage.
variable "error_rate_4xx_threshold_critical" {
description = "Rate error in percentage (critical threshold)"
type = "string"
# NOTE(review): two `default` assignments are visible here (50, then 60). This looks
# like the previous and the new value of a diff; confirm only `default = 60` is kept.
default = 50
default = 60
}
variable "error_rate_4xx_silenced" {
@ -97,10 +103,16 @@ variable "error_rate_5xx_artificial_request" {
default = 5
}
# Warning level for the GCP LB 5XX error-rate monitor, expressed as a percentage.
variable "error_rate_5xx_threshold_warning" {
  type        = "string"
  description = "Rate error in percentage (warning threshold)"
  default     = 30
}
# Critical level for the GCP LB 5XX error-rate monitor, expressed as a percentage.
# The monitor query compares the computed 5XX rate against this value.
variable "error_rate_5xx_threshold_critical" {
description = "Rate error in percentage (critical threshold)"
type = "string"
# NOTE(review): two `default` assignments are visible here (50, then 40). This looks
# like the previous and the new value of a diff; confirm only `default = 40` is kept.
default = 50
default = 40
}
variable "error_rate_5xx_silenced" {

View File

@ -16,6 +16,7 @@ resource "datadog_monitor" "error_rate_4xx" {
EOF
thresholds {
warning = "${var.error_rate_4xx_threshold_warning}"
critical = "${var.error_rate_4xx_threshold_critical}"
}
@ -45,14 +46,15 @@ resource "datadog_monitor" "error_rate_5xx" {
type = "metric alert"
query = <<EOF
${var.error_rate_4xx_time_aggregator}(${var.error_rate_4xx_timeframe}):
${var.error_rate_5xx_time_aggregator}(${var.error_rate_5xx_timeframe}):
default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags},response_code_class:500} by {forwarding_rule_name}.as_count(), 0)
/ (default(sum:gcp.loadbalancing.https.request_count{${var.filter_tags}} by {forwarding_rule_name}.as_count(), 0)
+ ${var.error_rate_4xx_artificial_request}) * 100
> ${var.error_rate_4xx_threshold_critical}
+ ${var.error_rate_5xx_artificial_request}) * 100
> ${var.error_rate_5xx_threshold_critical}
EOF
thresholds {
warning = "${var.error_rate_5xx_threshold_warning}"
critical = "${var.error_rate_5xx_threshold_critical}"
}