MON-79 Raise critical thresholds and add warning thresholds to avoid flapping ("bagot") alerts during NBH

This commit is contained in:
Laurent Piroelle 2018-02-08 17:54:51 +01:00 committed by Quentin Manfroi
parent 1223e3b26f
commit a0ac2d7629
4 changed files with 108 additions and 0 deletions

View File

@ -312,46 +312,91 @@ variable "sqldatabase_deadlock_threshold_critical" {
# Azure Storage specific variables
# Storage availability thresholds, expressed in percent.
# Both values are forwarded to the "storage" module as
# availability_threshold_critical / availability_threshold_warning.
# NOTE(review): the warning default (90) is above the critical default (50),
# so the monitor presumably fires when the metric drops below the threshold —
# confirm against the monitor query.
variable "storage_availability_threshold_critical" {
  description = "Minimum acceptable percent of availability for a storage"
  default     = 50
}

variable "storage_availability_threshold_warning" {
  description = "Warning regarding acceptable percent of availability for a storage"
  default     = 90
}
# Storage successful-requests thresholds, expressed in percent.
# Both values are forwarded to the "storage" module as
# successful_requests_threshold_critical / successful_requests_threshold_warning.
# NOTE(review): warning (90) is above critical (50); presumably a "below
# threshold" alert — confirm against the monitor query.
variable "storage_successful_requests_threshold_critical" {
  description = "Minimum acceptable percent of successful requests for a storage"
  default     = 50
}

variable "storage_successful_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of successful requests for a storage"
  default     = 90
}
# Storage end-to-end latency thresholds, in milliseconds (per the description).
# Both values are forwarded to the "storage" module as
# latency_threshold_critical / latency_threshold_warning.
# The warning default (1000) is half of the critical default (2000).
variable "storage_latency_threshold_critical" {
  description = "Maximum acceptable end to end latency (ms) for a storage"
  default     = 2000
}

variable "storage_latency_threshold_warning" {
  description = "Warning regarding acceptable end to end latency (ms) for a storage"
  default     = 1000
}
# Storage timeout-error request thresholds, expressed in percent.
# Both values are forwarded to the "storage" module as
# timeout_error_requests_threshold_critical / timeout_error_requests_threshold_warning.
# The warning default (5) is lower than the critical default (50).
variable "storage_timeout_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of timeout error requests for a storage"
  default     = 50
}

variable "storage_timeout_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of timeout error requests for a storage"
  default     = 5
}
# Storage network-error request thresholds, expressed in percent.
# Both values are forwarded to the "storage" module as
# network_error_requests_threshold_critical / network_error_requests_threshold_warning.
# The warning default (5) is lower than the critical default (50).
variable "storage_network_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of network error requests for a storage"
  default     = 50
}

variable "storage_network_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of network error requests for a storage"
  default     = 5
}
# Storage throttling-error request thresholds, expressed in percent.
# Both values are forwarded to the "storage" module as
# throttling_error_requests_threshold_critical / throttling_error_requests_threshold_warning.
# The warning default (10) is lower than the critical default (50).
variable "storage_throttling_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of throttling error requests for a storage"
  default     = 50
}

variable "storage_throttling_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of throttling error requests for a storage"
  default     = 10
}
# Storage "server other" error request thresholds, expressed in percent.
# Both values are forwarded to the "storage" module as
# server_other_error_requests_threshold_critical / server_other_error_requests_threshold_warning.
# The warning default (10) is lower than the critical default (50).
variable "storage_server_other_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of server other error requests for a storage"
  default     = 50
}

variable "storage_server_other_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of server other error requests for a storage"
  default     = 10
}
# Storage "client other" error request thresholds, expressed in percent.
# Both values are forwarded to the "storage" module as
# client_other_error_requests_threshold_critical / client_other_error_requests_threshold_warning.
# The warning default (15) is lower than the critical default (50).
variable "storage_client_other_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of client other error requests for a storage"
  default     = 50
}

variable "storage_client_other_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of client other error requests for a storage"
  default     = 15
}
# Storage authorization-error request thresholds, expressed in percent.
# Both values are forwarded to the "storage" module as
# authorization_error_requests_threshold_critical / authorization_error_requests_threshold_warning.
# The warning default (15) is lower than the critical default (50).
variable "storage_authorization_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of authorization error requests for a storage"
  default     = 50
}

variable "storage_authorization_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of authorization error requests for a storage"
  default     = 15
}

View File

@ -135,14 +135,23 @@ module "storage" {
filter_tags_custom = "${var.filter_tags_custom}"
authorization_error_requests_threshold_critical = "${var.storage_authorization_error_requests_threshold_critical}"
authorization_error_requests_threshold_warning = "${var.storage_authorization_error_requests_threshold_warning}"
availability_threshold_critical = "${var.storage_availability_threshold_critical}"
availability_threshold_warning = "${var.storage_availability_threshold_warning}"
client_other_error_requests_threshold_critical = "${var.storage_client_other_error_requests_threshold_critical}"
client_other_error_requests_threshold_warning = "${var.storage_client_other_error_requests_threshold_warning}"
latency_threshold_critical = "${var.storage_latency_threshold_critical}"
latency_threshold_warning = "${var.storage_latency_threshold_warning}"
network_error_requests_threshold_critical = "${var.storage_network_error_requests_threshold_critical}"
network_error_requests_threshold_warning = "${var.storage_network_error_requests_threshold_warning}"
server_other_error_requests_threshold_critical = "${var.storage_server_other_error_requests_threshold_critical}"
server_other_error_requests_threshold_warning = "${var.storage_server_other_error_requests_threshold_warning}"
successful_requests_threshold_critical = "${var.storage_successful_requests_threshold_critical}"
successful_requests_threshold_warning = "${var.storage_successful_requests_threshold_warning}"
throttling_error_requests_threshold_critical = "${var.storage_throttling_error_requests_threshold_critical}"
throttling_error_requests_threshold_warning = "${var.storage_throttling_error_requests_threshold_warning}"
timeout_error_requests_threshold_critical = "${var.storage_timeout_error_requests_threshold_critical}"
timeout_error_requests_threshold_warning = "${var.storage_timeout_error_requests_threshold_warning}"
}
module "streamanalytics" {

View File

@ -27,45 +27,90 @@ variable "filter_tags_custom" {
# Azure Storage specific
# Availability thresholds, expressed in percent.
# Read as var.availability_threshold_critical / _warning by the `thresholds`
# block of a "metric alert".
# NOTE(review): warning (90) is above critical (50); presumably the alert
# fires when the metric drops below the threshold — confirm against the query.
variable "availability_threshold_critical" {
  description = "Minimum acceptable percent of availability for a storage"
  default     = 50
}

variable "availability_threshold_warning" {
  description = "Warning regarding acceptable percent of availability for a storage"
  default     = 90
}
# Successful-requests thresholds, expressed in percent.
# Read as var.successful_requests_threshold_critical / _warning by the
# `thresholds` block of a "metric alert".
# NOTE(review): warning (90) is above critical (50); presumably a "below
# threshold" alert — confirm against the query.
variable "successful_requests_threshold_critical" {
  description = "Minimum acceptable percent of successful requests for a storage"
  default     = 50
}

variable "successful_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of successful requests for a storage"
  default     = 90
}
# End-to-end latency thresholds, in milliseconds (per the description).
# Read as var.latency_threshold_critical / _warning by the `thresholds`
# block of a "metric alert".
# The warning default (1000) is half of the critical default (2000).
variable "latency_threshold_critical" {
  description = "Maximum acceptable end to end latency (ms) for a storage"
  default     = 2000
}

variable "latency_threshold_warning" {
  description = "Warning regarding acceptable end to end latency (ms) for a storage"
  default     = 1000
}
# Timeout-error request thresholds, expressed in percent.
# Read as var.timeout_error_requests_threshold_critical / _warning by the
# `thresholds` block of a "metric alert".
# The warning default (5) is lower than the critical default (50).
variable "timeout_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of timeout error requests for a storage"
  default     = 50
}

variable "timeout_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of timeout error requests for a storage"
  default     = 5
}
# Network-error request thresholds, expressed in percent.
# Read as var.network_error_requests_threshold_critical / _warning by the
# `thresholds` block of a "metric alert".
# The warning default (5) is lower than the critical default (50).
variable "network_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of network error requests for a storage"
  default     = 50
}

variable "network_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of network error requests for a storage"
  default     = 5
}
# Throttling-error request thresholds, expressed in percent.
# Read as var.throttling_error_requests_threshold_critical / _warning by the
# `thresholds` block of a "metric alert".
# The warning default (10) is lower than the critical default (50).
variable "throttling_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of throttling error requests for a storage"
  default     = 50
}

variable "throttling_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of throttling error requests for a storage"
  default     = 10
}
# "Server other" error request thresholds, expressed in percent.
# Read as var.server_other_error_requests_threshold_critical / _warning by the
# `thresholds` block of a "metric alert".
# The warning default (10) is lower than the critical default (50).
variable "server_other_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of server other error requests for a storage"
  default     = 50
}

variable "server_other_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of server other error requests for a storage"
  default     = 10
}
# "Client other" error request thresholds, expressed in percent.
# Read as var.client_other_error_requests_threshold_critical / _warning by the
# `thresholds` block of a "metric alert".
# The warning default (15) is lower than the critical default (50).
variable "client_other_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of client other error requests for a storage"
  default     = 50
}

variable "client_other_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of client other error requests for a storage"
  default     = 15
}
# Authorization-error request thresholds, expressed in percent.
# Read as var.authorization_error_requests_threshold_critical / _warning by the
# `thresholds` block of a "metric alert".
# The warning default (15) is lower than the critical default (50).
variable "authorization_error_requests_threshold_critical" {
  description = "Maximum acceptable percent of authorization error requests for a storage"
  default     = 50
}

variable "authorization_error_requests_threshold_warning" {
  description = "Warning regarding acceptable percent of authorization error requests for a storage"
  default     = 15
}

View File

@ -18,6 +18,7 @@ EOF
thresholds {
critical = "${var.availability_threshold_critical}"
warning = "${var.availability_threshold_warning}"
}
type = "metric alert"
@ -47,6 +48,7 @@ EOF
thresholds {
critical = "${var.successful_requests_threshold_critical}"
warning = "${var.successful_requests_threshold_warning}"
}
type = "metric alert"
@ -76,6 +78,7 @@ EOF
thresholds {
critical = "${var.latency_threshold_critical}"
warning = "${var.latency_threshold_warning}"
}
type = "metric alert"
@ -105,6 +108,7 @@ EOF
thresholds {
critical = "${var.timeout_error_requests_threshold_critical}"
warning = "${var.timeout_error_requests_threshold_warning}"
}
type = "metric alert"
@ -134,6 +138,7 @@ EOF
thresholds {
critical = "${var.network_error_requests_threshold_critical}"
warning = "${var.network_error_requests_threshold_warning}"
}
type = "metric alert"
@ -163,6 +168,7 @@ EOF
thresholds {
critical = "${var.throttling_error_requests_threshold_critical}"
warning = "${var.throttling_error_requests_threshold_warning}"
}
type = "metric alert"
@ -192,6 +198,7 @@ EOF
thresholds {
critical = "${var.server_other_error_requests_threshold_critical}"
warning = "${var.server_other_error_requests_threshold_warning}"
}
type = "metric alert"
@ -221,6 +228,7 @@ EOF
thresholds {
critical = "${var.client_other_error_requests_threshold_critical}"
warning = "${var.client_other_error_requests_threshold_warning}"
}
type = "metric alert"
@ -250,6 +258,7 @@ EOF
thresholds {
critical = "${var.authorization_error_requests_threshold_critical}"
warning = "${var.authorization_error_requests_threshold_warning}"
}
type = "metric alert"