MON-79 Raise critical thresholds and add warning thresholds to avoid flapping ("bagot") alerts during NBH
This commit is contained in:
parent
1223e3b26f
commit
a0ac2d7629
@ -312,46 +312,91 @@ variable "sqldatabase_deadlock_threshold_critical" {
|
|||||||
# Azure Storage specific variables
|
# Azure Storage specific variables
|
||||||
variable "storage_availability_threshold_critical" {
|
variable "storage_availability_threshold_critical" {
|
||||||
description = "Minimum acceptable percent of availability for a storage"
|
description = "Minimum acceptable percent of availability for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "storage_availability_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of availability for a storage"
|
||||||
default = 90
|
default = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "storage_successful_requests_threshold_critical" {
|
variable "storage_successful_requests_threshold_critical" {
|
||||||
description = "Minimum acceptable percent of successful requests for a storage"
|
description = "Minimum acceptable percent of successful requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "storage_successful_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of successful requests for a storage"
|
||||||
default = 90
|
default = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "storage_latency_threshold_critical" {
|
variable "storage_latency_threshold_critical" {
|
||||||
description = "Maximum acceptable end to end latency (ms) for a storage"
|
description = "Maximum acceptable end to end latency (ms) for a storage"
|
||||||
|
default = 2000
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "storage_latency_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable end to end latency (ms) for a storage"
|
||||||
default = 1000
|
default = 1000
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "storage_timeout_error_requests_threshold_critical" {
|
variable "storage_timeout_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of timeout error requests for a storage"
|
description = "Maximum acceptable percent of timeout error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "storage_timeout_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of timeout error requests for a storage"
|
||||||
default = 5
|
default = 5
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "storage_network_error_requests_threshold_critical" {
|
variable "storage_network_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of network error requests for a storage"
|
description = "Maximum acceptable percent of network error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "storage_network_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of network error requests for a storage"
|
||||||
default = 5
|
default = 5
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "storage_throttling_error_requests_threshold_critical" {
|
variable "storage_throttling_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of throttling error requests for a storage"
|
description = "Maximum acceptable percent of throttling error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "storage_throttling_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of throttling error requests for a storage"
|
||||||
default = 10
|
default = 10
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "storage_server_other_error_requests_threshold_critical" {
|
variable "storage_server_other_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of server other error requests for a storage"
|
description = "Maximum acceptable percent of server other error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "storage_server_other_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of server other error requests for a storage"
|
||||||
default = 10
|
default = 10
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "storage_client_other_error_requests_threshold_critical" {
|
variable "storage_client_other_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of client other error requests for a storage"
|
description = "Maximum acceptable percent of client other error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "storage_client_other_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of client other error requests for a storage"
|
||||||
default = 15
|
default = 15
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "storage_authorization_error_requests_threshold_critical" {
|
variable "storage_authorization_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of authorization error requests for a storage"
|
description = "Maximum acceptable percent of authorization error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "storage_authorization_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of authorization error requests for a storage"
|
||||||
default = 15
|
default = 15
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -135,14 +135,23 @@ module "storage" {
|
|||||||
filter_tags_custom = "${var.filter_tags_custom}"
|
filter_tags_custom = "${var.filter_tags_custom}"
|
||||||
|
|
||||||
authorization_error_requests_threshold_critical = "${var.storage_authorization_error_requests_threshold_critical}"
|
authorization_error_requests_threshold_critical = "${var.storage_authorization_error_requests_threshold_critical}"
|
||||||
|
authorization_error_requests_threshold_warning = "${var.storage_authorization_error_requests_threshold_warning}"
|
||||||
availability_threshold_critical = "${var.storage_availability_threshold_critical}"
|
availability_threshold_critical = "${var.storage_availability_threshold_critical}"
|
||||||
|
availability_threshold_warning = "${var.storage_availability_threshold_warning}"
|
||||||
client_other_error_requests_threshold_critical = "${var.storage_client_other_error_requests_threshold_critical}"
|
client_other_error_requests_threshold_critical = "${var.storage_client_other_error_requests_threshold_critical}"
|
||||||
|
client_other_error_requests_threshold_warning = "${var.storage_client_other_error_requests_threshold_warning}"
|
||||||
latency_threshold_critical = "${var.storage_latency_threshold_critical}"
|
latency_threshold_critical = "${var.storage_latency_threshold_critical}"
|
||||||
|
latency_threshold_warning = "${var.storage_latency_threshold_warning}"
|
||||||
network_error_requests_threshold_critical = "${var.storage_network_error_requests_threshold_critical}"
|
network_error_requests_threshold_critical = "${var.storage_network_error_requests_threshold_critical}"
|
||||||
|
network_error_requests_threshold_warning = "${var.storage_network_error_requests_threshold_warning}"
|
||||||
server_other_error_requests_threshold_critical = "${var.storage_server_other_error_requests_threshold_critical}"
|
server_other_error_requests_threshold_critical = "${var.storage_server_other_error_requests_threshold_critical}"
|
||||||
|
server_other_error_requests_threshold_warning = "${var.storage_server_other_error_requests_threshold_warning}"
|
||||||
successful_requests_threshold_critical = "${var.storage_successful_requests_threshold_critical}"
|
successful_requests_threshold_critical = "${var.storage_successful_requests_threshold_critical}"
|
||||||
|
successful_requests_threshold_warning = "${var.storage_successful_requests_threshold_warning}"
|
||||||
throttling_error_requests_threshold_critical = "${var.storage_throttling_error_requests_threshold_critical}"
|
throttling_error_requests_threshold_critical = "${var.storage_throttling_error_requests_threshold_critical}"
|
||||||
|
throttling_error_requests_threshold_warning = "${var.storage_throttling_error_requests_threshold_warning}"
|
||||||
timeout_error_requests_threshold_critical = "${var.storage_timeout_error_requests_threshold_critical}"
|
timeout_error_requests_threshold_critical = "${var.storage_timeout_error_requests_threshold_critical}"
|
||||||
|
timeout_error_requests_threshold_warning = "${var.storage_timeout_error_requests_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
module "streamanalytics" {
|
module "streamanalytics" {
|
||||||
|
|||||||
@ -27,45 +27,90 @@ variable "filter_tags_custom" {
|
|||||||
# Azure Storage specific
|
# Azure Storage specific
|
||||||
variable "availability_threshold_critical" {
|
variable "availability_threshold_critical" {
|
||||||
description = "Minimum acceptable percent of availability for a storage"
|
description = "Minimum acceptable percent of availability for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "availability_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of availability for a storage"
|
||||||
default = 90
|
default = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "successful_requests_threshold_critical" {
|
variable "successful_requests_threshold_critical" {
|
||||||
description = "Minimum acceptable percent of successful requests for a storage"
|
description = "Minimum acceptable percent of successful requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "successful_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of successful requests for a storage"
|
||||||
default = 90
|
default = 90
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "latency_threshold_critical" {
|
variable "latency_threshold_critical" {
|
||||||
description = "Maximum acceptable end to end latency (ms) for a storage"
|
description = "Maximum acceptable end to end latency (ms) for a storage"
|
||||||
|
default = 2000
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "latency_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable end to end latency (ms) for a storage"
|
||||||
default = 1000
|
default = 1000
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "timeout_error_requests_threshold_critical" {
|
variable "timeout_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of timeout error requests for a storage"
|
description = "Maximum acceptable percent of timeout error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "timeout_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of timeout error requests for a storage"
|
||||||
default = 5
|
default = 5
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "network_error_requests_threshold_critical" {
|
variable "network_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of network error requests for a storage"
|
description = "Maximum acceptable percent of network error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "network_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of network error requests for a storage"
|
||||||
default = 5
|
default = 5
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "throttling_error_requests_threshold_critical" {
|
variable "throttling_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of throttling error requests for a storage"
|
description = "Maximum acceptable percent of throttling error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "throttling_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of throttling error requests for a storage"
|
||||||
default = 10
|
default = 10
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "server_other_error_requests_threshold_critical" {
|
variable "server_other_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of server other error requests for a storage"
|
description = "Maximum acceptable percent of server other error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "server_other_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of server other error requests for a storage"
|
||||||
default = 10
|
default = 10
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "client_other_error_requests_threshold_critical" {
|
variable "client_other_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of client other error requests for a storage"
|
description = "Maximum acceptable percent of client other error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "client_other_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of client other error requests for a storage"
|
||||||
default = 15
|
default = 15
|
||||||
}
|
}
|
||||||
|
|
||||||
variable "authorization_error_requests_threshold_critical" {
|
variable "authorization_error_requests_threshold_critical" {
|
||||||
description = "Maximum acceptable percent of authorization error requests for a storage"
|
description = "Maximum acceptable percent of authorization error requests for a storage"
|
||||||
|
default = 50
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "authorization_error_requests_threshold_warning" {
|
||||||
|
description = "Warning regarding acceptable percent of authorization error requests for a storage"
|
||||||
default = 15
|
default = 15
|
||||||
}
|
}
|
||||||
|
|||||||
@ -18,6 +18,7 @@ EOF
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = "${var.availability_threshold_critical}"
|
critical = "${var.availability_threshold_critical}"
|
||||||
|
warning = "${var.availability_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -47,6 +48,7 @@ EOF
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = "${var.successful_requests_threshold_critical}"
|
critical = "${var.successful_requests_threshold_critical}"
|
||||||
|
warning = "${var.successful_requests_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -76,6 +78,7 @@ EOF
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = "${var.latency_threshold_critical}"
|
critical = "${var.latency_threshold_critical}"
|
||||||
|
warning = "${var.latency_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -105,6 +108,7 @@ EOF
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = "${var.timeout_error_requests_threshold_critical}"
|
critical = "${var.timeout_error_requests_threshold_critical}"
|
||||||
|
warning = "${var.timeout_error_requests_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -134,6 +138,7 @@ EOF
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = "${var.network_error_requests_threshold_critical}"
|
critical = "${var.network_error_requests_threshold_critical}"
|
||||||
|
warning = "${var.network_error_requests_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -163,6 +168,7 @@ EOF
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = "${var.throttling_error_requests_threshold_critical}"
|
critical = "${var.throttling_error_requests_threshold_critical}"
|
||||||
|
warning = "${var.throttling_error_requests_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -192,6 +198,7 @@ EOF
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = "${var.server_other_error_requests_threshold_critical}"
|
critical = "${var.server_other_error_requests_threshold_critical}"
|
||||||
|
warning = "${var.server_other_error_requests_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -221,6 +228,7 @@ EOF
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = "${var.client_other_error_requests_threshold_critical}"
|
critical = "${var.client_other_error_requests_threshold_critical}"
|
||||||
|
warning = "${var.client_other_error_requests_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
@ -250,6 +258,7 @@ EOF
|
|||||||
|
|
||||||
thresholds {
|
thresholds {
|
||||||
critical = "${var.authorization_error_requests_threshold_critical}"
|
critical = "${var.authorization_error_requests_threshold_critical}"
|
||||||
|
warning = "${var.authorization_error_requests_threshold_warning}"
|
||||||
}
|
}
|
||||||
|
|
||||||
type = "metric alert"
|
type = "metric alert"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user