Merge branch 'MON-451-Fix_nodata_alerts_on_Azure_App_Services' into 'master'

MON-451: Do not send no-data notifications for the Azure App Services memory usage and response time monitors

Closes MON-451

See merge request claranet/pt-monitoring/projects/datadog/terraform/monitors!57
This commit is contained in:
Quentin Manfroi 2019-05-09 13:16:39 +02:00
commit 10698fe91f
4 changed files with 78 additions and 2 deletions

View File

@ -19,6 +19,7 @@ Creates DataDog monitors with the following checks:
- App Services HTTP 4xx errors too high
- App Services HTTP 5xx errors too high
- App Services HTTP successful responses too low
- App Services is down
- App Services memory usage
- App Services response time too high
@ -74,6 +75,12 @@ Creates DataDog monitors with the following checks:
| response\_time\_threshold\_warning | Warning threshold for response time in seconds | string | `"5"` | no |
| response\_time\_time\_aggregator | Monitor aggregator for App Services response time [available values: min, max or avg] | string | `"min"` | no |
| response\_time\_timeframe | Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no |
| status\_enabled | Flag to enable App Services status monitor | string | `"true"` | no |
| status\_extra\_tags | Extra tags for App Services status monitor | list | `[]` | no |
| status\_message | Custom message for App Services status monitor | string | `""` | no |
| status\_silenced | Groups to mute for App Services status monitor | map | `{}` | no |
| status\_time\_aggregator | Monitor aggregator for App Services status [available values: min, max or avg] | string | `"max"` | no |
| status\_timeframe | Monitor timeframe for App Services status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no |
## Outputs
@ -84,6 +91,7 @@ Creates DataDog monitors with the following checks:
| appservices\_http\_success\_status\_rate\_id | id for monitor appservices_http_success_status_rate |
| appservices\_memory\_usage\_count\_id | id for monitor appservices_memory_usage_count |
| appservices\_response\_time\_id | id for monitor appservices_response_time |
| appservices\_status\_id | id for monitor appservices_status |
## Related documentation

View File

@ -267,3 +267,39 @@ variable "http_successful_requests_threshold_warning" {
default = 30
description = "Warning regarding acceptable percent of 2xx & 3xx requests"
}
# On/off switch for the App Services status monitor. The value is a string
# and is compared against the literal "true" by the monitor's count expression.
variable "status_enabled" {
  type        = "string"
  default     = "true"
  description = "Flag to enable App Services status monitor"
}
# Optional notification text specific to the status monitor. Left empty by
# default; the monitor passes it through coalesce() together with var.message.
variable "status_message" {
  type        = "string"
  default     = ""
  description = "Custom message for App Services status monitor"
}
# Map handed to the status monitor's `silenced` argument; empty by default,
# i.e. nothing is muted.
variable "status_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for App Services status monitor"
}
# Additional tags appended after the fixed tag set of the status monitor;
# empty by default.
variable "status_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for App Services status monitor"
}
# Time aggregation function placed at the front of the status monitor query
# (before the timeframe). Defaults to "max".
variable "status_time_aggregator" {
  type        = "string"
  default     = "max"
  description = "Monitor aggregator for App Services status [available values: min, max or avg]"
}
# Evaluation window interpolated into the status monitor query, inside the
# parentheses following the time aggregator. Defaults to the last 5 minutes.
variable "status_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for App Services status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}

View File

@ -21,7 +21,7 @@ resource "datadog_monitor" "appservices_response_time" {
silenced = "${var.response_time_silenced}"
notify_no_data = true # Will notify when no data is received
notify_no_data = false # Will NOT notify when no data is received
renotify_interval = 0
require_full_window = false
timeout_h = 0
@ -53,7 +53,7 @@ resource "datadog_monitor" "appservices_memory_usage_count" {
silenced = "${var.memory_usage_silenced}"
notify_no_data = true # Will notify when no data is received
notify_no_data = false # Will NOT notify when no data is received
renotify_interval = 0
require_full_window = false
timeout_h = 0
@ -162,3 +162,30 @@ resource "datadog_monitor" "appservices_http_success_status_rate" {
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", "${var.http_successful_requests_extra_tags}"]
}
# Monitoring App Services status
#
# Metric alert on azure.app_services.status, grouped by resource_group,
# region and name. It goes critical when the aggregated value over the
# configured timeframe is below 1.
resource "datadog_monitor" "appservices_status" {
  # Instantiated only when status_enabled is the string "true".
  count = "${var.status_enabled == "true" ? 1 : 0}"

  type = "metric alert"
  name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services is down"

  # Monitor-specific message first, module-wide message as the fallback.
  message = "${coalesce(var.status_message, var.message)}"

  # The heredoc body and its terminator are kept at column 0 so the query
  # string carries no leading whitespace.
  query = <<EOQ
${var.status_time_aggregator}(${var.status_timeframe}):avg:azure.app_services.status${module.filter-tags.query_alert} by {resource_group,region,name} < 1
EOQ

  # Same value as the `< 1` comparison at the end of the query.
  thresholds {
    critical = 1
  }

  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  silenced = "${var.status_silenced}"

  notify_no_data      = true # Will notify when no data is received
  require_full_window = false
  renotify_interval   = 0
  timeout_h           = 0
  include_tags        = true

  # Fixed tag set followed by the caller-supplied extra tags.
  tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"]
}

View File

@ -22,3 +22,8 @@ output "appservices_http_success_status_rate_id" {
description = "id for monitor appservices_http_success_status_rate"
value = "${datadog_monitor.appservices_http_success_status_rate.*.id}"
}
# Exposes the id(s) of the status monitor. The splat form is used because the
# resource is declared with `count`.
output "appservices_status_id" {
  value       = "${datadog_monitor.appservices_status.*.id}"
  description = "id for monitor appservices_status"
}