diff --git a/cloud/azure/app-services/README.md b/cloud/azure/app-services/README.md
index 7cd5908..ca57b7a 100644
--- a/cloud/azure/app-services/README.md
+++ b/cloud/azure/app-services/README.md
@@ -19,6 +19,7 @@ Creates DataDog monitors with the following checks:
 - App Services HTTP 4xx errors too high
 - App Services HTTP 5xx errors too high
 - App Services HTTP successful responses too low
+- App Services is down
 - App Services memory usage
 - App Services response time too high
 
@@ -74,6 +75,12 @@ Creates DataDog monitors with the following checks:
 | response\_time\_threshold\_warning | Warning threshold for response time in seconds | string | `"5"` | no |
 | response\_time\_time\_aggregator | Monitor aggregator for App Services response time [available values: min, max or avg] | string | `"min"` | no |
 | response\_time\_timeframe | Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no |
+| status\_enabled | Flag to enable App Services status monitor | string | `"true"` | no |
+| status\_extra\_tags | Extra tags for App Services status monitor | list | `[]` | no |
+| status\_message | Custom message for App Services status monitor | string | `""` | no |
+| status\_silenced | Groups to mute for App Services status monitor | map | `{}` | no |
+| status\_time\_aggregator | Monitor aggregator for App Services status [available values: min, max or avg] | string | `"max"` | no |
+| status\_timeframe | Monitor timeframe for App Services status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no |
 
 ## Outputs
 
@@ -84,6 +91,7 @@ Creates DataDog monitors with the following checks:
 | appservices\_http\_success\_status\_rate\_id | id for monitor appservices_http_success_status_rate |
 | appservices\_memory\_usage\_count\_id | id for monitor appservices_memory_usage_count |
 | appservices\_response\_time\_id | id for monitor appservices_response_time |
+| appservices\_status\_id | id for monitor appservices_status |
 
 ## Related documentation
 
diff --git a/cloud/azure/app-services/inputs.tf b/cloud/azure/app-services/inputs.tf
index 5d89b66..88cc73f 100644
--- a/cloud/azure/app-services/inputs.tf
+++ b/cloud/azure/app-services/inputs.tf
@@ -267,3 +267,39 @@ variable "http_successful_requests_threshold_warning" {
   default = 30
   description = "Warning regarding acceptable percent of 2xx & 3xx requests"
 }
+
+variable "status_enabled" {
+  description = "Flag to enable App Services status monitor"
+  type = "string"
+  default = "true"
+}
+
+variable "status_message" {
+  description = "Custom message for App Services status monitor"
+  type = "string"
+  default = ""
+}
+
+variable "status_silenced" {
+  description = "Groups to mute for App Services status monitor"
+  type = "map"
+  default = {}
+}
+
+variable "status_extra_tags" {
+  description = "Extra tags for App Services status monitor"
+  type = "list"
+  default = []
+}
+
+variable "status_time_aggregator" {
+  description = "Monitor aggregator for App Services status [available values: min, max or avg]"
+  type = "string"
+  default = "max"
+}
+
+variable "status_timeframe" {
+  description = "Monitor timeframe for App Services status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
+  type = "string"
+  default = "last_5m"
+}
diff --git a/cloud/azure/app-services/monitors-app_services.tf b/cloud/azure/app-services/monitors-app_services.tf
index 3eae9f3..5a9ec8f 100644
--- a/cloud/azure/app-services/monitors-app_services.tf
+++ b/cloud/azure/app-services/monitors-app_services.tf
@@ -21,7 +21,7 @@ resource "datadog_monitor" "appservices_response_time" {
 
   silenced = "${var.response_time_silenced}"
 
-  notify_no_data = true # Will notify when no data is received
+  notify_no_data = false # Will NOT notify when no data is received
   renotify_interval = 0
   require_full_window = false
   timeout_h = 0
@@ -53,7 +53,7 @@ resource "datadog_monitor" "appservices_memory_usage_count" {
 
   silenced = "${var.memory_usage_silenced}"
 
-  notify_no_data = true # Will notify when no data is received
+  notify_no_data = false # Will NOT notify when no data is received
   renotify_interval = 0
   require_full_window = false
   timeout_h = 0
@@ -162,3 +162,30 @@ resource "datadog_monitor" "appservices_http_success_status_rate" {
 
   tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:app-services", "team:claranet", "created-by:terraform", "${var.http_successful_requests_extra_tags}"]
 }
+
+# Monitoring App Services status
+resource "datadog_monitor" "appservices_status" {
+  count = "${var.status_enabled == "true" ? 1 : 0}"
+  name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] App Services is down"
+  type = "metric alert"
+  message = "${coalesce(var.status_message, var.message)}"
+
+  query = <