diff --git a/cloud/azure/app-services/README.md b/cloud/azure/app-services/README.md index e56fac2..ab49366 100644 --- a/cloud/azure/app-services/README.md +++ b/cloud/azure/app-services/README.md @@ -19,9 +19,8 @@ Creates a DataDog monitors with the following checks : * Response time * Memory usage count -* HTTP 404 errors -* HTTP 50x errors -* HTTP 20x rate +* HTTP 404 requests +* HTTP 2xx requests Inputs ------ @@ -32,12 +31,10 @@ Inputs | environment | Architecture environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| http_2xx_status_rate_limit | | string | `30` | no | -| http_2xx_status_rate_threshold_critical | Alerting threshold (percentage) | string | `0.9` | no | -| http_2xx_status_rate_threshold_warning | Warning threshold (percentage) | string | `0.95` | no | -| http_404_errors_count_rate_limit | | string | `30` | no | -| http_404_errors_count_rate_threshold_critical | Alerting threshold (number of requests) | string | `30` | no | -| http_404_errors_count_rate_threshold_warning | Warning threshold (number of requests) | string | `10` | no | +| http_2xx_requests_threshold_critical | Minimum critical acceptable percent of 2xx requests | string | `90` | no | +| http_2xx_requests_threshold_warning | Minimum warning acceptable percent of 2xx requests | string | `95` | no | +| http_404_requests_threshold_critical | Maximum critical acceptable percent of 404 errors | string | `40` | no | +| http_404_requests_threshold_warning | Maximum warning acceptable percent of 404 errors | string | `30` | no | | memory_usage_threshold_critical | Alerting threshold in Mib | string | `52430000` | no | | memory_usage_threshold_warning | Warning threshold in MiB | string | `33550000` | no | | message | Message sent when a monitor is triggered | string | - | yes | diff --git 
a/cloud/azure/app-services/inputs.tf b/cloud/azure/app-services/inputs.tf index c4bc451..541a0e7 100644 --- a/cloud/azure/app-services/inputs.tf +++ b/cloud/azure/app-services/inputs.tf @@ -54,34 +54,26 @@ variable "memory_usage_threshold_warning" { ### HTTP 404 status pages ### ################################# -variable "http_404_errors_count_rate_limit" { - default = 30 +variable "http_404_requests_threshold_critical" { + default = 40 + description = "Maximum critical acceptable percent of 404 errors" } -variable "http_404_errors_count_rate_threshold_critical" { +variable "http_404_requests_threshold_warning" { default = 30 - description = "Alerting threshold (number of requests)" -} - -variable "http_404_errors_count_rate_threshold_warning" { - default = 10 - description = "Warning threshold (number of requests)" + description = "Maximum warning acceptable percent of 404 errors" } ################################# ### HTTP 202 status pages ### ################################# -variable "http_2xx_status_rate_limit" { - default = 30 +variable "http_2xx_requests_threshold_critical" { + default = 90 + description = "Minimum critical acceptable percent of 2xx requests" } -variable "http_2xx_status_rate_threshold_critical" { - default = 0.9 - description = "Alerting threshold (percentage)" -} - -variable "http_2xx_status_rate_threshold_warning" { - default = 0.95 - description = "Warning threshold (percentage)" +variable "http_2xx_requests_threshold_warning" { + default = 95 + description = "Minimum warning acceptable percent of 2xx requests" } diff --git a/cloud/azure/app-services/monitors-app_services.tf b/cloud/azure/app-services/monitors-app_services.tf index aedc748..0abc8fd 100644 --- a/cloud/azure/app-services/monitors-app_services.tf +++ b/cloud/azure/app-services/monitors-app_services.tf @@ -15,7 +15,7 @@ resource "datadog_monitor" "appservices_response_time" { query = <= ${var.response_time_threshold_critical} + ) > ${var.response_time_threshold_critical} 
EOF evaluation_delay = "${var.delay}" @@ -44,7 +44,7 @@ resource "datadog_monitor" "appservices_memory_usage_count" { query = <= ${var.memory_usage_threshold_critical} + ) > ${var.memory_usage_threshold_critical} EOF evaluation_delay = "${var.delay}" @@ -71,17 +71,18 @@ resource "datadog_monitor" "appservices_http_404_errors_count" { message = "${var.message}" query = < ${var.http_404_errors_count_rate_threshold_critical} + sum(last_5m): ( + avg:azure.app_services.http404{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() / + avg:azure.app_services.requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() + ) * 100 > ${var.http_404_requests_threshold_critical} EOF evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" thresholds { - warning = "${var.http_404_errors_count_rate_threshold_warning}" - critical = "${var.http_404_errors_count_rate_threshold_critical}" + warning = "${var.http_404_requests_threshold_warning}" + critical = "${var.http_404_requests_threshold_critical}" } notify_no_data = false # Will NOT notify when no data is received @@ -102,16 +103,16 @@ resource "datadog_monitor" "appservices_http_2xx_status_rate" { query = <