diff --git a/cloud/azure/app-services/README.md b/cloud/azure/app-services/README.md new file mode 100644 index 0000000..e56fac2 --- /dev/null +++ b/cloud/azure/app-services/README.md @@ -0,0 +1,50 @@ +Azure AppServices (Web, API, Functions) DataDog monitors +======================================================== + +How to use this module +---------------------- + +``` +module "datadog-monitors-azure-app-services" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/app-services?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" +} +``` + +Purpose +------- +Creates DataDog monitors with the following checks: + +* Response time +* Memory usage count +* HTTP 404 errors +* HTTP 50x errors +* HTTP 2xx rate + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | `600` | no | +| environment | Architecture environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| http_2xx_status_rate_limit | | string | `30` | no | +| http_2xx_status_rate_threshold_critical | Alerting threshold (percentage) | string | `0.9` | no | +| http_2xx_status_rate_threshold_warning | Warning threshold (percentage) | string | `0.95` | no | +| http_404_errors_count_rate_limit | | string | `30` | no | +| http_404_errors_count_rate_threshold_critical | Alerting threshold (number of requests) | string | `30` | no | +| http_404_errors_count_rate_threshold_warning | Warning threshold (number of requests) | string | `10` | no | +| memory_usage_threshold_critical | Alerting threshold in MiB | string | `52430000` | no | +| memory_usage_threshold_warning | Warning threshold in MiB | string | 
`33550000` | no | +| message | Message sent when a monitor is triggered | string | - | yes | +| response_time_threshold_critical | Alerting threshold in seconds | string | `0.8` | no | +| response_time_threshold_warning | Warning threshold in seconds | string | `0.4` | no | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/azure_app_services](https://docs.datadoghq.com/integrations/azure_app_services) diff --git a/cloud/azure/app-services/inputs.tf b/cloud/azure/app-services/inputs.tf new file mode 100644 index 0000000..c4bc451 --- /dev/null +++ b/cloud/azure/app-services/inputs.tf @@ -0,0 +1,87 @@ +variable "environment" { + description = "Architecture environment" + type = "string" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 600 +} + +################################### +### RESPONSE TIME VARIABLES ### +################################### + +variable "response_time_threshold_critical" { + default = 0.8 + description = "Alerting threshold in seconds" +} + +variable "response_time_threshold_warning" { + default = 0.4 + description = "Warning threshold in seconds" +} + +################################### +### MEMORY USAGE VARIABLES ### +################################### + +variable "memory_usage_threshold_critical" { + default = 52430000 + description = "Alerting threshold in Mib" +} + +variable "memory_usage_threshold_warning" { + default = 33550000 + description = "Warning threshold in MiB" +} + +################################# +### HTTP 404 status pages ### +################################# + +variable 
"http_404_errors_count_rate_limit" { + default = 30 +} + +variable "http_404_errors_count_rate_threshold_critical" { + default = 30 + description = "Alerting threshold (number of requests)" +} + +variable "http_404_errors_count_rate_threshold_warning" { + default = 10 + description = "Warning threshold (number of requests)" +} + +################################# +### HTTP 202 status pages ### +################################# + +variable "http_2xx_status_rate_limit" { + default = 30 +} + +variable "http_2xx_status_rate_threshold_critical" { + default = 0.9 + description = "Alerting threshold (percentage)" +} + +variable "http_2xx_status_rate_threshold_warning" { + default = 0.95 + description = "Warning threshold (percentage)" +} diff --git a/cloud/azure/app-services/monitors-app_services.tf b/cloud/azure/app-services/monitors-app_services.tf new file mode 100644 index 0000000..1cff1af --- /dev/null +++ b/cloud/azure/app-services/monitors-app_services.tf @@ -0,0 +1,124 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_azure_appservices:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + } +} + +# Monitoring App Services response time +resource "datadog_monitor" "appservices_response_time" { + name = "[${var.environment}] App Services response time > ${var.response_time_threshold_critical}s on {{name}}" + type = "metric alert" + message = "${var.message}" + + query = <= ${var.response_time_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + warning = "${var.response_time_threshold_warning}" + critical = "${var.response_time_threshold_critical}" + } + + notify_no_data = true # Will notify when no data is received + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"] +} + +# Monitoring App Services memory usage +resource "datadog_monitor" "appservices_memory_usage_count" { + name = "[${var.environment}] App Services memory usage > ${ceil(var.memory_usage_threshold_critical/1000000)}MiB on {{name}}" + type = "metric alert" + message = "${var.message}" + + query = <= ${var.memory_usage_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + warning = "${var.memory_usage_threshold_warning}" + critical = "${var.memory_usage_threshold_critical}" + } + + notify_no_data = true # Will notify when no data is received + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"] +} + +# Monitoring App Services 404 errors rate +resource "datadog_monitor" "appservices_http_404_errors_count" { + name = "[${var.environment}] App Services HTTP errors > ${var.http_404_errors_count_rate_limit} limit on {{name}}" + type = "metric alert" + message = "${var.message}" + + query = < 
${var.http_404_errors_count_rate_threshold_critical} + EOF + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + thresholds { + warning = "${var.http_404_errors_count_rate_threshold_warning}" + critical = "${var.http_404_errors_count_rate_threshold_critical}" + } + + notify_no_data = false # Will NOT notify when no data is received + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + tags = ["env:${var.environment}", "resource:appservices", "team:azure", "provider:azure"] +} + +# Monitoring App Services HTTP 2xx status pages rate +resource "datadog_monitor" "appservices_http_2xx_status_rate" { + name = "[${var.environment}] App Services Too much non 2xx HTTP status in response to the requests on {{name}}" + type = "metric alert" + message = "${var.message}" + + query = <