diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md new file mode 100644 index 0000000..417f866 --- /dev/null +++ b/cloud/azure/storage/README.md @@ -0,0 +1,60 @@ +Azure Storage DataDog monitors +============================ + +How to use this module +---------------------- + +``` +module "datadog-monitors-azure-storage" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/storage?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + + environment = "${var.environment}" + client_name = "${var.client_name}" +} +``` + +Purpose +------- +Creates DataDog monitors with the following checks: + +* Service availability +* End to end latency +* Minimum successful requests +* Maximum timeout error requests +* Maximum network error requests +* Maximum throttling error requests +* Maximum server other error requests +* Maximum client other error requests +* Maximum authorization error requests + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| client_name | Client name | string | - | yes | +| delay | Delay in seconds for the metric evaluation | string | `600` | no | +| environment | Architecture environment | string | - | yes | +| message | Message sent when a monitor is triggered | string | - | yes | +| use_filter_tags | Filter the data with service tags if true | string | `true` | no | +| availability_threshold_critical | Minimum threshold of availability | string | `90` | no | +| successful_requests_threshold_critical | Minimum threshold of successful requests | string | `90` | no | +| latency_threshold_critical | Maximum threshold of latency in ms | string | `1000` | no | +| timeout_error_requests_threshold_critical | Maximum threshold of timeout error requests in percent | string | `35` | no | +| network_error_requests_threshold_critical | Maximum threshold of network error requests in percent | string | 
`35` | no | +| throttling_error_requests_threshold_critical | Maximum threshold of throttling error requests in percent | string | `50` | no | +| server_other_error_requests_threshold_critical | Maximum threshold of server other error requests in percent | string | `50` | no | +| client_other_error_requests_threshold_critical | Maximum threshold of client other error requests in percent | string | `75` | no | +| authorization_error_requests_threshold_critical | Maximum threshold of authorization error requests in percent | string | `75` | no | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/azure_storage/](https://docs.datadoghq.com/integrations/azure_storage/) + +DataDog blog: [https://www.datadoghq.com/blog/monitor-azure-storage-datadog/](https://www.datadoghq.com/blog/monitor-azure-storage-datadog/) + +Azure Storage metrics documentation: [https://docs.microsoft.com/en-us/azure/storage/common/storage-monitor-storage-account](https://docs.microsoft.com/en-us/azure/storage/common/storage-monitor-storage-account) + diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf new file mode 100644 index 0000000..68e02a5 --- /dev/null +++ b/cloud/azure/storage/inputs.tf @@ -0,0 +1,71 @@ +# Global Terraform +variable "client_name" { + type = "string" +} + +variable "environment" { + description = "Architecture environment" + type = "string" +} + +# Global DataDog +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 600 +} + +variable "use_filter_tags" { + description = "Filter the data with service tags if true" + default = "true" +} + +# Azure Storage specific +variable "availability_threshold_critical" { + description = "Minimum acceptable percent of availability for a storage" + default = 90 +} + +variable "successful_requests_threshold_critical" { + 
description = "Minimum acceptable percent of successful requests for a storage" + default = 90 +} + +variable "latency_threshold_critical" { + description = "Maximum acceptable end to end latency (ms) for a storage" + default = 1000 +} + +variable "timeout_error_requests_threshold_critical" { + description = "Maximum acceptable percent of timeout error requests for a storage" + default = 35 +} + +variable "network_error_requests_threshold_critical" { + description = "Maximum acceptable percent of network error requests for a storage" + default = 35 +} + +variable "throttling_error_requests_threshold_critical" { + description = "Maximum acceptable percent of throttling error requests for a storage" + default = 50 +} + +variable "server_other_error_requests_threshold_critical" { + description = "Maximum acceptable percent of server other error requests for a storage" + default = 50 +} + +variable "client_other_error_requests_threshold_critical" { + description = "Maximum acceptable percent of client other error requests for a storage" + default = 75 +} + +variable "authorization_error_requests_threshold_critical" { + description = "Maximum acceptable percent of authorization error requests for a storage" + default = 75 +} + diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf new file mode 100644 index 0000000..64f3286 --- /dev/null +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -0,0 +1,255 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.use_filter_tags == "true" ? 
format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "*"}" + } +} + +resource "datadog_monitor" "availability" { + name = "[${var.environment}] Azure Storage {{name}} unavailability detected" + message = "${var.message}" + + query = < ${var.latency_threshold_critical} +EOF + + thresholds { + critical = "${var.latency_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + +resource "datadog_monitor" "timeout_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of timeout error requests on {{name}}" + message = "${var.message}" + + query = < ${var.timeout_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.timeout_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + + +resource "datadog_monitor" "network_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of network error requests on {{name}}" + message = "${var.message}" + + query = < ${var.network_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.network_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + + 
+resource "datadog_monitor" "throttling_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of throttling error requests on {{name}}" + message = "${var.message}" + + query = < ${var.throttling_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.throttling_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + + +resource "datadog_monitor" "server_other_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of server_other error requests on {{name}}" + message = "${var.message}" + + query = < ${var.server_other_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.server_other_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + + +resource "datadog_monitor" "client_other_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of client_other error requests on {{name}}" + message = "${var.message}" + + query = < ${var.client_other_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.client_other_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = 
["env:${var.environment}"] +} + + +resource "datadog_monitor" "authorization_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of authorization error requests on {{name}}" + message = "${var.message}" + + query = < ${var.authorization_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.authorization_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +}