From fdfe302a3433f22982df979778f609e669370f3b Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Tue, 31 Oct 2017 20:32:44 +0100 Subject: [PATCH 01/15] MON-79 azure storage monitors --- cloud/azure/storage/README.md | 60 +++++ cloud/azure/storage/inputs.tf | 71 +++++ cloud/azure/storage/monitors-azure-storage.tf | 255 ++++++++++++++++++ 3 files changed, 386 insertions(+) create mode 100644 cloud/azure/storage/README.md create mode 100644 cloud/azure/storage/inputs.tf create mode 100644 cloud/azure/storage/monitors-azure-storage.tf diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md new file mode 100644 index 0000000..417f866 --- /dev/null +++ b/cloud/azure/storage/README.md @@ -0,0 +1,60 @@ +Azure Storage DataDog monitors +============================ + +How to use this module +---------------------- + +``` +module "datadog-monitors-azure-storage" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/storage?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + + environment = "${var.environment}" + client_name = "${var.client_name}" +} +``` + +Purpose +------- +Creates DataDog monitors with the following checks: + +* Service availability +* End to end latency +* Minimum successful requests +* Maximum timeout error requests +* Maximum network error requests +* Maximum throttling error requests +* Maximum server other error requests +* Maximum client other error requests +* Maximum authorization error requests + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| client_name | Client name | string | - | yes | +| delay | Delay in seconds for the metric evaluation | string | `600` | no | +| environment | Architecture environment | string | - | yes | +| message | Message sent when a monitor is triggered | string | - | yes | +| use_filter_tags | Filter the data with service tags if true | string | `true` | 
no | +| availability_threshold_critical | Minimum threshold of availability | string | `90` | no | +| successful_requests_threshold_critical | Minimum threshold of successful requests | string | `90` | no | +| latency_threshold_critical | Maximum threshold of latency in ms | string | `1000` | no | +| timeout_error_requests_threshold_critical | Maximum threshold of timeout error requests in percent | string | `35` | no | +| network_error_requests_threshold_critical | Maximum threshold of network error requests in percent | string | `35` | no | +| throttling_error_requests_threshold_critical | Maximum threshold of throttling error requests in percent | string | `50` | no | +| server_other_error_requests_threshold_critical | Maximum threshold of server other error requests in percent | string | `50` | no | +| client_other_error_requests_threshold_critical | Maximum threshold of client other error requests in percent | string | `75` | no | +| authorization_error_requests_threshold_critical | Maximum threshold of authorization error requests in percent | string | `75` | no | + +Related documentation +--------------------- + +DataDog documentation: [https://docs.datadoghq.com/integrations/azure_storage/](https://docs.datadoghq.com/integrations/azure_storage/) + +DataDog blog: [https://www.datadoghq.com/blog/monitor-azure-storage-datadog/](https://www.datadoghq.com/blog/monitor-azure-storage-datadog/) + +Azure Storage metrics documentation: [https://docs.microsoft.com/en-us/azure/storage/common/storage-monitor-storage-account](https://docs.microsoft.com/en-us/azure/storage/common/storage-monitor-storage-account) + diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf new file mode 100644 index 0000000..68e02a5 --- /dev/null +++ b/cloud/azure/storage/inputs.tf @@ -0,0 +1,71 @@ +# Global Terraform +variable "client_name" { + type = "string" +} + +variable "environment" { + description = "Architecture environment" + type = "string" +} + +# Global DataDog 
+variable "message" { + description = "Message sent when a Redis monitor is triggered" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 600 +} + +variable "use_filter_tags" { + description = "Filter the data with service tags if true" + default = "true" +} + +# Azure Storage specific +variable "availability_threshold_critical" { + description = "Minimum acceptable percent of availability for a storage" + default = 90 +} + +variable "successful_requests_threshold_critical" { + description = "Minimum acceptable percent of successful requests for a storage" + default = 90 +} + +variable "latency_threshold_critical" { + description = "Maximum acceptable end to end latency (ms) for a storage" + default = 1000 +} + +variable "timeout_error_requests_threshold_critical" { + description = "Maximum acceptable percent of timeout error requests for a storage" + default = 35 +} + +variable "network_error_requests_threshold_critical" { + description = "Maximum acceptable percent of network error requests for a storage" + default = 35 +} + +variable "throttling_error_requests_threshold_critical" { + description = "Maximum acceptable percent of throttling error requests for a storage" + default = 50 +} + +variable "server_other_error_requests_threshold_critical" { + description = "Maximum acceptable percent of server other error requests for a storage" + default = 50 +} + +variable "client_other_error_requests_threshold_critical" { + description = "Maximum acceptable percent of client other error requests for a storage" + default = 75 +} + +variable "authorization_error_requests_threshold_critical" { + description = "Maximum acceptable percent of authorization error requests for a storage" + default = 75 +} + diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf new file mode 100644 index 0000000..64f3286 --- /dev/null +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -0,0 
+1,255 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "*"}" + } +} + +resource "datadog_monitor" "availability" { + name = "[${var.environment}] Azure Storage {{name}} unvailability detected" + message = "${var.message}" + + query = < ${var.latency_threshold_critical} +EOF + + thresholds { + critical = "${var.latency_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + +resource "datadog_monitor" "timeout_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of timeout error requests on {{name}}" + message = "${var.message}" + + query = < ${var.timeout_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.timeout_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + + +resource "datadog_monitor" "network_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of network error requests on {{name}}" + message = "${var.message}" + + query = < ${var.network_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.network_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + 
evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + + +resource "datadog_monitor" "throttling_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of throttling error requests on {{name}}" + message = "${var.message}" + + query = < ${var.throttling_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.throttling_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + + +resource "datadog_monitor" "server_other_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of server_other error requests on {{name}}" + message = "${var.message}" + + query = < ${var.server_other_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.server_other_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + + +resource "datadog_monitor" "client_other_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of client_other error requests on {{name}}" + message = "${var.message}" + + query = < ${var.client_other_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.client_other_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = 
"${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} + + +resource "datadog_monitor" "authorization_error_requests" { + name = "[${var.environment}] Azure Storage {{value}}% of authorization error requests on {{name}}" + message = "${var.message}" + + query = < ${var.authorization_error_requests_threshold_critical} +EOF + + thresholds { + critical = "${var.authorization_error_requests_threshold_critical}" + } + + type = "query alert" + notify_no_data = true + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + evaluation_delay = "${var.delay}" + renotify_interval = 0 + no_data_timeframe = 20 + + tags = ["env:${var.environment}"] +} From 681ada337d59c2c667e065458048d76544b1828d Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Thu, 2 Nov 2017 11:55:11 +0100 Subject: [PATCH 02/15] MON-79 EOF on querys --- cloud/azure/storage/monitors-azure-storage.tf | 36 ++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index 64f3286..e2b115e 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -11,7 +11,9 @@ resource "datadog_monitor" "availability" { message = "${var.message}" query = < ${var.latency_threshold_critical} + max(last_5m): ( + avg:azure.storage.average_e2_e_latency{${data.template_file.filter.rendered}} by {resource_group,storage_type,name} + ) > ${var.latency_threshold_critical} EOF thresholds { @@ -92,7 +98,9 @@ resource "datadog_monitor" "timeout_error_requests" { message = "${var.message}" query = < ${var.timeout_error_requests_threshold_critical} + avg(last_5m): ( + avg:azure.storage.percent_timeout_error{${data.template_file.filter.rendered}} by {resource_group,storage_type,name} + ) > 
${var.timeout_error_requests_threshold_critical} EOF thresholds { @@ -120,7 +128,9 @@ resource "datadog_monitor" "network_error_requests" { message = "${var.message}" query = < ${var.network_error_requests_threshold_critical} + avg(last_5m): ( + avg:azure.storage.percent_network_error{${data.template_file.filter.rendered}} by {resource_group,storage_type,name} + ) > ${var.network_error_requests_threshold_critical} EOF thresholds { @@ -148,7 +158,9 @@ resource "datadog_monitor" "throttling_error_requests" { message = "${var.message}" query = < ${var.throttling_error_requests_threshold_critical} + avg(last_5m): ( + avg:azure.storage.percent_throttling_error{${data.template_file.filter.rendered}} by {resource_group,storage_type,name} + ) > ${var.throttling_error_requests_threshold_critical} EOF thresholds { @@ -176,7 +188,9 @@ resource "datadog_monitor" "server_other_error_requests" { message = "${var.message}" query = < ${var.server_other_error_requests_threshold_critical} + avg(last_5m): ( + avg:azure.storage.percent_server_other_error{${data.template_file.filter.rendered}} by {resource_group,storage_type,name} + ) > ${var.server_other_error_requests_threshold_critical} EOF thresholds { @@ -204,7 +218,9 @@ resource "datadog_monitor" "client_other_error_requests" { message = "${var.message}" query = < ${var.client_other_error_requests_threshold_critical} + avg(last_5m): ( + avg:azure.storage.percent_client_other_error{${data.template_file.filter.rendered}} by {resource_group,storage_type,name} + ) > ${var.client_other_error_requests_threshold_critical} EOF thresholds { @@ -232,7 +248,9 @@ resource "datadog_monitor" "authorization_error_requests" { message = "${var.message}" query = < ${var.authorization_error_requests_threshold_critical} + avg(last_5m): ( + avg:azure.storage.percent_authorization_error{${data.template_file.filter.rendered}} by {resource_group,storage_type,name} + ) > ${var.authorization_error_requests_threshold_critical} EOF thresholds { From 
9470ad31668879cb7fb7a97585badb8d403d8552 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 3 Nov 2017 13:00:43 +0100 Subject: [PATCH 03/15] MON-79 add monitor tags --- cloud/azure/storage/inputs.tf | 12 ++++++++++++ cloud/azure/storage/monitors-azure-storage.tf | 18 +++++++++--------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf index 68e02a5..5f99615 100644 --- a/cloud/azure/storage/inputs.tf +++ b/cloud/azure/storage/inputs.tf @@ -8,6 +8,18 @@ variable "environment" { type = "string" } +variable "provider" { + description = "Cloud provider which the monitor and its based metric depend on" + type = "string" + default = "azure" +} + +variable "service" { + description = "Service monitored by this set of monitors" + type = "string" + default = "storage" +} + # Global DataDog variable "message" { description = "Message sent when a Redis monitor is triggered" diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index e2b115e..388706c 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -32,7 +32,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}"] + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "successful_requests" { @@ -61,7 +61,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}"] + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "latency" { @@ -90,7 +90,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}"] + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "timeout_error_requests" { @@ -119,7 +119,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = 
["env:${var.environment}"] + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } @@ -149,7 +149,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}"] + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } @@ -179,7 +179,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}"] + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } @@ -209,7 +209,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}"] + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } @@ -239,7 +239,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}"] + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } @@ -269,5 +269,5 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}"] + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } From 4e0be10d976aae1fec11f7f2eb662f1ab09a0026 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 3 Nov 2017 17:51:17 +0100 Subject: [PATCH 04/15] MON-79 remove useless client_name variable --- cloud/azure/storage/README.md | 2 -- cloud/azure/storage/inputs.tf | 4 ---- 2 files changed, 6 deletions(-) diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md index 417f866..a24bcd8 100644 --- a/cloud/azure/storage/README.md +++ b/cloud/azure/storage/README.md @@ -11,7 +11,6 @@ module "datadog-monitors-azure-storage" { message = "${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" - client_name = "${var.client_name}" } ``` @@ -34,7 +33,6 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| client_name | Client name | string | - | yes | | delay | Delay in seconds for the metric evaluation | string | 
`600` | no | | environment | Architecture environment | string | - | yes | | message | Message sent when a monitor is triggered | string | - | yes | diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf index 5f99615..bb21f9a 100644 --- a/cloud/azure/storage/inputs.tf +++ b/cloud/azure/storage/inputs.tf @@ -1,8 +1,4 @@ # Global Terraform -variable "client_name" { - type = "string" -} - variable "environment" { description = "Architecture environment" type = "string" From 3cbd81df8af7fd08c6b40fb85908b86d68f4fc02 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 3 Nov 2017 20:10:03 +0100 Subject: [PATCH 05/15] MON-79 add subscription_id to filter on --- cloud/azure/storage/README.md | 3 ++- cloud/azure/storage/inputs.tf | 5 +++++ cloud/azure/storage/monitors-azure-storage.tf | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md index a24bcd8..bac3ac2 100644 --- a/cloud/azure/storage/README.md +++ b/cloud/azure/storage/README.md @@ -9,8 +9,8 @@ module "datadog-monitors-azure-storage" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/storage?ref={revision}" message = "${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" + subscription_id = "${var.subscription_id}" } ``` @@ -35,6 +35,7 @@ Inputs |------|-------------|:----:|:-----:|:-----:| | delay | Delay in seconds for the metric evaluation | string | `600` | no | | environment | Architecture environment | string | - | yes | +| subscription_id | Azure account id used as filter for monitors | string | - | yes | | message | Message sent when a monitor is triggered | string | - | yes | | use_filter_tags | Filter the data with service tags if true | string | `true` | no | | availability_threshold_critical | Minimum threshold of availability | string | `90` | no | diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf index 
bb21f9a..e77079d 100644 --- a/cloud/azure/storage/inputs.tf +++ b/cloud/azure/storage/inputs.tf @@ -4,6 +4,11 @@ variable "environment" { type = "string" } +variable "subscription_id" { + description = "Azure account id used as filter for monitors" + type = "string" +} + variable "provider" { description = "Cloud provider which the monitor and its based metric depend on" type = "string" diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index 388706c..671eb42 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "*"}" + filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "subscription_id:${var.subscription_id}"}" } } From cab81284ac302e22631a3a78f1dd2b283ee638ce Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 3 Nov 2017 20:45:05 +0100 Subject: [PATCH 06/15] MON-79 update readme --- cloud/azure/storage/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md index bac3ac2..bdd2fb4 100644 --- a/cloud/azure/storage/README.md +++ b/cloud/azure/storage/README.md @@ -35,6 +35,8 @@ Inputs |------|-------------|:----:|:-----:|:-----:| | delay | Delay in seconds for the metric evaluation | string | `600` | no | | environment | Architecture environment | string | - | yes | +| provider | What is the monitored provider | string | azure | no | +| service | What is the monitored service | string | storage | no | | subscription_id | Azure account id used as filter for monitors | string | - | yes | | message | Message sent when a monitor is triggered | string | - | yes | | use_filter_tags | Filter the data with 
service tags if true | string | `true` | no | From f2a33f07f5378f7cf6eab4ee05e53be648e96fb5 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 23 Nov 2017 11:14:33 +0100 Subject: [PATCH 07/15] MON-79 Fix readme missing = --- cloud/azure/storage/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md index bdd2fb4..f78f831 100644 --- a/cloud/azure/storage/README.md +++ b/cloud/azure/storage/README.md @@ -1,5 +1,5 @@ Azure Storage DataDog monitors -============================ +============================== How to use this module ---------------------- From a6a1fdbfdc28aefc2cde46ed120c42a35fb38f0c Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 23 Nov 2017 11:16:08 +0100 Subject: [PATCH 08/15] MON-79 Change query alert type to metric alert --- cloud/azure/storage/monitors-azure-storage.tf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index 671eb42..86b35e1 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -20,7 +20,7 @@ EOF critical = "${var.availability_threshold_critical}" } - type = "query alert" + type = "metric alert" notify_no_data = true notify_audit = false timeout_h = 0 @@ -49,7 +49,7 @@ EOF critical = "${var.successful_requests_threshold_critical}" } - type = "query alert" + type = "metric alert" notify_no_data = true notify_audit = false timeout_h = 0 @@ -78,7 +78,7 @@ EOF critical = "${var.latency_threshold_critical}" } - type = "query alert" + type = "metric alert" notify_no_data = true notify_audit = false timeout_h = 0 @@ -107,7 +107,7 @@ EOF critical = "${var.timeout_error_requests_threshold_critical}" } - type = "query alert" + type = "metric alert" notify_no_data = true notify_audit = false timeout_h = 0 @@ -137,7 +137,7 @@ EOF critical = 
"${var.network_error_requests_threshold_critical}" } - type = "query alert" + type = "metric alert" notify_no_data = true notify_audit = false timeout_h = 0 @@ -167,7 +167,7 @@ EOF critical = "${var.throttling_error_requests_threshold_critical}" } - type = "query alert" + type = "metric alert" notify_no_data = true notify_audit = false timeout_h = 0 @@ -197,7 +197,7 @@ EOF critical = "${var.server_other_error_requests_threshold_critical}" } - type = "query alert" + type = "metric alert" notify_no_data = true notify_audit = false timeout_h = 0 @@ -227,7 +227,7 @@ EOF critical = "${var.client_other_error_requests_threshold_critical}" } - type = "query alert" + type = "metric alert" notify_no_data = true notify_audit = false timeout_h = 0 @@ -257,7 +257,7 @@ EOF critical = "${var.authorization_error_requests_threshold_critical}" } - type = "query alert" + type = "metric alert" notify_no_data = true notify_audit = false timeout_h = 0 From 14a79613b48d89067bd920a7d630c450ecff67d3 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 23 Nov 2017 11:23:30 +0100 Subject: [PATCH 09/15] MON-79 Allow custom filtering on tags --- cloud/azure/storage/inputs.tf | 9 +++++++-- cloud/azure/storage/monitors-azure-storage.tf | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf index e77079d..b594d82 100644 --- a/cloud/azure/storage/inputs.tf +++ b/cloud/azure/storage/inputs.tf @@ -31,11 +31,16 @@ variable "delay" { default = 600 } -variable "use_filter_tags" { - description = "Filter the data with service tags if true" +variable "filter_tags_default" { + description = "Use default filter tags convention" default = "true" } +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_default is false" + default = "*" +} + # Azure Storage specific variable "availability_threshold_critical" { description = "Minimum acceptable percent of availability for a storage" diff 
--git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index 86b35e1..522a34c 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "subscription_id:${var.subscription_id}"}" + filter = "${var.filter_tags_default == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } From 7b9156b40b08fa64d5c61bed7663d8e530cf3db7 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 23 Nov 2017 11:34:12 +0100 Subject: [PATCH 10/15] MON-79 hardcode tags --- cloud/azure/storage/inputs.tf | 17 ----------------- cloud/azure/storage/monitors-azure-storage.tf | 18 +++++++++--------- 2 files changed, 9 insertions(+), 26 deletions(-) diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf index b594d82..a1d459e 100644 --- a/cloud/azure/storage/inputs.tf +++ b/cloud/azure/storage/inputs.tf @@ -4,23 +4,6 @@ variable "environment" { type = "string" } -variable "subscription_id" { - description = "Azure account id used as filter for monitors" - type = "string" -} - -variable "provider" { - description = "Cloud provider which the monitor and its based metric depend on" - type = "string" - default = "azure" -} - -variable "service" { - description = "Service monitored by this set of monitors" - type = "string" - default = "storage" -} - # Global DataDog variable "message" { description = "Message sent when a Redis monitor is triggered" diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index 522a34c..ba2ece6 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -32,7 +32,7 @@ EOF 
renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] } resource "datadog_monitor" "successful_requests" { @@ -61,7 +61,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] } resource "datadog_monitor" "latency" { @@ -90,7 +90,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] } resource "datadog_monitor" "timeout_error_requests" { @@ -119,7 +119,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] } @@ -149,7 +149,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] } @@ -179,7 +179,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] } @@ -209,7 +209,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] } @@ -239,7 +239,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = 
["env:${var.environment}","resource:storage","team:azure", "provider:azure"] } @@ -269,5 +269,5 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] } From 567fbb27f67304e00a987ac9beec69f6c5887e5e Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 23 Nov 2017 11:40:50 +0100 Subject: [PATCH 11/15] MON-79 descrease threshold for storage monitors --- cloud/azure/storage/inputs.tf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf index a1d459e..9b36cc9 100644 --- a/cloud/azure/storage/inputs.tf +++ b/cloud/azure/storage/inputs.tf @@ -42,31 +42,31 @@ variable "latency_threshold_critical" { variable "timeout_error_requests_threshold_critical" { description = "Maximum acceptable percent of timeout error requests for a storage" - default = 35 + default = 5 } variable "network_error_requests_threshold_critical" { description = "Maximum acceptable percent of network error requests for a storage" - default = 35 + default = 5 } variable "throttling_error_requests_threshold_critical" { description = "Maximum acceptable percent of throttling error requests for a storage" - default = 50 + default = 10 } variable "server_other_error_requests_threshold_critical" { description = "Maximum acceptable percent of server other error requests for a storage" - default = 50 + default = 10 } variable "client_other_error_requests_threshold_critical" { description = "Maximum acceptable percent of client other error requests for a storage" - default = 75 + default = 15 } variable "authorization_error_requests_threshold_critical" { description = "Maximum acceptable percent of authorization error requests for a storage" - default = 75 + default = 15 } From 45b51b446d6c2289fcd3b9a29d082174dcf0fab4 Mon Sep 17 00:00:00 2001 From: 
Quentin Manfroi Date: Thu, 23 Nov 2017 11:52:41 +0100 Subject: [PATCH 12/15] MON-79 update readme with tagging variables --- cloud/azure/storage/README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md index f78f831..c9a5aaa 100644 --- a/cloud/azure/storage/README.md +++ b/cloud/azure/storage/README.md @@ -35,11 +35,9 @@ Inputs |------|-------------|:----:|:-----:|:-----:| | delay | Delay in seconds for the metric evaluation | string | `600` | no | | environment | Architecture environment | string | - | yes | -| provider | What is the monitored provider | string | azure | no | -| service | What is the monitored service | string | storage | no | -| subscription_id | Azure account id used as filter for monitors | string | - | yes | | message | Message sent when a monitor is triggered | string | - | yes | -| use_filter_tags | Filter the data with service tags if true | string | `true` | no | +| filter_tags_default | Use default tagging convention | string | `true` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_default is false | string | `*` | no | | availability_threshold_critical | Minimum threshold of availability | string | `90` | no | | successful_requests_threshold_critical | Minimum threshold of successful requests | string | `90` | no | | latency_threshold_critical | Maximum threshold of latency in ms | string | `1000` | no | From 9e2b4317b8061a9b4ec6aa5a56ff2d16d60affe1 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 23 Nov 2017 11:55:26 +0100 Subject: [PATCH 13/15] MON-79 remove subscription id from example of readme --- cloud/azure/storage/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md index c9a5aaa..166a21c 100644 --- a/cloud/azure/storage/README.md +++ b/cloud/azure/storage/README.md @@ -10,7 +10,6 @@ module "datadog-monitors-azure-storage" { message = 
"${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" - subscription_id = "${var.subscription_id}" } ``` From bc1a493204d66bc896f86db4aca15bb160f7b37b Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 23 Nov 2017 12:18:25 +0100 Subject: [PATCH 14/15] MON-79 add spaces to tags list --- cloud/azure/storage/monitors-azure-storage.tf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index ba2ece6..28a35ad 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -32,7 +32,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] + tags = ["env:${var.environment}", "resource:storage", "team:azure", "provider:azure"] } resource "datadog_monitor" "successful_requests" { @@ -61,7 +61,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] + tags = ["env:${var.environment}", "resource:storage", "team:azure", "provider:azure"] } resource "datadog_monitor" "latency" { @@ -90,7 +90,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] + tags = ["env:${var.environment}", "resource:storage", "team:azure", "provider:azure"] } resource "datadog_monitor" "timeout_error_requests" { @@ -119,7 +119,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] + tags = ["env:${var.environment}", "resource:storage", "team:azure", "provider:azure"] } @@ -149,7 +149,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] + tags = ["env:${var.environment}", 
"resource:storage", "team:azure", "provider:azure"] } @@ -179,7 +179,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] + tags = ["env:${var.environment}", "resource:storage", "team:azure", "provider:azure"] } @@ -209,7 +209,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] + tags = ["env:${var.environment}", "resource:storage", "team:azure", "provider:azure"] } @@ -239,7 +239,7 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] + tags = ["env:${var.environment}", "resource:storage", "team:azure", "provider:azure"] } @@ -269,5 +269,5 @@ EOF renotify_interval = 0 no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:storage","team:azure", "provider:azure"] + tags = ["env:${var.environment}", "resource:storage", "team:azure", "provider:azure"] } From e02cf797ab28bb3ce309889865bb6304634db23e Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 23 Nov 2017 12:21:58 +0100 Subject: [PATCH 15/15] MON-79 change default tag variable for more explicit name --- cloud/azure/storage/README.md | 4 ++-- cloud/azure/storage/inputs.tf | 4 ++-- cloud/azure/storage/monitors-azure-storage.tf | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cloud/azure/storage/README.md b/cloud/azure/storage/README.md index 166a21c..0849152 100644 --- a/cloud/azure/storage/README.md +++ b/cloud/azure/storage/README.md @@ -35,8 +35,8 @@ Inputs | delay | Delay in seconds for the metric evaluation | string | `600` | no | | environment | Architecture environment | string | - | yes | | message | Message sent when a monitor is triggered | string | - | yes | -| filter_tags_default | Use default tagging convention | string | `true` | no | -| filter_tags_custom | Tags used for custom filtering when 
filter_tags_default is false | string | `*` | no | +| filter_tags_use_defaults | Use default tagging convention | string | `true` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | availability_threshold_critical | Minimum threshold of availability | string | `90` | no | | successful_requests_threshold_critical | Minimum threshold of successful requests | string | `90` | no | | latency_threshold_critical | Maximum threshold of latency in ms | string | `1000` | no | diff --git a/cloud/azure/storage/inputs.tf b/cloud/azure/storage/inputs.tf index 9b36cc9..5c512b5 100644 --- a/cloud/azure/storage/inputs.tf +++ b/cloud/azure/storage/inputs.tf @@ -14,13 +14,13 @@ variable "delay" { default = 600 } -variable "filter_tags_default" { +variable "filter_tags_use_defaults" { description = "Use default filter tags convention" default = "true" } variable "filter_tags_custom" { - description = "Tags used for custom filtering when filter_tags_default is false" + description = "Tags used for custom filtering when filter_tags_use_defaults is false" default = "*" } diff --git a/cloud/azure/storage/monitors-azure-storage.tf b/cloud/azure/storage/monitors-azure-storage.tf index 28a35ad..7466798 100644 --- a/cloud/azure/storage/monitors-azure-storage.tf +++ b/cloud/azure/storage/monitors-azure-storage.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_default == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } }