From 6e6147088cbb58c322031c3f2169001025a0cae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Mon, 30 Oct 2017 11:34:42 +0100 Subject: [PATCH 01/10] MON-76: Azure Redis - DataDog Monitors --- cloud/azure/redis/inputs.tf | 31 +++++++++++++++++ cloud/azure/redis/monitors-azure-redis.tf | 42 +++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 cloud/azure/redis/inputs.tf create mode 100644 cloud/azure/redis/monitors-azure-redis.tf diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf new file mode 100644 index 0000000..70eba23 --- /dev/null +++ b/cloud/azure/redis/inputs.tf @@ -0,0 +1,31 @@ +# Global Terraform +variable "client_name" { + type = "string" +} + +variable "environment" { + type = "string" +} + +variable "stack" { + type = "string" +} + +# Global DataDog +variable "critical_escalation_group" { +} + +variable "warning_escalation_group" { +} + +variable "delay" { + default = 600 +} + +# Azure Redis specific +variable "evictedkeys_threshold_warning" { + default = 0 +} +variable "evictedkeys_threshold_critical" { + default = 100 +} diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf new file mode 100644 index 0000000..ec562b9 --- /dev/null +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -0,0 +1,42 @@ +resource "datadog_monitor" "status" { + name = "[${var.environment}] Redis {{name}} is down" + message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" + + query = "avg(last_5m):avg:azure.cache_redis.status{*} by {name,resource_group} != 1" + type = "query alert" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "evictedkeys" 
{ + name = "[${var.environment}] Redis {{value}} evictedkeys on {{name}}" + message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" + + query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.evictedkeys_threshold_warning}" + critical = "${var.evictedkeys_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 60 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} From 9112ce02a390dc783a85ee6a92b65239b6f35d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Mon, 30 Oct 2017 16:37:03 +0100 Subject: [PATCH 02/10] MON-76: Uses the generic message --- cloud/azure/redis/inputs.tf | 5 +---- cloud/azure/redis/monitors-azure-redis.tf | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf index 70eba23..3f9460f 100644 --- a/cloud/azure/redis/inputs.tf +++ b/cloud/azure/redis/inputs.tf @@ -12,10 +12,7 @@ variable "stack" { } # Global DataDog -variable "critical_escalation_group" { -} - -variable "warning_escalation_group" { +variable "message" { } variable "delay" { diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf index ec562b9..8b47249 100644 --- a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -1,6 +1,6 @@ resource "datadog_monitor" "status" { name = "[${var.environment}] Redis {{name}} is down" - message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" + message = "${var.message}" query = 
"avg(last_5m):avg:azure.cache_redis.status{*} by {name,resource_group} != 1" type = "query alert" @@ -19,7 +19,7 @@ resource "datadog_monitor" "status" { resource "datadog_monitor" "evictedkeys" { name = "[${var.environment}] Redis {{value}} evictedkeys on {{name}}" - message = "{{#is_alert}}\n${var.critical_escalation_group} \n{{/is_alert}} \n{{#is_recovery}}\n${var.critical_escalation_group}\n{{/is_recovery}}" + message = "${var.message}" query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_threshold_critical}" type = "query alert" From 9f1051097e4b42b37f3814a7cb6d139f537ba280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Mon, 30 Oct 2017 17:44:30 +0100 Subject: [PATCH 03/10] MON-76: More monitors --- cloud/azure/redis/inputs.tf | 20 +++++--- cloud/azure/redis/monitors-azure-redis.tf | 58 +++++++++++++++++++++-- 2 files changed, 67 insertions(+), 11 deletions(-) diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf index 3f9460f..f13b4cb 100644 --- a/cloud/azure/redis/inputs.tf +++ b/cloud/azure/redis/inputs.tf @@ -7,10 +7,6 @@ variable "environment" { type = "string" } -variable "stack" { - type = "string" -} - # Global DataDog variable "message" { } @@ -20,9 +16,21 @@ variable "delay" { } # Azure Redis specific -variable "evictedkeys_threshold_warning" { +variable "evictedkeys_limit_threshold_warning" { default = 0 } -variable "evictedkeys_threshold_critical" { +variable "evictedkeys_limit_threshold_critical" { default = 100 } +variable "percent_processor_time_threshold_critical" { + default = 80 +} +variable "percent_processor_time_threshold_warning" { + default = 60 +} +variable "server_load_rate_threshold_critical" { + default = 90 +} +variable "server_load_rate_threshold_warning" { + default = 70 +} diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf index 8b47249..d4b21b5 100644 --- 
a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -7,7 +7,7 @@ resource "datadog_monitor" "status" { notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true @@ -21,17 +21,65 @@ resource "datadog_monitor" "evictedkeys" { name = "[${var.environment}] Redis {{value}} evictedkeys on {{name}}" message = "${var.message}" - query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_threshold_critical}" + query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_limit_threshold_critical}" type = "query alert" thresholds { - warning = "${var.evictedkeys_threshold_warning}" - critical = "${var.evictedkeys_threshold_critical}" + warning = "${var.evictedkeys_limit_threshold_warning}" + critical = "${var.evictedkeys_limit_threshold_critical}" } notify_no_data = false evaluation_delay = "${var.delay}" - renotify_interval = 60 + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + +resource "datadog_monitor" "percent_processor_time" { + name = "[${var.environment}] Redis processor time {{value}}% on {{name}}" + message = "${var.message}" + + query = "avg(last_5m):avg:azure.cache_redis.percent_processor_time{*} by {name,resource_group} > ${var.percent_processor_time_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.percent_processor_time_threshold_warning}" + critical = "${var.percent_processor_time_threshold_critical}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + no_data_timeframe = 20 +} + 
+resource "datadog_monitor" "server_load" { + name = "[${var.environment}] Redis processor server load {{value}}% on {{name}}" + message = "${var.message}" + + query = "avg(last_5m):avg:azure.cache_redis.server_load{*} by {name,resource_group} > ${var.server_load_rate_threshold_critical}" + type = "query alert" + + thresholds { + warning = "${var.server_load_rate_threshold_critical}" + critical = "${var.server_load_rate_threshold_warning}" + } + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true From 386ad343a54753b73956e831c28cce39f14088ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Tue, 31 Oct 2017 10:34:48 +0100 Subject: [PATCH 04/10] MON-76: Filter tags option --- cloud/azure/redis/inputs.tf | 12 ++++++++++++ cloud/azure/redis/monitors-azure-redis.tf | 16 ++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf index f13b4cb..a96cc51 100644 --- a/cloud/azure/redis/inputs.tf +++ b/cloud/azure/redis/inputs.tf @@ -9,28 +9,40 @@ variable "environment" { # Global DataDog variable "message" { + description = "Message sent when a Redis monitor is triggered" } variable "delay" { + description = "Delay in seconds for the metric evaluation" default = 600 } +variable "use_filter_tags" { + description = "Filter the data with service tags if true" + default = "true" +} + # Azure Redis specific variable "evictedkeys_limit_threshold_warning" { default = 0 } + variable "evictedkeys_limit_threshold_critical" { default = 100 } + variable "percent_processor_time_threshold_critical" { default = 80 } + variable "percent_processor_time_threshold_warning" { default = 60 } + variable "server_load_rate_threshold_critical" { default = 90 } + variable "server_load_rate_threshold_warning" { default = 70 } diff --git a/cloud/azure/redis/monitors-azure-redis.tf 
b/cloud/azure/redis/monitors-azure-redis.tf index d4b21b5..6931afe 100644 --- a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -1,8 +1,16 @@ +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_eventhub:enabled,env:%s", var.environment) : "*"}" + } +} + resource "datadog_monitor" "status" { name = "[${var.environment}] Redis {{name}} is down" message = "${var.message}" - query = "avg(last_5m):avg:azure.cache_redis.status{*} by {name,resource_group} != 1" + query = "avg(last_5m):avg:azure.cache_redis.status{${data.template_file.filter.rendered}} by {name,resource_group} != 1" type = "query alert" notify_no_data = false @@ -21,7 +29,7 @@ resource "datadog_monitor" "evictedkeys" { name = "[${var.environment}] Redis {{value}} evictedkeys on {{name}}" message = "${var.message}" - query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{*} by {name,resource_group} > ${var.evictedkeys_limit_threshold_critical}" + query = "avg(last_5m):avg:azure.cache_redis.evictedkeys{${data.template_file.filter.rendered}} by {name,resource_group} > ${var.evictedkeys_limit_threshold_critical}" type = "query alert" thresholds { @@ -45,7 +53,7 @@ resource "datadog_monitor" "percent_processor_time" { name = "[${var.environment}] Redis processor time {{value}}% on {{name}}" message = "${var.message}" - query = "avg(last_5m):avg:azure.cache_redis.percent_processor_time{*} by {name,resource_group} > ${var.percent_processor_time_threshold_critical}" + query = "avg(last_5m):avg:azure.cache_redis.percent_processor_time{${data.template_file.filter.rendered}} by {name,resource_group} > ${var.percent_processor_time_threshold_critical}" type = "query alert" thresholds { @@ -69,7 +77,7 @@ resource "datadog_monitor" "server_load" { name = "[${var.environment}] Redis processor server load {{value}}% on {{name}}" message = "${var.message}" - query = 
"avg(last_5m):avg:azure.cache_redis.server_load{*} by {name,resource_group} > ${var.server_load_rate_threshold_critical}" + query = "avg(last_5m):avg:azure.cache_redis.server_load{${data.template_file.filter.rendered}} by {name,resource_group} > ${var.server_load_rate_threshold_critical}" type = "query alert" thresholds { From 8aab6d99b025ff3bf4375a8cc7310ac65edfe749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Tue, 31 Oct 2017 10:38:20 +0100 Subject: [PATCH 05/10] MON-76: Multiple line queries for better readability --- cloud/azure/redis/monitors-azure-redis.tf | 26 +++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf index 6931afe..b3ad63a 100644 --- a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -10,7 +10,9 @@ resource "datadog_monitor" "status" { name = "[${var.environment}] Redis {{name}} is down" message = "${var.message}" - query = "avg(last_5m):avg:azure.cache_redis.status{${data.template_file.filter.rendered}} by {name,resource_group} != 1" + query = < ${var.evictedkeys_limit_threshold_critical} +EOF type = "query alert" thresholds { @@ -53,7 +59,11 @@ resource "datadog_monitor" "percent_processor_time" { name = "[${var.environment}] Redis processor time {{value}}% on {{name}}" message = "${var.message}" - query = "avg(last_5m):avg:azure.cache_redis.percent_processor_time{${data.template_file.filter.rendered}} by {name,resource_group} > ${var.percent_processor_time_threshold_critical}" + query = < ${var.percent_processor_time_threshold_critical} +EOF type = "query alert" thresholds { @@ -77,12 +87,16 @@ resource "datadog_monitor" "server_load" { name = "[${var.environment}] Redis processor server load {{value}}% on {{name}}" message = "${var.message}" - query = "avg(last_5m):avg:azure.cache_redis.server_load{${data.template_file.filter.rendered}} by 
{name,resource_group} > ${var.server_load_rate_threshold_critical}" + query = < ${var.server_load_rate_threshold_critical} +EOF type = "query alert" thresholds { - warning = "${var.server_load_rate_threshold_critical}" - critical = "${var.server_load_rate_threshold_warning}" + warning = "${var.server_load_rate_threshold_warning}" + critical = "${var.server_load_rate_threshold_critical}" } notify_no_data = false From 505e0df14c86116c1c30a91d64fbc6f352bc2a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Tue, 31 Oct 2017 10:53:58 +0100 Subject: [PATCH 06/10] MON-76: Add Readme --- cloud/azure/redis/README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 cloud/azure/redis/README.md diff --git a/cloud/azure/redis/README.md b/cloud/azure/redis/README.md new file mode 100644 index 0000000..b5acaaa --- /dev/null +++ b/cloud/azure/redis/README.md @@ -0,0 +1,32 @@ +Azure Redis DataDog monitors +============================ + +How to use this module +---------------------- + +``` +module "datadog-monitors-azure-redis" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/redis?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + + environment = "${var.environment}" + client_name = "${var.client_name}" +} +``` + +Purpose +------- +Creates DataDog monitors with the following checks: + +* Service status check +* Evicted keys count check +* Processor time (percent) threshold +* Server CPU load threshold + +Related documentation +--------------------- + +DataDog documentation: https://docs.datadoghq.com/integrations/azure_redis_cache/ + +Azure Redis metrics documentation: https://docs.microsoft.com/en-us/azure/redis-cache/cache-how-to-monitor From 814ee2838da545e1fd75592038e4f7e7fe2dd4f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Tue, 31 Oct 2017 11:04:38 +0100 Subject: [PATCH 07/10] MON-76: Readme 
update with inputs --- cloud/azure/redis/README.md | 17 +++++++++++++++ cloud/azure/redis/inputs.tf | 26 +++++++++++++++-------- cloud/azure/redis/monitors-azure-redis.tf | 12 +++++++---- 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/cloud/azure/redis/README.md b/cloud/azure/redis/README.md index b5acaaa..d885193 100644 --- a/cloud/azure/redis/README.md +++ b/cloud/azure/redis/README.md @@ -24,6 +24,23 @@ Creates a DataDog monitors with the following checks : * Processor time (percent) threshold * Server CPU load threshold +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| client_name | Client name | string | - | yes | +| delay | Delay in seconds for the metric evaluation | string | `600` | no | +| environment | Architecture environment | string | - | yes | +| evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no | +| evictedkeys_limit_threshold_warning | Evicted keys limit (warning threshold) | string | `0` | no | +| message | Message sent when a Redis monitor is triggered | string | - | yes | +| percent_processor_time_threshold_critical | Processor time percent (critical threshold) | string | `80` | no | +| percent_processor_time_threshold_warning | Processor time percent (warning threshold) | string | `60` | no | +| server_load_rate_threshold_critical | Server CPU load rate (critical threshold) | string | `90` | no | +| server_load_rate_threshold_warning | Server CPU load rate (warning threshold) | string | `70` | no | +| use_filter_tags | Filter the data with service tags if true | string | `true` | no | + Related documentation --------------------- diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf index a96cc51..89385e8 100644 --- a/cloud/azure/redis/inputs.tf +++ b/cloud/azure/redis/inputs.tf @@ -1,10 +1,12 @@ # Global Terraform variable "client_name" { - type = "string" + description = "Client name" + type 
= "string" } variable "environment" { - type = "string" + description = "Architecture environment" + type = "string" } # Global DataDog @@ -14,7 +16,7 @@ variable "message" { variable "delay" { description = "Delay in seconds for the metric evaluation" - default = 600 + default = 600 } variable "use_filter_tags" { @@ -24,25 +26,31 @@ variable "use_filter_tags" { # Azure Redis specific variable "evictedkeys_limit_threshold_warning" { - default = 0 + description = "Evicted keys limit (warning threshold)" + default = 0 } variable "evictedkeys_limit_threshold_critical" { - default = 100 + description = "Evicted keys limit (critical threshold)" + default = 100 } variable "percent_processor_time_threshold_critical" { - default = 80 + description = "Processor time percent (critical threshold)" + default = 80 } variable "percent_processor_time_threshold_warning" { - default = 60 + description = "Processor time percent (warning threshold)" + default = 60 } variable "server_load_rate_threshold_critical" { - default = 90 + description = "Server CPU load rate (critical threshold)" + default = 90 } variable "server_load_rate_threshold_warning" { - default = 70 + description = "Server CPU load rate (warning threshold)" + default = 70 } diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf index b3ad63a..8287dad 100644 --- a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -13,7 +13,8 @@ resource "datadog_monitor" "status" { query = < ${var.evictedkeys_limit_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { warning = "${var.evictedkeys_limit_threshold_warning}" @@ -64,7 +66,8 @@ resource "datadog_monitor" "percent_processor_time" { avg:azure.cache_redis.percent_processor_time{${data.template_file.filter.rendered}} by {name,resource_group} ) > ${var.percent_processor_time_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { 
warning = "${var.percent_processor_time_threshold_warning}" @@ -92,7 +95,8 @@ resource "datadog_monitor" "server_load" { avg:azure.cache_redis.server_load{${data.template_file.filter.rendered}} by {name,resource_group} ) > ${var.server_load_rate_threshold_critical} EOF - type = "query alert" + + type = "query alert" thresholds { warning = "${var.server_load_rate_threshold_warning}" From c624b041a42121fc631fd9dcd27497e6351fe9e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Respaut?= Date: Tue, 31 Oct 2017 11:09:46 +0100 Subject: [PATCH 08/10] MON-76: Uses the right DD tag for Azure Redis --- cloud/azure/redis/README.md | 4 ++-- cloud/azure/redis/monitors-azure-redis.tf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cloud/azure/redis/README.md b/cloud/azure/redis/README.md index d885193..8520c6b 100644 --- a/cloud/azure/redis/README.md +++ b/cloud/azure/redis/README.md @@ -44,6 +44,6 @@ Inputs Related documentation --------------------- -DataDog documentation: https://docs.datadoghq.com/integrations/azure_redis_cache/ +DataDog documentation: [https://docs.datadoghq.com/integrations/azure_redis_cache/](https://docs.datadoghq.com/integrations/azure_redis_cache/) -Azure Redis metrics documentation: https://docs.microsoft.com/en-us/azure/redis-cache/cache-how-to-monitor +Azure Redis metrics documentation: [https://docs.microsoft.com/en-us/azure/redis-cache/cache-how-to-monitor](https://docs.microsoft.com/en-us/azure/redis-cache/cache-how-to-monitor) diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf index 8287dad..92652e9 100644 --- a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_eventhub:enabled,env:%s", var.environment) : "*"}" + filter = "${var.use_filter_tags == "true" ? 
format("dd_monitoring:enabled,dd_azure_redis:enabled,env:%s", var.environment) : "*"}" } } From 0a4345dfa39618213c71e8200153b03fd0bf5645 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Fri, 3 Nov 2017 20:56:04 +0100 Subject: [PATCH 09/10] MON-76 add subscription_id and tags, remove client_name --- cloud/azure/redis/README.md | 7 ++++--- cloud/azure/redis/inputs.tf | 24 +++++++++++++++++------ cloud/azure/redis/monitors-azure-redis.tf | 10 +++++++++- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/cloud/azure/redis/README.md b/cloud/azure/redis/README.md index 8520c6b..45538d1 100644 --- a/cloud/azure/redis/README.md +++ b/cloud/azure/redis/README.md @@ -9,9 +9,8 @@ module "datadog-monitors-azure-redis" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/redis?ref={revision}" message = "${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" - client_name = "${var.client_name}" + subscription_id = "${var.subscription_id}" } ``` @@ -29,7 +28,6 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| client_name | Client name | string | - | yes | | delay | Delay in seconds for the metric evaluation | string | `600` | no | | environment | Architecture environment | string | - | yes | | evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no | @@ -37,9 +35,12 @@ Inputs | message | Message sent when a Redis monitor is triggered | string | - | yes | | percent_processor_time_threshold_critical | Processor time percent (critical threshold) | string | `80` | no | | percent_processor_time_threshold_warning | Processor time percent (warning threshold) | string | `60` | no | +| provider | What is the monitored provider | string | azure | no | | server_load_rate_threshold_critical | Server CPU load rate (critical threshold) | string | `90` | no | | server_load_rate_threshold_warning | Server 
CPU load rate (warning threshold) | string | `70` | no | | use_filter_tags | Filter the data with service tags if true | string | `true` | no | +| service | What is the monitored service | string | storage | no | +| subscription_id | Azure account id used as filter for monitors | string | - | yes | Related documentation --------------------- diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf index 89385e8..7c57d63 100644 --- a/cloud/azure/redis/inputs.tf +++ b/cloud/azure/redis/inputs.tf @@ -1,12 +1,24 @@ # Global Terraform -variable "client_name" { - description = "Client name" - type = "string" -} - variable "environment" { description = "Architecture environment" - type = "string" + type = "string" +} + +variable "subscription_id" { + description = "Azure account id used as filter for monitors" + type = "string" +} + +variable "provider" { + description = "Cloud provider which the monitor and its based metric depend on" + type = "string" + default = "azure" +} + +variable "service" { + description = "Service monitored by this set of monitors" + type = "string" + default = "storage" } # Global DataDog diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf index 92652e9..950e9a1 100644 --- a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_redis:enabled,env:%s", var.environment) : "*"}" + filter = "${var.use_filter_tags == "true" ? 
format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "subscription_id:${var.subscription_id}"}" } } @@ -26,6 +26,8 @@ EOF require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "evictedkeys" { @@ -55,6 +57,8 @@ EOF require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "percent_processor_time" { @@ -84,6 +88,8 @@ EOF require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } resource "datadog_monitor" "server_load" { @@ -113,4 +119,6 @@ EOF require_full_window = true new_host_delay = "${var.delay}" no_data_timeframe = 20 + + tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] } From 753da1173437df811d43b65b651ccb1a98d63122 Mon Sep 17 00:00:00 2001 From: Laurent Piroelle Date: Thu, 23 Nov 2017 17:12:16 +0100 Subject: [PATCH 10/10] MON-76 Normalize monitors --- cloud/azure/redis/README.md | 9 +++----- cloud/azure/redis/inputs.tf | 26 ++++++----------------- cloud/azure/redis/monitors-azure-redis.tf | 20 ++++++++--------- 3 files changed, 20 insertions(+), 35 deletions(-) diff --git a/cloud/azure/redis/README.md b/cloud/azure/redis/README.md index 45538d1..4cd7a51 100644 --- a/cloud/azure/redis/README.md +++ b/cloud/azure/redis/README.md @@ -8,9 +8,8 @@ How to use this module module "datadog-monitors-azure-redis" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/redis?ref={revision}" - message = "${module.datadog-message-alerting.alerting-message}" + message = "${module.datadog-message-alerting.alerting-message}" environment = "${var.environment}" - subscription_id = 
"${var.subscription_id}" } ``` @@ -32,15 +31,13 @@ Inputs | environment | Architecture environment | string | - | yes | | evictedkeys_limit_threshold_critical | Evicted keys limit (critical threshold) | string | `100` | no | | evictedkeys_limit_threshold_warning | Evicted keys limit (warning threshold) | string | `0` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a Redis monitor is triggered | string | - | yes | | percent_processor_time_threshold_critical | Processor time percent (critical threshold) | string | `80` | no | | percent_processor_time_threshold_warning | Processor time percent (warning threshold) | string | `60` | no | -| provider | What is the monitored provider | string | azure | no | | server_load_rate_threshold_critical | Server CPU load rate (critical threshold) | string | `90` | no | | server_load_rate_threshold_warning | Server CPU load rate (warning threshold) | string | `70` | no | -| use_filter_tags | Filter the data with service tags if true | string | `true` | no | -| service | What is the monitored service | string | storage | no | -| subscription_id | Azure account id used as filter for monitors | string | - | yes | Related documentation --------------------- diff --git a/cloud/azure/redis/inputs.tf b/cloud/azure/redis/inputs.tf index 7c57d63..49750fa 100644 --- a/cloud/azure/redis/inputs.tf +++ b/cloud/azure/redis/inputs.tf @@ -4,23 +4,6 @@ variable "environment" { type = "string" } -variable "subscription_id" { - description = "Azure account id used as filter for monitors" - type = "string" -} - -variable "provider" { - description = "Cloud provider which the monitor and its based metric depend on" - type = "string" - default = "azure" -} - -variable "service" { - description = "Service monitored by this set of monitors" - type = "string" - 
default = "storage" -} - # Global DataDog variable "message" { description = "Message sent when a Redis monitor is triggered" @@ -31,11 +14,16 @@ variable "delay" { default = 600 } -variable "use_filter_tags" { - description = "Filter the data with service tags if true" +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" default = "true" } +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + # Azure Redis specific variable "evictedkeys_limit_threshold_warning" { description = "Evicted keys limit (warning threshold)" diff --git a/cloud/azure/redis/monitors-azure-redis.tf b/cloud/azure/redis/monitors-azure-redis.tf index 950e9a1..57b3a6c 100644 --- a/cloud/azure/redis/monitors-azure-redis.tf +++ b/cloud/azure/redis/monitors-azure-redis.tf @@ -2,7 +2,7 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.use_filter_tags == "true" ? format("dd_monitoring:enabled,dd_azure_storage:enabled,env:%s", var.environment) : "subscription_id:${var.subscription_id}"}" + filter = "${var.filter_tags_use_defaults == "true" ? 
format("dd_monitoring:enabled,dd_azure_redis:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } @@ -14,9 +14,9 @@ resource "datadog_monitor" "status" { avg(last_5m):avg:azure.cache_redis.status{${data.template_file.filter.rendered}} by {name,resource_group} != 1 EOF - type = "query alert" + type = "metric alert" - notify_no_data = false + notify_no_data = true evaluation_delay = "${var.delay}" renotify_interval = 0 notify_audit = false @@ -27,7 +27,7 @@ EOF new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:redis", "team:azure", "provider:azure"] } resource "datadog_monitor" "evictedkeys" { @@ -40,7 +40,7 @@ resource "datadog_monitor" "evictedkeys" { ) > ${var.evictedkeys_limit_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.evictedkeys_limit_threshold_warning}" @@ -58,7 +58,7 @@ EOF new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:redis", "team:azure", "provider:azure"] } resource "datadog_monitor" "percent_processor_time" { @@ -71,7 +71,7 @@ resource "datadog_monitor" "percent_processor_time" { ) > ${var.percent_processor_time_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { warning = "${var.percent_processor_time_threshold_warning}" @@ -89,7 +89,7 @@ EOF new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:redis", "team:azure", "provider:azure"] } resource "datadog_monitor" "server_load" { @@ -102,7 +102,7 @@ resource "datadog_monitor" "server_load" { ) > ${var.server_load_rate_threshold_critical} EOF - type = "query alert" + type = "metric alert" thresholds { 
warning = "${var.server_load_rate_threshold_warning}" @@ -120,5 +120,5 @@ EOF new_host_delay = "${var.delay}" no_data_timeframe = 20 - tags = ["env:${var.environment}","resource:${var.service}","team:${var.provider}"] + tags = ["env:${var.environment}", "resource:redis", "team:azure", "provider:azure"] }