From 14403eddcb806a118cca34c47df349dc912bc626 Mon Sep 17 00:00:00 2001 From: Alexandre Gaillet Date: Wed, 4 Jul 2018 17:42:52 +0200 Subject: [PATCH] MON-237 - Cosmos RU utilization monitor added --- cloud/azure/README.md | 15 ++- cloud/azure/cosmosdb/README.md | 120 +++++++++++++--------- cloud/azure/cosmosdb/inputs.tf | 39 ++++++- cloud/azure/cosmosdb/monitors-cosmosdb.tf | 42 +++++++- cloud/azure/inputs.tf | 51 ++++++++- cloud/azure/monitors.tf | 6 ++ 6 files changed, 207 insertions(+), 66 deletions(-) diff --git a/cloud/azure/README.md b/cloud/azure/README.md index 5e67d56..e6785e7 100644 --- a/cloud/azure/README.md +++ b/cloud/azure/README.md @@ -83,14 +83,21 @@ Inputs | appservices_response_time_threshold_warning | Warning threshold for response time in seconds | string | `5` | no | | appservices_response_time_time_aggregator | Monitor aggregator for App Services response time [available values: min, max or avg] | string | `min` | no | | appservices_response_time_timeframe | Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| cosmos_db_4xx_request_rate_threshold_critical | | string | `80` | no | -| cosmos_db_4xx_request_rate_threshold_warning | | string | `50` | no | +| cosmos_db_4xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 4xx requests monitor | string | `80` | no | +| cosmos_db_4xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 4xx requests monitor | string | `50` | no | | cosmos_db_4xx_requests_message | Custom message for Cosmos DB 4xx requests monitor | string | `` | no | | cosmos_db_4xx_requests_silenced | Groups to mute for Cosmos DB 4xx requests monitor | map | `` | no | -| cosmos_db_5xx_request_rate_threshold_critical | | string | `80` | no | -| cosmos_db_5xx_request_rate_threshold_warning | | string | `50` | no | +| cosmos_db_5xx_request_rate_threshold_critical | Critical threshold for 
Cosmos DB 5xx requests monitor | string | `80` | no | +| cosmos_db_5xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 5xx requests monitor | string | `50` | no | | cosmos_db_5xx_requests_message | Custom message for Cosmos DB 5xx requests monitor | string | `` | no | | cosmos_db_5xx_requests_silenced | Groups to mute for Cosmos DB 5xx requests monitor | map | `` | no | +| cosmos_db_no_request_message | Custom message for Cosmos DB no request monitor | string | `` | no | +| cosmos_db_no_request_silenced | Groups to mute for Cosmos DB no request monitor | map | `` | no | +| cosmos_db_ru_utilization_collection | Group to associate Cosmos DB collection to RU max | map | - | yes | +| cosmos_db_ru_utilization_message | Custom message for Cosmos DB collection RU utilization monitor | string | `` | no | +| cosmos_db_ru_utilization_rate_threshold_critical | Critical threshold for Cosmos DB collection RU utilization monitor | string | `90` | no | +| cosmos_db_ru_utilization_rate_threshold_warning | Warning threshold for Cosmos DB collection RU utilization monitor | string | `80` | no | +| cosmos_db_ru_utilization_silenced | Groups to mute for Cosmos DB collection RU utilization monitor | map | `` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | | eventhub_errors_rate_message | Custom message for Event Hub errors monitor | string | `` | no | diff --git a/cloud/azure/cosmosdb/README.md b/cloud/azure/cosmosdb/README.md index 1df255e..28a8e26 100644 --- a/cloud/azure/cosmosdb/README.md +++ b/cloud/azure/cosmosdb/README.md @@ -1,49 +1,71 @@ -Azure Cosmos DB DataDog monitors -================================ - -How to use this module ----------------------- - -``` -module "datadog-monitors-azure-cosmosdb" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/cosmosdb?ref={revision}" - - message = 
"${module.datadog-message-alerting.alerting-message}" - environment = "${var.environment}" -} -``` - -Purpose -------- -Creates a DataDog monitors with the following checks : - -* No request -* Too many 4xx requests -* Too many 5xx requests - -Inputs ------- - -| Name | Description | Type | Default | Required | -|------|-------------|:----:|:-----:|:-----:| -| cosmos_db_4xx_request_rate_threshold_critical | | string | `80` | no | -| cosmos_db_4xx_request_rate_threshold_warning | | string | `50` | no | -| cosmos_db_4xx_requests_message | Custom message for Cosmos DB 4xx requests monitor | string | `` | no | -| cosmos_db_4xx_requests_silenced | Groups to mute for Cosmos DB 4xx requests monitor | map | `` | no | -| cosmos_db_5xx_request_rate_threshold_critical | | string | `80` | no | -| cosmos_db_5xx_request_rate_threshold_warning | | string | `50` | no | -| cosmos_db_5xx_requests_message | Custom message for Cosmos DB 5xx requests monitor | string | `` | no | -| cosmos_db_5xx_requests_silenced | Groups to mute for Cosmos DB 5xx requests monitor | map | `` | no | -| cosmos_db_no_request_message | Custom message for Cosmos DB no request monitor | string | `` | no | -| cosmos_db_no_request_silenced | Groups to mute for Cosmos DB no request monitor | map | `` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | -| environment | Architecture environment | string | - | yes | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| message | Message sent when a monitor is triggered | string | - | yes | -| subscription_id | ID of the subscription | string | - | yes | - -Related documentation ---------------------- - -To be defined +Azure Cosmos DB DataDog monitors +================================ + +How to use this module +---------------------- + +``` +module 
"datadog-monitors-azure-cosmosdb" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/cosmosdb?ref={revision}" + + message = "${module.datadog-message-alerting.alerting-message}" + environment = "${var.environment}" +} +``` + +How to define cosmos_db_ru_utilization_collection variable +---------------------------------------------------------- + +At the time this module was written, Cosmos DB collections could not be defined with Terraform, so you have to define a map variable that associates each collection with its RU max: + +``` +variable cosmos_db_ru_utilization_collection { + type = "map" + default = { + "collection_1" = "ru_max_1" + "collection_2" = "ru_max_2" + ... + } +} +``` + +Purpose +------- +Creates DataDog monitors with the following checks: + +* No request +* Too many 4xx requests +* Too many 5xx requests +* Collection RU utilization + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cosmos_db_4xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 4xx requests monitor | string | `80` | no | +| cosmos_db_4xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 4xx requests monitor | string | `50` | no | +| cosmos_db_4xx_requests_message | Custom message for Cosmos DB 4xx requests monitor | string | `` | no | +| cosmos_db_4xx_requests_silenced | Groups to mute for Cosmos DB 4xx requests monitor | map | `` | no | +| cosmos_db_5xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 5xx requests monitor | string | `80` | no | +| cosmos_db_5xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 5xx requests monitor | string | `50` | no | +| cosmos_db_5xx_requests_message | Custom message for Cosmos DB 5xx requests monitor | string | `` | no | +| cosmos_db_5xx_requests_silenced | Groups to mute for Cosmos DB 5xx requests monitor | map | `` | no | +| cosmos_db_no_request_message | Custom message for 
Cosmos DB no request monitor | string | `` | no | +| cosmos_db_no_request_silenced | Groups to mute for Cosmos DB no request monitor | map | `` | no | +| cosmos_db_ru_utilization_collection | Group to associate Cosmos DB collection to RU max | map | - | yes | +| cosmos_db_ru_utilization_message | Custom message for Cosmos DB collection RU utilization monitor | string | `` | no | +| cosmos_db_ru_utilization_rate_threshold_critical | Critical threshold for Cosmos DB collection RU utilization monitor | string | `90` | no | +| cosmos_db_ru_utilization_rate_threshold_warning | Warning threshold for Cosmos DB collection RU utilization monitor | string | `80` | no | +| cosmos_db_ru_utilization_silenced | Groups to mute for Cosmos DB collection RU utilization monitor | map | `` | no | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Architecture environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when a monitor is triggered | string | - | yes | +| subscription_id | ID of the subscription | string | - | yes | + +Related documentation +--------------------- + +To be defined diff --git a/cloud/azure/cosmosdb/inputs.tf b/cloud/azure/cosmosdb/inputs.tf index 9232eed..40a94cf 100644 --- a/cloud/azure/cosmosdb/inputs.tf +++ b/cloud/azure/cosmosdb/inputs.tf @@ -41,11 +41,13 @@ variable "cosmos_db_4xx_requests_silenced" { } variable "cosmos_db_4xx_request_rate_threshold_critical" { - default = 80 + description = "Critical threshold for Cosmos DB 4xx requests monitor" + default = 80 } variable "cosmos_db_4xx_request_rate_threshold_warning" { - default = 50 + description = "Warning threshold for Cosmos DB 4xx requests monitor" + default = 50 } variable "cosmos_db_5xx_requests_message" { @@ -61,11 +63,13 @@ variable 
"cosmos_db_5xx_requests_silenced" { } variable "cosmos_db_5xx_request_rate_threshold_critical" { - default = 80 + description = "Critical threshold for Cosmos DB 5xx requests monitor" + default = 80 } variable "cosmos_db_5xx_request_rate_threshold_warning" { - default = 50 + description = "Warning threshold for Cosmos DB 5xx requests monitor" + default = 50 } variable "cosmos_db_no_request_message" { @@ -79,3 +83,30 @@ variable "cosmos_db_no_request_silenced" { type = "map" default = {} } + +variable "cosmos_db_ru_utilization_message" { + description = "Custom message for Cosmos DB collection RU utilization monitor" + type = "string" + default = "" +} + +variable "cosmos_db_ru_utilization_silenced" { + description = "Groups to mute for Cosmos DB collection RU utilization monitor" + type = "map" + default = {} +} + +variable "cosmos_db_ru_utilization_rate_threshold_critical" { + description = "Critical threshold for Cosmos DB collection RU utilization monitor" + default = 90 +} + +variable "cosmos_db_ru_utilization_rate_threshold_warning" { + description = "Warning threshold for Cosmos DB collection RU utilization monitor" + default = 80 +} + +variable "cosmos_db_ru_utilization_collection" { + description = "Group to associate Cosmos DB collection to RU max" + type = "map" +} diff --git a/cloud/azure/cosmosdb/monitors-cosmosdb.tf b/cloud/azure/cosmosdb/monitors-cosmosdb.tf index 95fa7bb..19a21ad 100644 --- a/cloud/azure/cosmosdb/monitors-cosmosdb.tf +++ b/cloud/azure/cosmosdb/monitors-cosmosdb.tf @@ -7,7 +7,7 @@ data "template_file" "filter" { } resource "datadog_monitor" "cosmos_db_4xx_requests" { - name = "[${var.environment}] Cosmos DB 4xx requests rate is too low {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cosmos DB 4xx requests rate is high {{#is_alert}}{{comparator}} {{threshold}}% 
({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cosmos_db_4xx_requests_message, var.message)}" query = < ${var.cosmos_db_ru_utilization_rate_threshold_critical} + EOF + + type = "metric alert" + + thresholds { + critical = "${var.cosmos_db_ru_utilization_rate_threshold_critical}" + warning = "${var.cosmos_db_ru_utilization_rate_threshold_warning}" + } + + silenced = "${var.cosmos_db_ru_utilization_silenced}" + + notify_no_data = false + evaluation_delay = "${var.delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = true + new_host_delay = "${var.delay}" + + tags = ["env:${var.environment}", "resource:cosmos_db", "collection:${element(keys(var.cosmos_db_ru_utilization_collection),count.index)}", "team:azure", "provider:azure"] +} diff --git a/cloud/azure/inputs.tf b/cloud/azure/inputs.tf index e12a9c9..b477cd1 100644 --- a/cloud/azure/inputs.tf +++ b/cloud/azure/inputs.tf @@ -1532,11 +1532,13 @@ variable "cosmos_db_4xx_requests_silenced" { } variable "cosmos_db_4xx_request_rate_threshold_critical" { - default = 80 + description = "Critical threshold for Cosmos DB 4xx requests monitor" + default = 80 } variable "cosmos_db_4xx_request_rate_threshold_warning" { - default = 50 + description = "Warning threshold for Cosmos DB 4xx requests monitor" + default = 50 } variable "cosmos_db_5xx_requests_message" { @@ -1552,9 +1554,50 @@ variable "cosmos_db_5xx_requests_silenced" { } variable "cosmos_db_5xx_request_rate_threshold_critical" { - default = 80 + description = "Critical threshold for Cosmos DB 5xx requests monitor" + default = 80 } variable "cosmos_db_5xx_request_rate_threshold_warning" { - default = 50 + description = "Warning threshold for Cosmos DB 5xx requests monitor" + default = 50 +} + +variable "cosmos_db_no_request_message" { + description = "Custom message for Cosmos DB no request monitor" + type = 
"string" + default = "" +} + +variable "cosmos_db_no_request_silenced" { + description = "Groups to mute for Cosmos DB no request monitor" + type = "map" + default = {} +} + +variable "cosmos_db_ru_utilization_message" { + description = "Custom message for Cosmos DB collection RU utilization monitor" + type = "string" + default = "" +} + +variable "cosmos_db_ru_utilization_silenced" { + description = "Groups to mute for Cosmos DB collection RU utilization monitor" + type = "map" + default = {} +} + +variable "cosmos_db_ru_utilization_rate_threshold_critical" { + description = "Critical threshold for Cosmos DB collection RU utilization monitor" + default = 90 +} + +variable "cosmos_db_ru_utilization_rate_threshold_warning" { + description = "Warning threshold for Cosmos DB collection RU utilization monitor" + default = 80 +} + +variable "cosmos_db_ru_utilization_collection" { + description = "Group to associate Cosmos DB collection to RU max" + type = "map" } diff --git a/cloud/azure/monitors.tf b/cloud/azure/monitors.tf index 068ccbe..6738aa6 100644 --- a/cloud/azure/monitors.tf +++ b/cloud/azure/monitors.tf @@ -390,4 +390,10 @@ module "cosmosdb" { cosmos_db_5xx_request_rate_threshold_warning = "${var.cosmos_db_5xx_request_rate_threshold_warning}" cosmos_db_5xx_requests_message = "${var.cosmos_db_5xx_requests_message}" cosmos_db_5xx_requests_silenced = "${var.cosmos_db_5xx_requests_silenced}" + + cosmos_db_ru_utilization_rate_threshold_critical = "${var.cosmos_db_ru_utilization_rate_threshold_critical}" + cosmos_db_ru_utilization_rate_threshold_warning = "${var.cosmos_db_ru_utilization_rate_threshold_warning}" + cosmos_db_ru_utilization_message = "${var.cosmos_db_ru_utilization_message}" + cosmos_db_ru_utilization_silenced = "${var.cosmos_db_ru_utilization_silenced}" + cosmos_db_ru_utilization_collection = "${var.cosmos_db_ru_utilization_collection}" }