diff --git a/README.md b/README.md index 37b9581..5d7a6f0 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,11 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [azure](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/) - [apimanagement](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/apimanagement/) - [app-services](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/app-services/) + - [cosmosdb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/cosmosdb/) + - [datalakestore](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/datalakestore/) - [eventhub](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/eventhub/) - [iothubs](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/iothubs/) + - [keyvault](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/keyvault/) - [redis](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/redis/) - [servicebus](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/servicebus/) - [sql-database](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/sql-database/) diff --git a/cloud/azure/README.md b/cloud/azure/README.md index 52c33aa..c51727d 100644 --- a/cloud/azure/README.md +++ b/cloud/azure/README.md @@ -83,6 +83,18 @@ Inputs | appservices_response_time_threshold_warning | Warning threshold for response time in seconds | string | `5` | no | | appservices_response_time_time_aggregator | Monitor aggregator for App Services response time [available values: min, max or avg] | string | `min` | no | | appservices_response_time_timeframe | Monitor timeframe for App Services response time [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| 
cosmos_db_4xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 4xx requests monitor | string | `80` | no | +| cosmos_db_4xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 4xx requests monitor | string | `50` | no | +| cosmos_db_4xx_requests_message | Custom message for Cosmos DB 4xx requests monitor | string | `` | no | +| cosmos_db_4xx_requests_silenced | Groups to mute for Cosmos DB 4xx requests monitor | map | `` | no | +| cosmos_db_5xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 5xx requests monitor | string | `80` | no | +| cosmos_db_5xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 5xx requests monitor | string | `50` | no | +| cosmos_db_5xx_requests_message | Custom message for Cosmos DB 5xx requests monitor | string | `` | no | +| cosmos_db_5xx_requests_silenced | Groups to mute for Cosmos DB 5xx requests monitor | map | `` | no | +| datalakestore_status_message | Custom message for Datalake Store status monitor | string | `` | no | +| datalakestore_status_silenced | Groups to mute for Datalake Store status monitor | map | `` | no | +| datalakestore_status_time_aggregator | Monitor aggregator for Datalake Store status [available values: min, max or avg] | string | `max` | no | +| datalakestore_status_timeframe | Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | | eventhub_errors_rate_message | Custom message for Event Hub errors monitor | string | `` | no | @@ -167,6 +179,15 @@ Inputs | iothub_total_devices_silenced | Groups to mute for IoT Hub total devices monitor | map | `` | no | | iothub_total_devices_time_aggregator | Monitor aggregator for IoT Hub total devices [available values: min, max or avg] | string | `min` | no | | 
iothub_total_devices_timeframe | Monitor timeframe for IoT Hub total devices [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| keyvault_api_result_message | Custom message for Key Vault API result monitor | string | `` | no | +| keyvault_api_result_silenced | Groups to mute for Key Vault API result monitor | map | `` | no | +| keyvault_api_result_threshold_critical | Critical threshold for Key Vault API result rate | string | `10` | no | +| keyvault_api_result_threshold_warning | Warning threshold for Key Vault API result rate | string | `30` | no | +| keyvault_api_result_timeframe | Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_30m` | no | +| keyvault_status_message | Custom message for Key Vault status monitor | string | `` | no | +| keyvault_status_silenced | Groups to mute for Key Vault status monitor | map | `` | no | +| keyvault_status_time_aggregator | Monitor aggregator for Key Vault status [available values: min, max or avg] | string | `max` | no | +| keyvault_status_timeframe | Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | non_taggable_filter_tags | Tags used for filtering for components without tag support | string | `*` | no | | redis_evictedkeys_limit_message | Custom message for Redis evicted keys monitor | string | `` | no | @@ -191,10 +212,24 @@ Inputs | redis_status_silenced | Groups to mute for Redis status monitor | map | `` | no | | redis_status_time_aggregator | Monitor aggregator for Redis status [available values: min, max or avg] | string | `max` | no | | redis_status_timeframe | Monitor timeframe for Redis status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 
4), or `last_1d`] | string | `last_5m` | no | +| servicebus_no_active_connections_message | Custom message for Service Bus status monitor | string | `` | no | +| servicebus_no_active_connections_silenced | Groups to mute for Service Bus status monitor | map | `` | no | +| servicebus_no_active_connections_time_aggregator | Monitor aggregator for Service Bus status [available values: min, max or avg] | string | `max` | no | +| servicebus_no_active_connections_timeframe | Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| servicebus_server_errors_message | Custom message for Service Bus server errors monitor | string | `` | no | +| servicebus_server_errors_silenced | Groups to mute for Service Bus server errors monitor | map | `` | no | +| servicebus_server_errors_threshold_critical | Critical threshold for Service Bus server errors monitor | string | `90` | no | +| servicebus_server_errors_threshold_warning | Warning threshold for Service Bus server errors monitor | string | `50` | no | +| servicebus_server_errors_timeframe | Monitor timeframe for Service Bus server errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | servicebus_status_message | Custom message for Service Bus status monitor | string | `` | no | | servicebus_status_silenced | Groups to mute for Service Bus status monitor | map | `` | no | | servicebus_status_time_aggregator | Monitor aggregator for Service Bus status [available values: min, max or avg] | string | `max` | no | | servicebus_status_timeframe | Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| servicebus_user_errors_message | Custom message for Service Bus user errors monitor | string | `` | no | +| servicebus_user_errors_silenced | Groups to 
mute for Service Bus user errors monitor | map | `` | no | +| servicebus_user_errors_threshold_critical | Critical threshold for Service Bus user errors monitor | string | `90` | no | +| servicebus_user_errors_threshold_warning | Warning threshold for Service Bus user errors monitor | string | `50` | no | +| servicebus_user_errors_timeframe | Monitor timeframe for Service Bus user errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | sqldatabase_cpu_message | Custom message for SQL CPU monitor | string | `` | no | | sqldatabase_cpu_silenced | Groups to mute for SQL CPU monitor | map | `` | no | | sqldatabase_cpu_threshold_critical | CPU usage in percent (critical threshold) | string | `90` | no | diff --git a/cloud/azure/cosmosdb/README.md b/cloud/azure/cosmosdb/README.md new file mode 100644 index 0000000..6de3209 --- /dev/null +++ b/cloud/azure/cosmosdb/README.md @@ -0,0 +1,71 @@ +# CLOUD AZURE COSMOSDB DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-cloud-azure-cosmosdb" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/cosmosdb?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Cosmos DB 4xx requests rate is high +- Cosmos DB 5xx requests rate is high +- Cosmos DB is down + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cosmos_db_4xx_request_extra_tags | Extra tags for Cosmos DB 4xx requests monitor | list | `` | no | +| cosmos_db_4xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 4xx requests monitor | string | `80` | no | +| cosmos_db_4xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 4xx requests monitor | string | `50` | no | +| 
cosmos_db_4xx_request_time_aggregator | Monitor aggregator for Cosmos DB 4xx requests [available values: min, max or avg] | string | `sum` | no | +| cosmos_db_4xx_request_timeframe | Monitor timeframe for Cosmos DB 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| cosmos_db_4xx_requests_enabled | Flag to enable Cosmos DB 4xx requests monitor | string | `true` | no | +| cosmos_db_4xx_requests_message | Custom message for Cosmos DB 4xx requests monitor | string | `` | no | +| cosmos_db_4xx_requests_silenced | Groups to mute for Cosmos DB 4xx requests monitor | map | `` | no | +| cosmos_db_5xx_request_rate_extra_tags | Extra tags for Cosmos DB 5xx requests monitor | list | `` | no | +| cosmos_db_5xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 5xx requests monitor | string | `80` | no | +| cosmos_db_5xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 5xx requests monitor | string | `50` | no | +| cosmos_db_5xx_request_time_aggregator | Monitor aggregator for Cosmos DB 5xx requests [available values: min, max or avg] | string | `sum` | no | +| cosmos_db_5xx_request_timeframe | Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| cosmos_db_5xx_requests_enabled | Flag to enable Cosmos DB 5xx requests monitor | string | `true` | no | +| cosmos_db_5xx_requests_message | Custom message for Cosmos DB 5xx requests monitor | string | `` | no | +| cosmos_db_5xx_requests_silenced | Groups to mute for Cosmos DB 5xx requests monitor | map | `` | no | +| environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use 
default filter tags convention | string | `true` | no | +| message | Message sent when a monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| status_enabled | Flag to enable Cosmos DB status monitor | string | `true` | no | +| status_extra_tags | Extra tags for Cosmos DB status monitor | list | `` | no | +| status_message | Custom message for Cosmos DB status monitor | string | `` | no | +| status_silenced | Groups to mute for Cosmos DB status monitor | map | `` | no | +| status_time_aggregator | Monitor aggregator for Cosmos DB status [available values: min, max or avg] | string | `max` | no | +| status_timeframe | Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| cosmos_db_4xx_requests_id | id for monitor cosmos_db_4xx_requests | +| cosmos_db_5xx_requests_id | id for monitor cosmos_db_5xx_requests | +| cosmos_db_status_id | id for monitor cosmos_db_status | + +Related documentation +--------------------- + +DataDog documentation : [https://docs.datadoghq.com/integrations/azure/](https://docs.datadoghq.com/integrations/azure/) +You must search `cosmosdb`, there is no integration for now. 
+ +Azure metrics documentation : [https://docs.microsoft.com/fr-fr/azure/monitoring-and-diagnostics/monitoring-supported-metrics#microsoftdocumentdbdatabaseaccounts](https://docs.microsoft.com/fr-fr/azure/monitoring-and-diagnostics/monitoring-supported-metrics#microsoftdocumentdbdatabaseaccounts) + diff --git a/cloud/azure/cosmosdb/inputs.tf b/cloud/azure/cosmosdb/inputs.tf new file mode 100644 index 0000000..4d6cd55 --- /dev/null +++ b/cloud/azure/cosmosdb/inputs.tf @@ -0,0 +1,157 @@ +variable "environment" { + description = "Architecture environment" + type = "string" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + +# Azure CosmosDB specific variables +variable "status_enabled" { + description = "Flag to enable Cosmos DB status monitor" + type = "string" + default = "true" +} + +variable "status_silenced" { + description = "Groups to mute for Cosmos DB status monitor" + type = "map" + default = {} +} + +variable "status_extra_tags" { + description = "Extra tags for Cosmos DB status monitor" + type = "list" + default = [] +} + +variable "status_message" { + description = "Custom message for Cosmos DB status monitor" + type = "string" + default = "" +} + +variable "status_time_aggregator" { + description = "Monitor aggregator for Cosmos DB status [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "status_timeframe" { + description = "Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 
15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "cosmos_db_4xx_requests_message" { + description = "Custom message for Cosmos DB 4xx requests monitor" + type = "string" + default = "" +} + +variable "cosmos_db_4xx_requests_enabled" { + description = "Flag to enable Cosmos DB 4xx requests monitor" + type = "string" + default = "true" +} + +variable "cosmos_db_4xx_requests_silenced" { + description = "Groups to mute for Cosmos DB 4xx requests monitor" + type = "map" + default = {} +} + +variable "cosmos_db_4xx_request_rate_threshold_critical" { + description = "Critical threshold for Cosmos DB 4xx requests monitor" + default = 80 +} + +variable "cosmos_db_4xx_request_rate_threshold_warning" { + description = "Warning threshold for Cosmos DB 4xx requests monitor" + default = 50 +} + +variable "cosmos_db_4xx_request_extra_tags" { + description = "Extra tags for Cosmos DB 4xx requests monitor" + type = "list" + default = [] +} + +variable "cosmos_db_4xx_request_time_aggregator" { + description = "Monitor aggregator for Cosmos DB 4xx requests [available values: min, max or avg]" + type = "string" + default = "sum" +} + +variable "cosmos_db_4xx_request_timeframe" { + description = "Monitor timeframe for Cosmos DB 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "cosmos_db_5xx_requests_message" { + description = "Custom message for Cosmos DB 5xx requests monitor" + type = "string" + default = "" +} + +variable "cosmos_db_5xx_requests_enabled" { + description = "Flag to enable Cosmos DB 5xx requests monitor" + type = "string" + default = "true" +} + +variable "cosmos_db_5xx_requests_silenced" { + description = "Groups to mute for Cosmos DB 5xx requests monitor" + type = "map" + default = {} +} + +variable "cosmos_db_5xx_request_rate_threshold_critical" { + description = "Critical threshold for Cosmos DB 5xx 
requests monitor" + default = 80 +} + +variable "cosmos_db_5xx_request_rate_threshold_warning" { + description = "Warning threshold for Cosmos DB 5xx requests monitor" + default = 50 +} + +variable "cosmos_db_5xx_request_rate_extra_tags" { + description = "Extra tags for Cosmos DB 5xx requests monitor" + type = "list" + default = [] +} + +variable "cosmos_db_5xx_request_time_aggregator" { + description = "Monitor aggregator for Cosmos DB 5xx requests [available values: min, max or avg]" + type = "string" + default = "sum" +} + +variable "cosmos_db_5xx_request_timeframe" { + description = "Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} diff --git a/cloud/azure/cosmosdb/modules.tf b/cloud/azure/cosmosdb/modules.tf new file mode 100644 index 0000000..889e29a --- /dev/null +++ b/cloud/azure/cosmosdb/modules.tf @@ -0,0 +1,19 @@ +module "filter-tags" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "cosmosdb" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} + +module "filter-tags-statuscode" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "cosmosdb" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom},statuscode:%s" + + extra_tags = ["statuscode:%s"] +} diff --git a/cloud/azure/cosmosdb/monitors-cosmosdb.tf b/cloud/azure/cosmosdb/monitors-cosmosdb.tf new file mode 100644 index 0000000..f4aa6bb --- /dev/null +++ b/cloud/azure/cosmosdb/monitors-cosmosdb.tf @@ -0,0 +1,136 @@ +resource "datadog_monitor" "cosmos_db_status" { + count = "${var.status_enabled ? 
1 : 0}" + + name = "[${var.environment}] Cosmos DB is down" + message = "${coalesce(var.status_message, var.message)}" + + query = < ${var.cosmos_db_4xx_request_rate_threshold_critical} + EOF + + type = "metric alert" + + thresholds { + critical = "${var.cosmos_db_4xx_request_rate_threshold_critical}" + warning = "${var.cosmos_db_4xx_request_rate_threshold_warning}" + } + + silenced = "${var.cosmos_db_4xx_requests_silenced}" + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_4xx_request_extra_tags}"] +} + +resource "datadog_monitor" "cosmos_db_5xx_requests" { + count = "${var.cosmos_db_5xx_requests_enabled ? 1 : 0}" + + name = "[${var.environment}] Cosmos DB 5xx requests rate is high {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.cosmos_db_5xx_requests_message, var.message)}" + + query = < ${var.cosmos_db_5xx_request_rate_threshold_critical} + EOF + + type = "metric alert" + + thresholds { + critical = "${var.cosmos_db_5xx_request_rate_threshold_critical}" + warning = "${var.cosmos_db_5xx_request_rate_threshold_warning}" + } + + silenced = "${var.cosmos_db_5xx_requests_silenced}" + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", 
"${var.cosmos_db_5xx_request_rate_extra_tags}"] +} diff --git a/cloud/azure/cosmosdb/outputs.tf b/cloud/azure/cosmosdb/outputs.tf new file mode 100644 index 0000000..6901b39 --- /dev/null +++ b/cloud/azure/cosmosdb/outputs.tf @@ -0,0 +1,14 @@ +output "cosmos_db_status_id" { + description = "id for monitor cosmos_db_status" + value = "${datadog_monitor.cosmos_db_status.*.id}" +} + +output "cosmos_db_4xx_requests_id" { + description = "id for monitor cosmos_db_4xx_requests" + value = "${datadog_monitor.cosmos_db_4xx_requests.*.id}" +} + +output "cosmos_db_5xx_requests_id" { + description = "id for monitor cosmos_db_5xx_requests" + value = "${datadog_monitor.cosmos_db_5xx_requests.*.id}" +} diff --git a/cloud/azure/datalakestore/README.md b/cloud/azure/datalakestore/README.md new file mode 100644 index 0000000..9ac4a1f --- /dev/null +++ b/cloud/azure/datalakestore/README.md @@ -0,0 +1,50 @@ +# CLOUD AZURE DATALAKESTORE DataDog monitors + +## How to use this module + +``` +module "datadog-monitors-cloud-azure-datalakestore" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/azure/datalakestore?ref={revision}" + + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +## Purpose + +Creates DataDog monitors with the following checks: + +- Datalake Store is down + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when a monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` 
| no | +| status_enabled | Flag to enable Datalake Store status monitor | string | `true` | no | +| status_extra_tags | Extra tags for Datalake Store status monitor | list | `` | no | +| status_message | Custom message for Datalake Store status monitor | string | `` | no | +| status_silenced | Groups to mute for Datalake Store status monitor | map | `` | no | +| status_time_aggregator | Monitor aggregator for Datalake Store status [available values: min, max or avg] | string | `max` | no | +| status_timeframe | Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + +## Outputs + +| Name | Description | +|------|-------------| +| datalakestore_status_id | id for monitor datalakestore_status | + +Related documentation +--------------------- + +DataDog documentation : [https://docs.datadoghq.com/integrations/azure/](https://docs.datadoghq.com/integrations/azure/) +You must search `datalake`, there is no integration for now. 
+ +Azure metrics documentation : [https://docs.microsoft.com/fr-fr/azure/monitoring-and-diagnostics/monitoring-supported-metrics#microsoftdatalakestoreaccounts](https://docs.microsoft.com/fr-fr/azure/monitoring-and-diagnostics/monitoring-supported-metrics#microsoftdatalakestoreaccounts) diff --git a/cloud/azure/datalakestore/inputs.tf b/cloud/azure/datalakestore/inputs.tf new file mode 100644 index 0000000..7a0183c --- /dev/null +++ b/cloud/azure/datalakestore/inputs.tf @@ -0,0 +1,64 @@ +variable "environment" { + description = "Architecture environment" + type = "string" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + +# Azure Datalake Store specific variables +variable "status_enabled" { + description = "Flag to enable Datalake Store status monitor" + type = "string" + default = "true" +} + +variable "status_silenced" { + description = "Groups to mute for Datalake Store status monitor" + type = "map" + default = {} +} + +variable "status_message" { + description = "Custom message for Datalake Store status monitor" + type = "string" + default = "" +} + +variable "status_time_aggregator" { + description = "Monitor aggregator for Datalake Store status [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "status_timeframe" { + description = "Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable 
"status_extra_tags" { + description = "Extra tags for Datalake Store status monitor" + type = "list" + default = [] +} diff --git a/cloud/azure/datalakestore/modules.tf b/cloud/azure/datalakestore/modules.tf new file mode 100644 index 0000000..2a526b2 --- /dev/null +++ b/cloud/azure/datalakestore/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "datalakestore" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/cloud/azure/datalakestore/monitors-datalakestore.tf b/cloud/azure/datalakestore/monitors-datalakestore.tf new file mode 100644 index 0000000..db5f78f --- /dev/null +++ b/cloud/azure/datalakestore/monitors-datalakestore.tf @@ -0,0 +1,28 @@ +resource "datadog_monitor" "datalakestore_status" { + count = "${var.status_enabled ? 1 : 0}" + + name = "[${var.environment}] Datalake Store is down" + message = "${coalesce(var.status_message, var.message)}" + + query = <` | no | +| api_latency_message | Custom message for Key Vault API latency monitor | string | `` | no | +| api_latency_silenced | Groups to mute for Key Vault API latency monitor | map | `` | no | +| api_latency_threshold_critical | Critical threshold for Key Vault API latency rate | string | `100` | no | +| api_latency_threshold_warning | Warning threshold for Key Vault API latency rate | string | `80` | no | +| api_latency_time_aggregator | Monitor aggregator for Key Vault API latency [available values: min, max or avg] | string | `min` | no | +| api_latency_timeframe | Monitor timeframe for Key Vault API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| api_result_enabled | Flag to enable Key Vault API result monitor | string | `true` | no | +| api_result_extra_tags | Extra 
tags for Key Vault API result monitor | list | `` | no | +| api_result_message | Custom message for Key Vault API result monitor | string | `` | no | +| api_result_silenced | Groups to mute for Key Vault API result monitor | map | `` | no | +| api_result_threshold_critical | Critical threshold for Key Vault API result rate | string | `10` | no | +| api_result_threshold_warning | Warning threshold for Key Vault API result rate | string | `30` | no | +| api_result_time_aggregator | Monitor aggregator for Key Vault API result [available values: min, max or avg] | string | `sum` | no | +| api_result_timeframe | Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when a monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| status_enabled | Flag to enable Key Vault status monitor | string | `true` | no | +| status_extra_tags | Extra tags for Key Vault status monitor | list | `` | no | +| status_message | Custom message for Key Vault status monitor | string | `` | no | +| status_silenced | Groups to mute for Key Vault status monitor | map | `` | no | +| status_time_aggregator | Monitor aggregator for Key Vault status [available values: min, max or avg] | string | `max` | no | +| status_timeframe | Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | + +## Outputs + +| Name | Description | 
+|------|-------------| +| keyvault_api_latency_id | id for monitor keyvault_api_latency | +| keyvault_api_result_id | id for monitor keyvault_api_result | +| keyvault_status_id | id for monitor keyvault_status | + +Related documentation +--------------------- + +DataDog documentation : [https://docs.datadoghq.com/integrations/azure/](https://docs.datadoghq.com/integrations/azure/) +You must search `keyvault`, there is no integration for now. + +Azure metrics documentation : [https://docs.microsoft.com/fr-fr/azure/monitoring-and-diagnostics/monitoring-supported-metrics#microsoftkeyvaultvaults](https://docs.microsoft.com/fr-fr/azure/monitoring-and-diagnostics/monitoring-supported-metrics#microsoftkeyvaultvaults) diff --git a/cloud/azure/keyvault/inputs.tf b/cloud/azure/keyvault/inputs.tf new file mode 100644 index 0000000..fa03318 --- /dev/null +++ b/cloud/azure/keyvault/inputs.tf @@ -0,0 +1,154 @@ +variable "environment" { + description = "Architecture environment" + type = "string" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "evaluation_delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + +# Azure Key Vault specific variables +variable "status_enabled" { + description = "Flag to enable Key Vault status monitor" + type = "string" + default = "true" +} + +variable "status_silenced" { + description = "Groups to mute for Key Vault status monitor" + type = "map" + default = {} +} + +variable "status_message" { + description = "Custom message for Key Vault status monitor" + type = "string" + default = "" +} + 
+variable "status_time_aggregator" { + description = "Monitor aggregator for Key Vault status [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "status_timeframe" { + description = "Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "status_extra_tags" { + description = "Extra tags for Key Vault status monitor" + type = "list" + default = [] +} + +variable "api_result_enabled" { + description = "Flag to enable Key Vault API result monitor" + type = "string" + default = "true" +} + +variable "api_result_silenced" { + description = "Groups to mute for Key Vault API result monitor" + type = "map" + default = {} +} + +variable "api_result_message" { + description = "Custom message for Key Vault API result monitor" + type = "string" + default = "" +} + +variable "api_result_time_aggregator" { + description = "Monitor aggregator for Key Vault API result [available values: min, max or avg]" + type = "string" + default = "sum" +} + +variable "api_result_timeframe" { + description = "Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "api_result_threshold_critical" { + description = "Critical threshold for Key Vault API result rate" + default = 10 +} + +variable "api_result_threshold_warning" { + description = "Warning threshold for Key Vault API result rate" + default = 30 +} + +variable "api_result_extra_tags" { + description = "Extra tags for Key Vault API result monitor" + type = "list" + default = [] +} + +variable "api_latency_enabled" { + description = "Flag to enable Key Vault API latency monitor" + type = "string" + default = "true" +} + +variable "api_latency_silenced" { + description = "Groups to mute for Key Vault API latency monitor" + type = "map" + default = {} +} + +variable
"api_latency_message" { + description = "Custom message for Key Vault API latency monitor" + type = "string" + default = "" +} + +variable "api_latency_time_aggregator" { + description = "Monitor aggregator for Key Vault API latency [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "api_latency_timeframe" { + description = "Monitor timeframe for Key Vault API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "api_latency_threshold_critical" { + description = "Critical threshold for Key Vault API latency rate" + default = 100 +} + +variable "api_latency_threshold_warning" { + description = "Warning threshold for Key Vault API latency rate" + default = 80 +} + +variable "api_latency_extra_tags" { + description = "Extra tags for Key Vault API latency monitor" + type = "list" + default = [] +} diff --git a/cloud/azure/keyvault/modules.tf b/cloud/azure/keyvault/modules.tf new file mode 100644 index 0000000..0c21a6a --- /dev/null +++ b/cloud/azure/keyvault/modules.tf @@ -0,0 +1,19 @@ +module "filter-tags" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "keyvault" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} + +module "filter-tags-statuscode" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "keyvault" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom},statuscode:%s" + + extra_tags = ["statuscode:%s"] +} diff --git a/cloud/azure/keyvault/monitors-keyvault.tf b/cloud/azure/keyvault/monitors-keyvault.tf new file mode 100644 index 0000000..1847517 --- /dev/null +++ b/cloud/azure/keyvault/monitors-keyvault.tf @@ -0,0 +1,97 @@ +resource "datadog_monitor" "keyvault_status" { + count = "${var.status_enabled ?
1 : 0}" + + name = "[${var.environment}] Key Vault is down" + message = "${coalesce(var.status_message, var.message)}" + + query = < ${var.api_latency_threshold_critical} + EOF + + thresholds { + critical = "${var.api_latency_threshold_critical}" + warning = "${var.api_latency_threshold_warning}" + } + + type = "metric alert" + + silenced = "${var.api_latency_silenced}" + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:keyvault", "team:claranet", "created-by:terraform", "${var.api_latency_extra_tags}"] +} diff --git a/cloud/azure/keyvault/outputs.tf b/cloud/azure/keyvault/outputs.tf new file mode 100644 index 0000000..a6a4ca0 --- /dev/null +++ b/cloud/azure/keyvault/outputs.tf @@ -0,0 +1,14 @@ +output "keyvault_status_id" { + description = "id for monitor keyvault_status" + value = "${datadog_monitor.keyvault_status.*.id}" +} + +output "keyvault_api_result_id" { + description = "id for monitor keyvault_api_result" + value = "${datadog_monitor.keyvault_api_result.*.id}" +} + +output "keyvault_api_latency_id" { + description = "id for monitor keyvault_api_latency" + value = "${datadog_monitor.keyvault_api_latency.*.id}" +} diff --git a/cloud/azure/monitors.tf b/cloud/azure/monitors.tf index 4f0733a..d044a79 100644 --- a/cloud/azure/monitors.tf +++ b/cloud/azure/monitors.tf @@ -230,6 +230,23 @@ module "servicebus" { status_message = "${var.servicebus_status_message}" status_timeframe = "${var.servicebus_status_timeframe}" status_time_aggregator = "${var.servicebus_status_time_aggregator}" + + no_active_connections_silenced = "${var.servicebus_no_active_connections_silenced}" + no_active_connections_message = "${var.servicebus_no_active_connections_message}" + no_active_connections_timeframe = 
"${var.servicebus_no_active_connections_timeframe}" + no_active_connections_time_aggregator = "${var.servicebus_no_active_connections_time_aggregator}" + + server_errors_silenced = "${var.servicebus_server_errors_silenced}" + server_errors_message = "${var.servicebus_server_errors_message}" + server_errors_timeframe = "${var.servicebus_server_errors_timeframe}" + server_errors_threshold_critical = "${var.servicebus_server_errors_threshold_critical}" + server_errors_threshold_warning = "${var.servicebus_server_errors_threshold_warning}" + + user_errors_silenced = "${var.servicebus_user_errors_silenced}" + user_errors_message = "${var.servicebus_user_errors_message}" + user_errors_timeframe = "${var.servicebus_user_errors_timeframe}" + user_errors_threshold_critical = "${var.servicebus_user_errors_threshold_critical}" + user_errors_threshold_warning = "${var.servicebus_user_errors_threshold_warning}" } module "sqldatabase" { @@ -369,3 +386,96 @@ module "streamanalytics" { su_utilization_threshold_critical = "${var.streamanalytics_su_utilization_threshold_critical}" su_utilization_threshold_warning = "${var.streamanalytics_su_utilization_threshold_warning}" } + +module "cosmosdb" { + source = "./cosmosdb" + + environment = "${var.environment}" + message = "${var.message}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" + + status_enabled = "${var.cosmos_db_status_enabled}" + status_message = "${var.cosmos_db_status_message}" + status_silenced = "${var.cosmos_db_status_silenced}" + status_extra_tags = "${var.cosmos_db_status_extra_tags}" + status_time_aggregator = "${var.cosmos_db_status_time_aggregator}" + status_timeframe = "${var.cosmos_db_status_timeframe}" + + cosmos_db_4xx_requests_enabled = "${var.cosmos_db_4xx_requests_enabled}" + cosmos_db_4xx_request_rate_threshold_critical = 
"${var.cosmos_db_4xx_request_rate_threshold_critical}" + cosmos_db_4xx_request_rate_threshold_warning = "${var.cosmos_db_4xx_request_rate_threshold_warning}" + cosmos_db_4xx_requests_message = "${var.cosmos_db_4xx_requests_message}" + cosmos_db_4xx_requests_silenced = "${var.cosmos_db_4xx_requests_silenced}" + cosmos_db_4xx_request_extra_tags = "${var.cosmos_db_4xx_request_extra_tags}" + cosmos_db_4xx_request_time_aggregator = "${var.cosmos_db_4xx_request_time_aggregator}" + cosmos_db_4xx_request_timeframe = "${var.cosmos_db_4xx_request_timeframe}" + + cosmos_db_5xx_requests_enabled = "${var.cosmos_db_5xx_requests_enabled}" + cosmos_db_5xx_request_rate_threshold_critical = "${var.cosmos_db_5xx_request_rate_threshold_critical}" + cosmos_db_5xx_request_rate_threshold_warning = "${var.cosmos_db_5xx_request_rate_threshold_warning}" + cosmos_db_5xx_requests_message = "${var.cosmos_db_5xx_requests_message}" + cosmos_db_5xx_requests_silenced = "${var.cosmos_db_5xx_requests_silenced}" + cosmos_db_5xx_request_rate_extra_tags = "${var.cosmos_db_5xx_request_rate_extra_tags}" + cosmos_db_5xx_request_time_aggregator = "${var.cosmos_db_5xx_request_time_aggregator}" + cosmos_db_5xx_request_timeframe = "${var.cosmos_db_5xx_request_timeframe}" +} + +module "datalakestore" { + source = "./datalakestore" + + environment = "${var.environment}" + message = "${var.message}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" + + status_enabled = "${var.datalakestore_status_enabled}" + status_silenced = "${var.datalakestore_status_silenced}" + status_message = "${var.datalakestore_status_message}" + status_timeframe = "${var.datalakestore_status_timeframe}" + status_time_aggregator = "${var.datalakestore_status_time_aggregator}" + status_extra_tags = "${var.datalakestore_status_extra_tags}" +} + +module "keyvault" { + source = 
"./keyvault" + + environment = "${var.environment}" + message = "${var.message}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" + + status_enabled = "${var.keyvault_status_enabled}" + status_silenced = "${var.keyvault_status_silenced}" + status_message = "${var.keyvault_status_message}" + status_timeframe = "${var.keyvault_status_timeframe}" + status_time_aggregator = "${var.keyvault_status_time_aggregator}" + status_extra_tags = "${var.keyvault_status_extra_tags}" + + api_result_enabled = "${var.keyvault_api_result_enabled}" + api_result_silenced = "${var.keyvault_api_result_silenced}" + api_result_message = "${var.keyvault_api_result_message}" + api_result_timeframe = "${var.keyvault_api_result_timeframe}" + api_result_time_aggregator = "${var.keyvault_api_result_time_aggregator}" + api_result_threshold_critical = "${var.keyvault_api_result_threshold_critical}" + api_result_threshold_warning = "${var.keyvault_api_result_threshold_warning}" + api_result_extra_tags = "${var.keyvault_api_result_extra_tags}" + + api_latency_enabled = "${var.keyvault_api_latency_enabled}" + api_latency_silenced = "${var.keyvault_api_latency_silenced}" + api_latency_message = "${var.keyvault_api_latency_message}" + api_latency_timeframe = "${var.keyvault_api_latency_timeframe}" + api_latency_time_aggregator = "${var.keyvault_api_latency_time_aggregator}" + api_latency_threshold_critical = "${var.keyvault_api_latency_threshold_critical}" + api_latency_threshold_warning = "${var.keyvault_api_latency_threshold_warning}" + api_latency_extra_tags = "${var.keyvault_api_latency_extra_tags}" +} diff --git a/cloud/azure/servicebus/README.md b/cloud/azure/servicebus/README.md index 052aab1..c810c21 100644 --- a/cloud/azure/servicebus/README.md +++ b/cloud/azure/servicebus/README.md @@ -16,7 +16,10 @@ module 
"datadog-monitors-cloud-azure-servicebus" { Creates DataDog monitors with the following checks: +- Service Bus has no active connection - Service Bus is down +- Service Bus server errors rate is high +- Service Bus user errors rate is high ## Inputs @@ -28,16 +31,37 @@ Creates DataDog monitors with the following checks: | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | | new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| no_active_connections_enabled | Flag to enable Service Bus no active connections monitor | string | `true` | no | +| no_active_connections_message | Custom message for Service Bus no active connections monitor | string | `` | no | +| no_active_connections_silenced | Groups to mute for Service Bus no active connections monitor | map | `` | no | +| no_active_connections_time_aggregator | Monitor aggregator for Service Bus no active connections [available values: min, max or avg] | string | `max` | no | +| no_active_connections_timeframe | Monitor timeframe for Service Bus no active connections [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| server_errors_enabled | Flag to enable Service Bus server errors monitor | string | `true` | no | +| server_errors_message | Custom message for Service Bus server errors monitor | string | `` | no | +| server_errors_silenced | Groups to mute for Service Bus server errors monitor | map | `` | no | +| server_errors_threshold_critical | Critical threshold for Service Bus server errors monitor | string | `90` | no | +| server_errors_threshold_warning | Warning threshold for Service Bus server errors monitor | string | `50` | no | +| server_errors_timeframe | Monitor timeframe for Service Bus server errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| status_enabled | Flag to enable Service Bus
status monitor | string | `true` | no | | status_extra_tags | Extra tags for Service Bus status monitor | list | `` | no | | status_message | Custom message for Service Bus status monitor | string | `` | no | | status_silenced | Groups to mute for Service Bus status monitor | map | `` | no | | status_time_aggregator | Monitor aggregator for Service Bus status [available values: min, max or avg] | string | `max` | no | -| status_timeframe | Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | +| status_timeframe | Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| user_errors_enabled | Flag to enable Service Bus user errors monitor | string | `true` | no | +| user_errors_message | Custom message for Service Bus user errors monitor | string | `` | no | +| user_errors_silenced | Groups to mute for Service Bus user errors monitor | map | `` | no | +| user_errors_threshold_critical | Critical threshold for Service Bus user errors monitor | string | `90` | no | +| user_errors_threshold_warning | Warning threshold for Service Bus user errors monitor | string | `50` | no | +| user_errors_timeframe | Monitor timeframe for Service Bus user errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | ## Outputs | Name | Description | |------|-------------| +| service_bus_no_active_connections_id | id for monitor service_bus_no_active_connections | +| service_bus_server_errors_id | id for monitor service_bus_server_errors | +| service_bus_user_errors_id | id for monitor service_bus_user_errors | | servicebus_status_id | id for monitor servicebus_status | ## Related documentation diff --git a/cloud/azure/servicebus/inputs.tf b/cloud/azure/servicebus/inputs.tf index 680b606..eccabf5 100644 --- 
a/cloud/azure/servicebus/inputs.tf +++ b/cloud/azure/servicebus/inputs.tf @@ -30,6 +30,12 @@ variable "filter_tags_custom" { } # Azure Service Bus specific variables +variable "status_enabled" { + description = "Flag to enable Service Bus status monitor" + type = "string" + default = "true" +} + variable "status_silenced" { description = "Groups to mute for Service Bus status monitor" type = "map" @@ -56,5 +62,103 @@ variable "status_time_aggregator" { variable "status_timeframe" { description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" + default = "last_5m" +} + +variable "no_active_connections_enabled" { + description = "Flag to enable Service Bus no active connections monitor" + type = "string" + default = "true" +} + +variable "no_active_connections_silenced" { + description = "Groups to mute for Service Bus no active connections monitor" + type = "map" + default = {} +} + +variable "no_active_connections_message" { + description = "Custom message for Service Bus no active connections monitor" + type = "string" + default = "" +} + +variable "no_active_connections_time_aggregator" { + description = "Monitor aggregator for Service Bus no active connections [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "no_active_connections_timeframe" { + description = "Monitor timeframe for Service Bus no active connections [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "server_errors_message" { + description = "Custom message for Service Bus server errors monitor" + type = "string" + default = "" +} + +variable "server_errors_enabled" { + description = "Flag to enable Service Bus server errors monitor" + type = "string" + default = "true" +} + +variable "server_errors_silenced" { + description = "Groups to mute for Service Bus server errors monitor" + type = "map" + default = {} +} + +variable
"server_errors_timeframe" { + description = "Monitor timeframe for Service Bus server errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "server_errors_threshold_critical" { + description = "Critical threshold for Service Bus server errors monitor" + default = 90 +} + +variable "server_errors_threshold_warning" { + description = "Warning threshold for Service Bus server errors monitor" + default = 50 +} + +variable "user_errors_message" { + description = "Custom message for Service Bus user errors monitor" + type = "string" + default = "" +} + +variable "user_errors_enabled" { + description = "Flag to enable Service Bus user errors monitor" + type = "string" + default = "true" +} + +variable "user_errors_silenced" { + description = "Groups to mute for Service Bus user errors monitor" + type = "map" + default = {} +} + +variable "user_errors_timeframe" { + description = "Monitor timeframe for Service Bus user errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "user_errors_threshold_critical" { + description = "Critical threshold for Service Bus user errors monitor" + default = 90 +} + +variable "user_errors_threshold_warning" { + description = "Warning threshold for Service Bus user errors monitor" + default = 50 } diff --git a/cloud/azure/servicebus/monitors-service-bus.tf b/cloud/azure/servicebus/monitors-service-bus.tf index dc9d1ea..82d0030 100644 --- a/cloud/azure/servicebus/monitors-service-bus.tf +++ b/cloud/azure/servicebus/monitors-service-bus.tf @@ -1,4 +1,6 @@ resource "datadog_monitor" "servicebus_status" { + count = "${var.status_enabled ? 
1 : 0}" + name = "[${var.environment}] Service Bus is down" message = "${coalesce(var.status_message, var.message)}" @@ -24,3 +26,104 @@ EOF tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:servicebus", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"] } + +resource "datadog_monitor" "service_bus_no_active_connections" { + count = "${var.no_active_connections_enabled ? 1 : 0}" + + name = "[${var.environment}] Service Bus has no active connection" + message = "${coalesce(var.no_active_connections_message, var.message)}" + + query = < ${var.user_errors_threshold_critical} + EOF + + type = "metric alert" + + thresholds { + critical = "${var.user_errors_threshold_critical}" + warning = "${var.user_errors_threshold_warning}" + } + + silenced = "${var.user_errors_silenced}" + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:servicebus", "team:claranet", "created-by:terraform"] +} + +resource "datadog_monitor" "service_bus_server_errors" { + count = "${var.server_errors_enabled ?
1 : 0}" + + name = "[${var.environment}] Service Bus server errors rate is high {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.server_errors_message, var.message)}" + + query = < ${var.server_errors_threshold_critical} + EOF + + type = "metric alert" + + thresholds { + critical = "${var.server_errors_threshold_critical}" + warning = "${var.server_errors_threshold_warning}" + } + + silenced = "${var.server_errors_silenced}" + + notify_no_data = false + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:servicebus", "team:claranet", "created-by:terraform"] +} diff --git a/cloud/azure/servicebus/outputs.tf b/cloud/azure/servicebus/outputs.tf index 843de78..ad117a1 100644 --- a/cloud/azure/servicebus/outputs.tf +++ b/cloud/azure/servicebus/outputs.tf @@ -2,3 +2,18 @@ output "servicebus_status_id" { description = "id for monitor servicebus_status" value = "${datadog_monitor.servicebus_status.*.id}" } + +output "service_bus_no_active_connections_id" { + description = "id for monitor service_bus_no_active_connections" + value = "${datadog_monitor.service_bus_no_active_connections.*.id}" +} + +output "service_bus_user_errors_id" { + description = "id for monitor service_bus_user_errors" + value = "${datadog_monitor.service_bus_user_errors.*.id}" +} + +output "service_bus_server_errors_id" { + description = "id for monitor service_bus_server_errors" + value = "${datadog_monitor.service_bus_server_errors.*.id}" +}