diff --git a/cloud/azure/cosmosdb/README.md b/cloud/azure/cosmosdb/README.md index 8db2c06..0b908dc 100644 --- a/cloud/azure/cosmosdb/README.md +++ b/cloud/azure/cosmosdb/README.md @@ -18,33 +18,52 @@ Creates DataDog monitors with the following checks: - Cosmos DB 4xx requests rate is high - Cosmos DB 5xx requests rate is high +- Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collections),count.index)} RU utilization is high - Cosmos DB has no request -- Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collection),count.index)} RU utilization is high +- Cosmos DB is down ## Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| +| cosmos_db_4xx_request_extra_tags | Extra tags for Cosmos DB 4xx requests monitor | list | `` | no | | cosmos_db_4xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 4xx requests monitor | string | `80` | no | | cosmos_db_4xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 4xx requests monitor | string | `50` | no | +| cosmos_db_4xx_request_time_aggregator | Monitor aggregator for Cosmos DB 4xx requests [available values: min, max or avg] | string | `sum` | no | +| cosmos_db_4xx_request_timeframe | Monitor timeframe for Cosmos DB 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | cosmos_db_4xx_requests_message | Custom message for Cosmos DB 4xx requests monitor | string | `` | no | | cosmos_db_4xx_requests_silenced | Groups to mute for Cosmos DB 4xx requests monitor | map | `` | no | +| cosmos_db_5xx_request_rate_extra_tags | Extra tags for Cosmos DB 5xx requests monitor | list | `` | no | | cosmos_db_5xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 5xx requests monitor | string | `80` | no | | cosmos_db_5xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 5xx requests monitor | string | `50` | no | +| 
cosmos_db_5xx_request_time_aggregator | Monitor aggregator for Cosmos DB 5xx requests [available values: min, max or avg] | string | `sum` | no | +| cosmos_db_5xx_request_timeframe | Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | cosmos_db_5xx_requests_message | Custom message for Cosmos DB 5xx requests monitor | string | `` | no | | cosmos_db_5xx_requests_silenced | Groups to mute for Cosmos DB 5xx requests monitor | map | `` | no | +| cosmos_db_no_request_extra_tags | Extra tags for Cosmos DB no request monitor | list | `` | no | | cosmos_db_no_request_message | Custom message for Cosmos DB no request monitor | string | `` | no | | cosmos_db_no_request_silenced | Groups to mute for Cosmos DB no request monitor | map | `` | no | -| cosmos_db_ru_utilization_collection | Group to associate Cosmos DB collection to RU max | map | - | yes | +| cosmos_db_no_request_time_aggregator | Monitor aggregator for Cosmos DB no request [available values: min, max or avg] | string | `max` | no | +| cosmos_db_no_request_timeframe | Monitor timeframe for Cosmos DB no request [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| cosmos_db_ru_utilization_collections | Group to associate Cosmos DB collection to RU max | map | - | yes | +| cosmos_db_ru_utilization_extra_tags | Extra tags for Cosmos DB collection RU utilization monitor | list | `` | no | | cosmos_db_ru_utilization_message | Custom message for Cosmos DB collection RU utilization monitor | string | `` | no | | cosmos_db_ru_utilization_rate_threshold_critical | Critical threshold for Cosmos DB collection RU utilization monitor | string | `90` | no | | cosmos_db_ru_utilization_rate_threshold_warning | Warning threshold for Cosmos DB collection RU utilization monitor | string | `80` | no | | cosmos_db_ru_utilization_silenced | Groups to mute for Cosmos DB 
collection RU utilization monitor | map | `` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| cosmos_db_ru_utilization_time_aggregator | Monitor aggregator for Cosmos DB RU utilization [available values: min, max or avg] | string | `min` | no | +| cosmos_db_ru_utilization_timeframe | Monitor timeframe for Cosmos DB RU utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| status_extra_tags | Extra tags for Cosmos DB status monitor | list | `` | no | +| status_message | Custom message for Cosmos DB status monitor | string | `` | no | +| status_silenced | Groups to mute for Cosmos DB status monitor | map | `` | no | +| status_time_aggregator | Monitor aggregator for Cosmos DB status [available values: min, max or avg] | string | `max` | no | +| status_timeframe | Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | ## Outputs @@ -53,6 +72,7 @@ Creates DataDog monitors with the following checks: | cosmos_db_4xx_requests_id | id for monitor cosmos_db_4xx_requests | | cosmos_db_5xx_requests_id | id for monitor cosmos_db_5xx_requests | | cosmos_db_ru_utilization_id | id for monitor cosmos_db_ru_utilization | +| cosmos_db_status_id | id for monitor cosmos_db_status | | cosmos_db_success_no_data_id | id for monitor cosmos_db_success_no_data | Related 
documentation diff --git a/cloud/azure/cosmosdb/inputs.tf b/cloud/azure/cosmosdb/inputs.tf index e8d04bf..19f0939 100644 --- a/cloud/azure/cosmosdb/inputs.tf +++ b/cloud/azure/cosmosdb/inputs.tf @@ -17,12 +17,47 @@ variable "message" { description = "Message sent when a monitor is triggered" } -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" default = 900 } +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + # Azure CosmosDB specific variables +variable "status_silenced" { + description = "Groups to mute for Cosmos DB status monitor" + type = "map" + default = {} +} + +variable "status_extra_tags" { + description = "Extra tags for Cosmos DB status monitor" + type = "list" + default = [] +} + +variable "status_message" { + description = "Custom message for Cosmos DB status monitor" + type = "string" + default = "" +} + +variable "status_time_aggregator" { + description = "Monitor aggregator for Cosmos DB status [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "status_timeframe" { + description = "Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "cosmos_db_4xx_requests_message" { description = "Custom message for Cosmos DB 4xx requests monitor" type = "string" @@ -45,6 +80,24 @@ variable "cosmos_db_4xx_request_rate_threshold_warning" { default = 50 } +variable "cosmos_db_4xx_request_extra_tags" { + description = "Extra tags for Cosmos DB 4xx requests monitor" + type = "list" + default = [] +} + +variable "cosmos_db_4xx_request_time_aggregator" { + description = "Monitor aggregator for Cosmos DB 4xx requests [available values: min, max or avg]" + type = "string" + default = "sum" +} + +variable "cosmos_db_4xx_request_timeframe" { + description = "Monitor timeframe for Cosmos DB 
4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "cosmos_db_5xx_requests_message" { description = "Custom message for Cosmos DB 5xx requests monitor" type = "string" @@ -67,6 +120,24 @@ variable "cosmos_db_5xx_request_rate_threshold_warning" { default = 50 } +variable "cosmos_db_5xx_request_rate_extra_tags" { + description = "Extra tags for Cosmos DB 5xx requests monitor" + type = "list" + default = [] +} + +variable "cosmos_db_5xx_request_time_aggregator" { + description = "Monitor aggregator for Cosmos DB 5xx requests [available values: min, max or avg]" + type = "string" + default = "sum" +} + +variable "cosmos_db_5xx_request_timeframe" { + description = "Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "cosmos_db_no_request_message" { description = "Custom message for Cosmos DB no request monitor" type = "string" @@ -79,6 +150,24 @@ variable "cosmos_db_no_request_silenced" { default = {} } +variable "cosmos_db_no_request_extra_tags" { + description = "Extra tags for Cosmos DB no request monitor" + type = "list" + default = [] +} + +variable "cosmos_db_no_request_time_aggregator" { + description = "Monitor aggregator for Cosmos DB no request [available values: min, max or avg]" + type = "string" + default = "max" +} + +variable "cosmos_db_no_request_timeframe" { + description = "Monitor timeframe for Cosmos DB no request [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + variable "cosmos_db_ru_utilization_message" { description = "Custom message for Cosmos DB collection RU utilization monitor" type = "string" @@ -101,7 +190,25 @@ variable "cosmos_db_ru_utilization_rate_threshold_warning" { default = 80 } -variable 
"cosmos_db_ru_utilization_collection" { +variable "cosmos_db_ru_utilization_extra_tags" { + description = "Extra tags for Cosmos DB collection RU utilization monitor" + type = "list" + default = [] +} + +variable "cosmos_db_ru_utilization_time_aggregator" { + description = "Monitor aggregator for Cosmos DB RU utilization [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "cosmos_db_ru_utilization_timeframe" { + description = "Monitor timeframe for Cosmos DB RU utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "cosmos_db_ru_utilization_collections" { description = "Group to associate Cosmos DB collection to RU max" type = "map" } diff --git a/cloud/azure/cosmosdb/modules.tf b/cloud/azure/cosmosdb/modules.tf new file mode 100644 index 0000000..aa2ac12 --- /dev/null +++ b/cloud/azure/cosmosdb/modules.tf @@ -0,0 +1,30 @@ +module "filter-tags" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "cosmosdb" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} + +module "filter-tags-statuscode" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "cosmosdb" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom},statuscode:%s" + + extra_tags = ["statuscode:%s"] +} + +module "filter-tags-collection" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "cosmosdb" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom},collectionname:%s" + + extra_tags = ["collectionname:%s"] +} diff --git a/cloud/azure/cosmosdb/monitors-cosmosdb.tf b/cloud/azure/cosmosdb/monitors-cosmosdb.tf index fb4b916..4b1e23b 100644 --- 
a/cloud/azure/cosmosdb/monitors-cosmosdb.tf +++ b/cloud/azure/cosmosdb/monitors-cosmosdb.tf @@ -1,9 +1,33 @@ -data "template_file" "filter" { - template = "$${filter}" +resource "datadog_monitor" "cosmos_db_status" { + name = "[${var.environment}] Cosmos DB is down" + message = "${coalesce(var.status_message, var.message)}" - vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_cosmosdb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + query = < ${var.cosmos_db_4xx_request_rate_threshold_critical} EOF @@ -37,16 +75,16 @@ resource "datadog_monitor" "cosmos_db_4xx_requests" { silenced = "${var.cosmos_db_4xx_requests_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = true - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" - tags = ["env:${var.environment}", "resource:cosmos_db", "team:azure", "provider:azure"] + tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_4xx_request_extra_tags}"] } resource "datadog_monitor" "cosmos_db_5xx_requests" { @@ -54,10 +92,16 @@ resource "datadog_monitor" "cosmos_db_5xx_requests" { message = "${coalesce(var.cosmos_db_5xx_requests_message, var.message)}" query = < ${var.cosmos_db_5xx_request_rate_threshold_critical} EOF @@ -72,16 +116,16 @@ resource "datadog_monitor" "cosmos_db_5xx_requests" { silenced = "${var.cosmos_db_5xx_requests_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = true - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" - tags = ["env:${var.environment}", 
"resource:cosmos_db", "team:azure", "provider:azure"] + tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_5xx_request_rate_extra_tags}"] } resource "datadog_monitor" "cosmos_db_success_no_data" { @@ -89,8 +133,9 @@ resource "datadog_monitor" "cosmos_db_success_no_data" { message = "${coalesce(var.cosmos_db_no_request_message, var.message)}" query = < ${var.cosmos_db_ru_utilization_rate_threshold_critical} EOF @@ -134,14 +182,14 @@ resource "datadog_monitor" "cosmos_db_ru_utilization" { silenced = "${var.cosmos_db_ru_utilization_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = true - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" - tags = ["env:${var.environment}", "resource:cosmos_db", "collection:${element(keys(var.cosmos_db_ru_utilization_collection),count.index)}", "team:azure", "provider:azure"] + tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_ru_utilization_extra_tags}"] } diff --git a/cloud/azure/cosmosdb/outputs-custom.tf b/cloud/azure/cosmosdb/outputs-custom.tf new file mode 100644 index 0000000..1da4343 --- /dev/null +++ b/cloud/azure/cosmosdb/outputs-custom.tf @@ -0,0 +1,4 @@ +output "cosmos_db_ru_utilization_id" { + description = "id for monitor cosmos_db_ru_utilization" + value = "${datadog_monitor.cosmos_db_ru_utilization.*.id}" +} diff --git a/cloud/azure/cosmosdb/outputs.tf b/cloud/azure/cosmosdb/outputs.tf index 0dd8c71..1039a55 100644 --- a/cloud/azure/cosmosdb/outputs.tf +++ b/cloud/azure/cosmosdb/outputs.tf @@ -1,3 +1,8 @@ +output "cosmos_db_status_id" { + description = "id for monitor cosmos_db_status" + value = 
"${datadog_monitor.cosmos_db_status.id}" +} + output "cosmos_db_4xx_requests_id" { description = "id for monitor cosmos_db_4xx_requests" value = "${datadog_monitor.cosmos_db_4xx_requests.id}" @@ -12,8 +17,3 @@ output "cosmos_db_success_no_data_id" { description = "id for monitor cosmos_db_success_no_data" value = "${datadog_monitor.cosmos_db_success_no_data.id}" } - -output "cosmos_db_ru_utilization_id" { - description = "id for monitor cosmos_db_ru_utilization" - value = "${datadog_monitor.cosmos_db_ru_utilization.id}" -} diff --git a/cloud/azure/datalakestore/README.md b/cloud/azure/datalakestore/README.md index a96ffe9..9603957 100644 --- a/cloud/azure/datalakestore/README.md +++ b/cloud/azure/datalakestore/README.md @@ -22,11 +22,13 @@ Creates DataDog monitors with the following checks: | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| status_extra_tags | Extra tags for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | list | `` | no | | status_message | Custom message for Datalake Store status monitor | string | `` | no | | status_silenced | Groups to mute for Datalake Store status monitor | map | `` | no | | status_time_aggregator | Monitor aggregator for Datalake Store status [available values: min, max or avg] | string | `max` | no | diff --git 
a/cloud/azure/datalakestore/inputs.tf b/cloud/azure/datalakestore/inputs.tf index 30afe45..319056a 100644 --- a/cloud/azure/datalakestore/inputs.tf +++ b/cloud/azure/datalakestore/inputs.tf @@ -17,11 +17,16 @@ variable "message" { description = "Message sent when a monitor is triggered" } -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" default = 900 } +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + # Azure Datalake Store specific variables variable "status_silenced" { description = "Groups to mute for Datalake Store status monitor" @@ -43,5 +48,11 @@ variable "status_time_aggregator" { variable "status_timeframe" { description = "Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" + default = "last_5m" +} + +variable "status_extra_tags" { + description = "Extra tags for Datalake Store status monitor" + type = "list" + default = [] } diff --git a/cloud/azure/datalakestore/modules.tf b/cloud/azure/datalakestore/modules.tf new file mode 100644 index 0000000..2a526b2 --- /dev/null +++ b/cloud/azure/datalakestore/modules.tf @@ -0,0 +1,8 @@ +module "filter-tags" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "datalakestore" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} diff --git a/cloud/azure/datalakestore/monitors-datalakestore.tf b/cloud/azure/datalakestore/monitors-datalakestore.tf index fa4b67d..b7c41b1 100644 --- a/cloud/azure/datalakestore/monitors-datalakestore.tf +++ b/cloud/azure/datalakestore/monitors-datalakestore.tf @@ -1,19 +1,11 @@ -data "template_file" "filter" { - template = "$${filter}" - - vars { - filter = 
"${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_servicebus:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" - } -} - resource "datadog_monitor" "datalakestore_status" { name = "[${var.environment}] Datalake Store is down" message = "${coalesce(var.status_message, var.message)}" query = <` | no | | api_result_message | Custom message for Key Vault API result monitor | string | `` | no | | api_result_silenced | Groups to mute for Key Vault API result monitor | map | `` | no | | api_result_threshold_critical | Critical threshold for Key Vault API result rate | string | `10` | no | | api_result_threshold_warning | Warning threshold for Key Vault API result rate | string | `30` | no | +| api_result_time_aggregator | Monitor aggregator for Key Vault API result [available values: min, max or avg] | string | `sum` | no | | api_result_timeframe | Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_30m` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | +| status_extra_tags | Extra tags for Key Vault status monitor | list | `` | no | | status_message | Custom message for Key Vault status monitor | string | `` | no | | status_silenced | Groups to mute for Key Vault status monitor | map | `` | no | | status_time_aggregator | Monitor aggregator for Key Vault status [available values: min, 
max or avg] | string | `max` | no | diff --git a/cloud/azure/keyvault/inputs.tf b/cloud/azure/keyvault/inputs.tf index 6fac667..1e31cec 100644 --- a/cloud/azure/keyvault/inputs.tf +++ b/cloud/azure/keyvault/inputs.tf @@ -17,11 +17,16 @@ variable "message" { description = "Message sent when a monitor is triggered" } -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" default = 900 } +variable "new_host_delay" { + description = "Delay in seconds before monitor new resource" + default = 300 +} + # Azure Key Vault specific variables variable "status_silenced" { description = "Groups to mute for Key Vault status monitor" @@ -43,7 +48,13 @@ variable "status_time_aggregator" { variable "status_timeframe" { description = "Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" + default = "last_5m" +} + +variable "status_extra_tags" { + description = "Extra tags for Key Vault status monitor" + type = "list" + default = [] } variable "api_result_silenced" { @@ -58,9 +69,15 @@ variable "api_result_message" { default = "" } +variable "api_result_time_aggregator" { + description = "Monitor aggregator for Key Vault API result [available values: min, max or avg]" + type = "string" + default = "sum" +} + variable "api_result_timeframe" { description = "Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_30m" + default = "last_5m" } variable "api_result_threshold_critical" { @@ -72,3 +89,48 @@ variable "api_result_threshold_warning" { description = "Warning threshold for Key Vault API result rate" default = 30 } + +variable "api_result_extra_tags" { + description = "Extra tags for Key Vault API result monitor" + type = "list" + default = [] +} + +variable "api_latency_silenced" { + description = "Groups to mute for Key Vault API 
latency monitor" + type = "map" + default = {} +} + +variable "api_latency_message" { + description = "Custom message for Key Vault API latency monitor" + type = "string" + default = "" +} + +variable "api_latency_time_aggregator" { + description = "Monitor aggregator for Key Vault API latency [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "api_latency_timeframe" { + description = "Monitor timeframe for Key Vault API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + default = "last_5m" +} + +variable "api_latency_threshold_critical" { + description = "Critical threshold for Key Vault API latency rate" + default = 100 +} + +variable "api_latency_threshold_warning" { + description = "Warning threshold for Key Vault API latency rate" + default = 80 +} + +variable "api_latency_extra_tags" { + description = "Extra tags for Key Vault API latency monitor" + type = "list" + default = [] +} diff --git a/cloud/azure/keyvault/modules.tf b/cloud/azure/keyvault/modules.tf new file mode 100644 index 0000000..0c21a6a --- /dev/null +++ b/cloud/azure/keyvault/modules.tf @@ -0,0 +1,19 @@ +module "filter-tags" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "keyvault" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom}" +} + +module "filter-tags-statuscode" { + source = "../../../common/filter-tags" + + environment = "${var.environment}" + resource = "keyvault" + filter_tags_use_defaults = "${var.filter_tags_use_defaults}" + filter_tags_custom = "${var.filter_tags_custom},statuscode:%s" + + extra_tags = ["statuscode:%s"] +} diff --git a/cloud/azure/keyvault/monitors-keyvault.tf b/cloud/azure/keyvault/monitors-keyvault.tf index f35c67d..9f5703d 100644 --- a/cloud/azure/keyvault/monitors-keyvault.tf +++ b/cloud/azure/keyvault/monitors-keyvault.tf @@ -1,36 +1,28 @@ -data "template_file" 
"filter" { - template = "$${filter}" - - vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_servicebus:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" - } -} - resource "datadog_monitor" "keyvault_status" { name = "[${var.environment}] Key Vault is down" message = "${coalesce(var.status_message, var.message)}" query = < ${var.api_latency_threshold_critical} + EOF + + thresholds { + critical = "${var.api_latency_threshold_critical}" + warning = "${var.api_latency_threshold_warning}" + } + + type = "metric alert" + + silenced = "${var.api_latency_silenced}" + + notify_no_data = true + evaluation_delay = "${var.evaluation_delay}" + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + require_full_window = false + new_host_delay = "${var.new_host_delay}" + + tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:keyvault", "team:claranet", "created-by:terraform", "${var.api_latency_extra_tags}"] } diff --git a/cloud/azure/monitors.tf b/cloud/azure/monitors.tf index 025f649..a495dc8 100644 --- a/cloud/azure/monitors.tf +++ b/cloud/azure/monitors.tf @@ -390,36 +390,59 @@ module "streamanalytics" { module "cosmosdb" { source = "./cosmosdb" - environment = "${var.environment}" - message = "${var.message}" - delay = "${var.delay}" + environment = "${var.environment}" + message = "${var.message}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" filter_tags_use_defaults = "${var.filter_tags_use_defaults}" filter_tags_custom = "${var.filter_tags_custom}" + status_message = "${var.cosmos_db_status_message}" + status_silenced = "${var.cosmos_db_status_silenced}" + status_extra_tags = "${var.cosmos_db_status_extra_tags}" + status_time_aggregator = "${var.cosmos_db_status_time_aggregator}" + status_timeframe = "${var.cosmos_db_status_timeframe}" + cosmos_db_4xx_request_rate_threshold_critical = 
"${var.cosmos_db_4xx_request_rate_threshold_critical}" cosmos_db_4xx_request_rate_threshold_warning = "${var.cosmos_db_4xx_request_rate_threshold_warning}" cosmos_db_4xx_requests_message = "${var.cosmos_db_4xx_requests_message}" cosmos_db_4xx_requests_silenced = "${var.cosmos_db_4xx_requests_silenced}" + cosmos_db_4xx_request_extra_tags = "${var.cosmos_db_4xx_request_extra_tags}" + cosmos_db_4xx_request_time_aggregator = "${var.cosmos_db_4xx_request_time_aggregator}" + cosmos_db_4xx_request_timeframe = "${var.cosmos_db_4xx_request_timeframe}" cosmos_db_5xx_request_rate_threshold_critical = "${var.cosmos_db_5xx_request_rate_threshold_critical}" cosmos_db_5xx_request_rate_threshold_warning = "${var.cosmos_db_5xx_request_rate_threshold_warning}" cosmos_db_5xx_requests_message = "${var.cosmos_db_5xx_requests_message}" cosmos_db_5xx_requests_silenced = "${var.cosmos_db_5xx_requests_silenced}" + cosmos_db_5xx_request_rate_extra_tags = "${var.cosmos_db_5xx_request_rate_extra_tags}" + cosmos_db_5xx_request_time_aggregator = "${var.cosmos_db_5xx_request_time_aggregator}" + cosmos_db_5xx_request_timeframe = "${var.cosmos_db_5xx_request_timeframe}" + + cosmos_db_no_request_message = "${var.cosmos_db_no_request_message}" + cosmos_db_no_request_silenced = "${var.cosmos_db_no_request_silenced}" + cosmos_db_no_request_extra_tags = "${var.cosmos_db_no_request_extra_tags}" + cosmos_db_no_request_time_aggregator = "${var.cosmos_db_no_request_time_aggregator}" + cosmos_db_no_request_timeframe = "${var.cosmos_db_no_request_timeframe}" cosmos_db_ru_utilization_rate_threshold_critical = "${var.cosmos_db_ru_utilization_rate_threshold_critical}" cosmos_db_ru_utilization_rate_threshold_warning = "${var.cosmos_db_ru_utilization_rate_threshold_warning}" cosmos_db_ru_utilization_message = "${var.cosmos_db_ru_utilization_message}" cosmos_db_ru_utilization_silenced = "${var.cosmos_db_ru_utilization_silenced}" - cosmos_db_ru_utilization_collection = "${var.cosmos_db_ru_utilization_collection}" + 
cosmos_db_ru_utilization_extra_tags = "${var.cosmos_db_ru_utilization_extra_tags}" + cosmos_db_ru_utilization_time_aggregator = "${var.cosmos_db_ru_utilization_time_aggregator}" + cosmos_db_ru_utilization_timeframe = "${var.cosmos_db_ru_utilization_timeframe}" + cosmos_db_ru_utilization_collections = "${var.cosmos_db_ru_utilization_collections}" } module "datalakestore" { source = "./datalakestore" - environment = "${var.environment}" - message = "${var.message}" - delay = "${var.delay}" + environment = "${var.environment}" + message = "${var.message}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" filter_tags_use_defaults = "${var.filter_tags_use_defaults}" filter_tags_custom = "${var.filter_tags_custom}" @@ -428,14 +451,16 @@ module "datalakestore" { status_message = "${var.datalakestore_status_message}" status_timeframe = "${var.datalakestore_status_timeframe}" status_time_aggregator = "${var.datalakestore_status_time_aggregator}" + status_extra_tags = "${var.datalakestore_status_extra_tags}" } module "keyvault" { source = "./keyvault" - environment = "${var.environment}" - message = "${var.message}" - delay = "${var.delay}" + environment = "${var.environment}" + message = "${var.message}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" filter_tags_use_defaults = "${var.filter_tags_use_defaults}" filter_tags_custom = "${var.filter_tags_custom}" @@ -444,10 +469,22 @@ module "keyvault" { status_message = "${var.keyvault_status_message}" status_timeframe = "${var.keyvault_status_timeframe}" status_time_aggregator = "${var.keyvault_status_time_aggregator}" + status_extra_tags = "${var.keyvault_status_extra_tags}" + api_result_enabled = "${var.keyvault_api_result_enabled}" api_result_silenced = "${var.keyvault_api_result_silenced}" api_result_message = "${var.keyvault_api_result_message}" api_result_timeframe = "${var.keyvault_api_result_timeframe}" + api_result_time_aggregator = 
"${var.keyvault_api_result_time_aggregator}" api_result_threshold_critical = "${var.keyvault_api_result_threshold_critical}" api_result_threshold_warning = "${var.keyvault_api_result_threshold_warning}" + api_result_extra_tags = "${var.keyvault_api_result_extra_tags}" + + api_latency_silenced = "${var.keyvault_api_latency_silenced}" + api_latency_message = "${var.keyvault_api_latency_message}" + api_latency_timeframe = "${var.keyvault_api_latency_timeframe}" + api_latency_time_aggregator = "${var.keyvault_api_latency_time_aggregator}" + api_latency_threshold_critical = "${var.keyvault_api_latency_threshold_critical}" + api_latency_threshold_warning = "${var.keyvault_api_latency_threshold_warning}" + api_latency_extra_tags = "${var.keyvault_api_latency_extra_tags}" } diff --git a/cloud/azure/servicebus/README.md b/cloud/azure/servicebus/README.md index cca3b22..cd8682b 100644 --- a/cloud/azure/servicebus/README.md +++ b/cloud/azure/servicebus/README.md @@ -16,10 +16,10 @@ module "datadog-monitors-cloud-azure-servicebus" { Creates DataDog monitors with the following checks: -- Service Bus is down - Service Bus has no active connection -- Service Bus user errors rate is high +- Service Bus is down - Service Bus server errors rate is high +- Service Bus user errors rate is high ## Inputs @@ -30,6 +30,7 @@ Creates DataDog monitors with the following checks: | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | +| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | | no_active_connections_message | Custom message for Service Bus status monitor | string | `` | no | | no_active_connections_silenced | Groups to mute for Service Bus status monitor | map | `` | no | | no_active_connections_time_aggregator | Monitor 
aggregator for Service Bus status [available values: min, max or avg] | string | `max` | no | @@ -39,7 +40,6 @@ Creates DataDog monitors with the following checks: | server_errors_threshold_critical | Critical threshold for Service Bus server errors monitor | string | `90` | no | | server_errors_threshold_warning | Warning threshold for Service Bus server errors monitor | string | `50` | no | | server_errors_timeframe | Monitor timeframe for Service Bus server errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | -| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no | | status_extra_tags | Extra tags for Service Bus status monitor | list | `` | no | | status_message | Custom message for Service Bus status monitor | string | `` | no | | status_silenced | Groups to mute for Service Bus status monitor | map | `` | no | diff --git a/cloud/azure/servicebus/inputs.tf b/cloud/azure/servicebus/inputs.tf index ca8bbc4..8d6a454 100644 --- a/cloud/azure/servicebus/inputs.tf +++ b/cloud/azure/servicebus/inputs.tf @@ -56,7 +56,7 @@ variable "status_time_aggregator" { variable "status_timeframe" { description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" - default = "last_15m" + default = "last_5m" } variable "no_active_connections_silenced" { @@ -80,7 +80,7 @@ variable "no_active_connections_time_aggregator" { variable "no_active_connections_timeframe" { description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_15m" + default = "last_5m" } variable "server_errors_message" { diff --git a/cloud/azure/servicebus/monitors-service-bus.tf b/cloud/azure/servicebus/monitors-service-bus.tf index 79acaf2..0a17b93 100644 --- a/cloud/azure/servicebus/monitors-service-bus.tf +++ 
b/cloud/azure/servicebus/monitors-service-bus.tf @@ -31,7 +31,7 @@ resource "datadog_monitor" "service_bus_no_active_connections" { query = < ${var.user_errors_threshold_critical} EOF @@ -74,14 +74,14 @@ resource "datadog_monitor" "service_bus_user_errors" { silenced = "${var.user_errors_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = true - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"] } @@ -92,8 +92,8 @@ resource "datadog_monitor" "service_bus_server_errors" { query = < ${var.server_errors_threshold_critical} EOF @@ -108,14 +108,14 @@ resource "datadog_monitor" "service_bus_server_errors" { silenced = "${var.server_errors_silenced}" notify_no_data = false - evaluation_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" renotify_interval = 0 notify_audit = false timeout_h = 0 include_tags = true locked = false require_full_window = true - new_host_delay = "${var.delay}" + new_host_delay = "${var.new_host_delay}" tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"] }