MON-237 Monitors improvements and best practices fixes

This commit is contained in:
Laurent Piroelle 2018-08-24 17:35:24 +02:00
parent a3d9384681
commit 56d40bcb43
19 changed files with 656 additions and 123 deletions

View File

@ -18,33 +18,52 @@ Creates DataDog monitors with the following checks:
- Cosmos DB 4xx requests rate is high - Cosmos DB 4xx requests rate is high
- Cosmos DB 5xx requests rate is high - Cosmos DB 5xx requests rate is high
- Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collections),count.index)} RU utilization is high
- Cosmos DB has no request - Cosmos DB has no request
- Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collection),count.index)} RU utilization is high - Cosmos DB is down
## Inputs ## Inputs
| Name | Description | Type | Default | Required | | Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:| |------|-------------|:----:|:-----:|:-----:|
| cosmos_db_4xx_request_extra_tags | Extra tags for Cosmos DB 4xx requests monitor | list | `<list>` | no |
| cosmos_db_4xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 4xx requests monitor | string | `80` | no | | cosmos_db_4xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 4xx requests monitor | string | `80` | no |
| cosmos_db_4xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 4xx requests monitor | string | `50` | no | | cosmos_db_4xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 4xx requests monitor | string | `50` | no |
| cosmos_db_4xx_request_time_aggregator | Monitor aggregator for Cosmos DB 4xx requests [available values: min, max, sum or avg] | string | `sum` | no |
| cosmos_db_4xx_request_timeframe | Monitor timeframe for Cosmos DB 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| cosmos_db_4xx_requests_message | Custom message for Cosmos DB 4xx requests monitor | string | `` | no | | cosmos_db_4xx_requests_message | Custom message for Cosmos DB 4xx requests monitor | string | `` | no |
| cosmos_db_4xx_requests_silenced | Groups to mute for Cosmos DB 4xx requests monitor | map | `<map>` | no | | cosmos_db_4xx_requests_silenced | Groups to mute for Cosmos DB 4xx requests monitor | map | `<map>` | no |
| cosmos_db_5xx_request_rate_extra_tags | Extra tags for Cosmos DB 5xx requests monitor | list | `<list>` | no |
| cosmos_db_5xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 5xx requests monitor | string | `80` | no | | cosmos_db_5xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 5xx requests monitor | string | `80` | no |
| cosmos_db_5xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 5xx requests monitor | string | `50` | no | | cosmos_db_5xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 5xx requests monitor | string | `50` | no |
| cosmos_db_5xx_request_time_aggregator | Monitor aggregator for Cosmos DB 5xx requests [available values: min, max, sum or avg] | string | `sum` | no |
| cosmos_db_5xx_request_timeframe | Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| cosmos_db_5xx_requests_message | Custom message for Cosmos DB 5xx requests monitor | string | `` | no | | cosmos_db_5xx_requests_message | Custom message for Cosmos DB 5xx requests monitor | string | `` | no |
| cosmos_db_5xx_requests_silenced | Groups to mute for Cosmos DB 5xx requests monitor | map | `<map>` | no | | cosmos_db_5xx_requests_silenced | Groups to mute for Cosmos DB 5xx requests monitor | map | `<map>` | no |
| cosmos_db_no_request_extra_tags | Extra tags for Cosmos DB no request monitor | list | `<list>` | no |
| cosmos_db_no_request_message | Custom message for Cosmos DB no request monitor | string | `` | no | | cosmos_db_no_request_message | Custom message for Cosmos DB no request monitor | string | `` | no |
| cosmos_db_no_request_silenced | Groups to mute for Cosmos DB no request monitor | map | `<map>` | no | | cosmos_db_no_request_silenced | Groups to mute for Cosmos DB no request monitor | map | `<map>` | no |
| cosmos_db_ru_utilization_collection | Group to associate Cosmos DB collection to RU max | map | - | yes | | cosmos_db_no_request_time_aggregator | Monitor aggregator for Cosmos DB no request [available values: min, max or avg] | string | `max` | no |
| cosmos_db_no_request_timeframe | Monitor timeframe for Cosmos DB no request [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| cosmos_db_ru_utilization_collections | Group to associate Cosmos DB collection to RU max | map | - | yes |
| cosmos_db_ru_utilization_extra_tags | Extra tags for Cosmos DB collection RU utilization monitor | list | `<list>` | no |
| cosmos_db_ru_utilization_message | Custom message for Cosmos DB collection RU utilization monitor | string | `` | no | | cosmos_db_ru_utilization_message | Custom message for Cosmos DB collection RU utilization monitor | string | `` | no |
| cosmos_db_ru_utilization_rate_threshold_critical | Critical threshold for Cosmos DB collection RU utilization monitor | string | `90` | no | | cosmos_db_ru_utilization_rate_threshold_critical | Critical threshold for Cosmos DB collection RU utilization monitor | string | `90` | no |
| cosmos_db_ru_utilization_rate_threshold_warning | Warning threshold for Cosmos DB collection RU utilization monitor | string | `80` | no | | cosmos_db_ru_utilization_rate_threshold_warning | Warning threshold for Cosmos DB collection RU utilization monitor | string | `80` | no |
| cosmos_db_ru_utilization_silenced | Groups to mute for Cosmos DB collection RU utilization monitor | map | `<map>` | no | | cosmos_db_ru_utilization_silenced | Groups to mute for Cosmos DB collection RU utilization monitor | map | `<map>` | no |
| delay | Delay in seconds for the metric evaluation | string | `900` | no | | cosmos_db_ru_utilization_time_aggregator | Monitor aggregator for Cosmos DB RU utilization [available values: min, max or avg] | string | `min` | no |
| cosmos_db_ru_utilization_timeframe | Monitor timeframe for Cosmos DB RU utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| environment | Architecture environment | string | - | yes | | environment | Architecture environment | string | - | yes |
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when a monitor is triggered | string | - | yes | | message | Message sent when a monitor is triggered | string | - | yes |
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
| status_extra_tags | Extra tags for Cosmos DB status monitor | list | `<list>` | no |
| status_message | Custom message for Cosmos DB status monitor | string | `` | no |
| status_silenced | Groups to mute for Cosmos DB status monitor | map | `<map>` | no |
| status_time_aggregator | Monitor aggregator for Cosmos DB status [available values: min, max or avg] | string | `max` | no |
| status_timeframe | Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
## Outputs ## Outputs
@ -53,6 +72,7 @@ Creates DataDog monitors with the following checks:
| cosmos_db_4xx_requests_id | id for monitor cosmos_db_4xx_requests | | cosmos_db_4xx_requests_id | id for monitor cosmos_db_4xx_requests |
| cosmos_db_5xx_requests_id | id for monitor cosmos_db_5xx_requests | | cosmos_db_5xx_requests_id | id for monitor cosmos_db_5xx_requests |
| cosmos_db_ru_utilization_id | id for monitor cosmos_db_ru_utilization | | cosmos_db_ru_utilization_id | id for monitor cosmos_db_ru_utilization |
| cosmos_db_status_id | id for monitor cosmos_db_status |
| cosmos_db_success_no_data_id | id for monitor cosmos_db_success_no_data | | cosmos_db_success_no_data_id | id for monitor cosmos_db_success_no_data |
Related documentation Related documentation

View File

@ -17,12 +17,47 @@ variable "message" {
description = "Message sent when a monitor is triggered" description = "Message sent when a monitor is triggered"
} }
variable "delay" { variable "evaluation_delay" {
description = "Delay in seconds for the metric evaluation" description = "Delay in seconds for the metric evaluation"
default = 900 default = 900
} }
# Feeds the `new_host_delay` argument of the Cosmos DB monitors.
variable "new_host_delay" {
  default     = 300
  description = "Delay in seconds before monitor new resource"
}
# Azure CosmosDB specific variables # Azure CosmosDB specific variables
# Passed as-is to the `silenced` argument of the cosmos_db_status monitor.
variable "status_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Cosmos DB status monitor"
}
# Appended after the fixed tag set of the cosmos_db_status monitor.
variable "status_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Cosmos DB status monitor"
}
# Monitor-specific message; the status monitor coalesces it with `var.message`,
# so the empty default falls back to the shared message.
variable "status_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Cosmos DB status monitor"
}
# Time aggregation function interpolated at the head of the status monitor query.
variable "status_time_aggregator" {
  type        = "string"
  default     = "max"
  description = "Monitor aggregator for Cosmos DB status [available values: min, max or avg]"
}
# Evaluation window interpolated into the status monitor query, e.g. `max(last_5m): ...`.
variable "status_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
variable "cosmos_db_4xx_requests_message" { variable "cosmos_db_4xx_requests_message" {
description = "Custom message for Cosmos DB 4xx requests monitor" description = "Custom message for Cosmos DB 4xx requests monitor"
type = "string" type = "string"
@ -45,6 +80,24 @@ variable "cosmos_db_4xx_request_rate_threshold_warning" {
default = 50 default = 50
} }
# Appended after the fixed tag set of the cosmos_db_4xx_requests monitor.
variable "cosmos_db_4xx_request_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Cosmos DB 4xx requests monitor"
}
# Time aggregation function interpolated at the head of the 4xx requests
# monitor query. The default is `sum`, so the description lists it among the
# accepted values (it previously advertised only min, max and avg).
variable "cosmos_db_4xx_request_time_aggregator" {
  description = "Monitor aggregator for Cosmos DB 4xx requests [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}
# Evaluation window interpolated into the 4xx requests monitor query.
variable "cosmos_db_4xx_request_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for Cosmos DB 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
variable "cosmos_db_5xx_requests_message" { variable "cosmos_db_5xx_requests_message" {
description = "Custom message for Cosmos DB 5xx requests monitor" description = "Custom message for Cosmos DB 5xx requests monitor"
type = "string" type = "string"
@ -67,6 +120,24 @@ variable "cosmos_db_5xx_request_rate_threshold_warning" {
default = 50 default = 50
} }
# Appended after the fixed tag set of the cosmos_db_5xx_requests monitor.
variable "cosmos_db_5xx_request_rate_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Cosmos DB 5xx requests monitor"
}
# Time aggregation function interpolated at the head of the 5xx requests
# monitor query. The default is `sum`, so the description lists it among the
# accepted values (it previously advertised only min, max and avg).
variable "cosmos_db_5xx_request_time_aggregator" {
  description = "Monitor aggregator for Cosmos DB 5xx requests [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}
# Evaluation window interpolated into the 5xx requests monitor query.
variable "cosmos_db_5xx_request_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
variable "cosmos_db_no_request_message" { variable "cosmos_db_no_request_message" {
description = "Custom message for Cosmos DB no request monitor" description = "Custom message for Cosmos DB no request monitor"
type = "string" type = "string"
@ -79,6 +150,24 @@ variable "cosmos_db_no_request_silenced" {
default = {} default = {}
} }
# Appended after the fixed tag set of the cosmos_db_success_no_data monitor.
variable "cosmos_db_no_request_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Cosmos DB no request monitor"
}
# Time aggregation function interpolated at the head of the no-request monitor query.
variable "cosmos_db_no_request_time_aggregator" {
  type        = "string"
  default     = "max"
  description = "Monitor aggregator for Cosmos DB no request [available values: min, max or avg]"
}
# Evaluation window interpolated into the no-request monitor query.
variable "cosmos_db_no_request_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for Cosmos DB no request [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
variable "cosmos_db_ru_utilization_message" { variable "cosmos_db_ru_utilization_message" {
description = "Custom message for Cosmos DB collection RU utilization monitor" description = "Custom message for Cosmos DB collection RU utilization monitor"
type = "string" type = "string"
@ -101,7 +190,25 @@ variable "cosmos_db_ru_utilization_rate_threshold_warning" {
default = 80 default = 80
} }
variable "cosmos_db_ru_utilization_collection" { variable "cosmos_db_ru_utilization_extra_tags" {
description = "Extra tags for Cosmos DB collection RU utilization monitor"
type = "list"
default = []
}
# Time aggregation function interpolated at the head of the RU utilization monitor query.
variable "cosmos_db_ru_utilization_time_aggregator" {
  type        = "string"
  default     = "min"
  description = "Monitor aggregator for Cosmos DB RU utilization [available values: min, max or avg]"
}
# Evaluation window interpolated into the RU utilization monitor query.
variable "cosmos_db_ru_utilization_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for Cosmos DB RU utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
variable "cosmos_db_ru_utilization_collections" {
description = "Group to associate Cosmos DB collection to RU max" description = "Group to associate Cosmos DB collection to RU max"
type = "map" type = "map"
} }

View File

@ -0,0 +1,30 @@
# Base filter for the Cosmos DB monitors; its `query_alert` output is
# interpolated directly after the metric name in the monitor queries.
module "filter-tags" {
  source = "../../../common/filter-tags"

  resource                 = "cosmosdb"
  environment              = "${var.environment}"
  filter_tags_custom       = "${var.filter_tags_custom}"
  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
}
# Same filter with an additional `statuscode:%s` placeholder. The monitors run
# the `query_alert` output through format() to substitute each HTTP status code.
module "filter-tags-statuscode" {
  source = "../../../common/filter-tags"

  resource                 = "cosmosdb"
  environment              = "${var.environment}"
  extra_tags               = ["statuscode:%s"]
  filter_tags_custom       = "${var.filter_tags_custom},statuscode:%s"
  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
}
# Same filter with an additional `collectionname:%s` placeholder. The RU
# utilization monitor substitutes the lower-cased collection name via format().
module "filter-tags-collection" {
  source = "../../../common/filter-tags"

  resource                 = "cosmosdb"
  environment              = "${var.environment}"
  extra_tags               = ["collectionname:%s"]
  filter_tags_custom       = "${var.filter_tags_custom},collectionname:%s"
  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
}

View File

@ -1,9 +1,33 @@
data "template_file" "filter" { resource "datadog_monitor" "cosmos_db_status" {
template = "$${filter}" name = "[${var.environment}] Cosmos DB is down"
message = "${coalesce(var.status_message, var.message)}"
vars { query = <<EOF
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_cosmosdb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" ${var.status_time_aggregator}(${var.status_timeframe}):(
avg:azure.cosmosdb.status${module.filter-tags.query_alert} by {resource_group,region,name} +
avg:azure.documentdb_databaseaccounts.status${module.filter-tags.query_alert} by {resource_group,region,name})
< 1
EOF
type = "metric alert"
thresholds {
critical = 1
} }
silenced = "${var.status_silenced}"
notify_no_data = true
evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0
notify_audit = false
timeout_h = 0
include_tags = true
locked = false
require_full_window = false
new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"]
} }
resource "datadog_monitor" "cosmos_db_4xx_requests" { resource "datadog_monitor" "cosmos_db_4xx_requests" {
@ -11,18 +35,32 @@ resource "datadog_monitor" "cosmos_db_4xx_requests" {
message = "${coalesce(var.cosmos_db_4xx_requests_message, var.message)}" message = "${coalesce(var.cosmos_db_4xx_requests_message, var.message)}"
query = <<EOF query = <<EOF
sum(last_5m): (default( ${var.cosmos_db_4xx_request_time_aggregator}(${var.cosmos_db_4xx_request_timeframe}): (default(
( avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:400} by {resource_group,region,name}.as_count() + (
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:403} by {resource_group,region,name}.as_count() + avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "400")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:404} by {resource_group,region,name}.as_count() + avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "401")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:408} by {resource_group,region,name}.as_count() + avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "403")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:409} by {resource_group,region,name}.as_count() + avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "404")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:410} by {resource_group,region,name}.as_count() + avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "408")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:412} by {resource_group,region,name}.as_count() + avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "409")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:413} by {resource_group,region,name}.as_count() + avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "412")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:429} by {resource_group,region,name}.as_count() + avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "413")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:449} by {resource_group,region,name}.as_count() ) / avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "429")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "449")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "400")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "401")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "403")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "404")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "408")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "409")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "412")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "413")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "429")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "449")} by {resource_group,region,name}.as_count()
) / (
avg:azure.cosmosdb.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
)
* 100, 0) * 100, 0)
) > ${var.cosmos_db_4xx_request_rate_threshold_critical} ) > ${var.cosmos_db_4xx_request_rate_threshold_critical}
EOF EOF
@ -37,16 +75,16 @@ resource "datadog_monitor" "cosmos_db_4xx_requests" {
silenced = "${var.cosmos_db_4xx_requests_silenced}" silenced = "${var.cosmos_db_4xx_requests_silenced}"
notify_no_data = false notify_no_data = false
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:cosmos_db", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_4xx_request_extra_tags}"]
} }
resource "datadog_monitor" "cosmos_db_5xx_requests" { resource "datadog_monitor" "cosmos_db_5xx_requests" {
@ -54,10 +92,16 @@ resource "datadog_monitor" "cosmos_db_5xx_requests" {
message = "${coalesce(var.cosmos_db_5xx_requests_message, var.message)}" message = "${coalesce(var.cosmos_db_5xx_requests_message, var.message)}"
query = <<EOF query = <<EOF
sum(last_5m): (default( ${var.cosmos_db_5xx_request_time_aggregator}(${var.cosmos_db_5xx_request_timeframe}): (default(
( avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:500} by {resource_group,region,name}.as_count() + (
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:503} by {resource_group,region,name}.as_count() ) / avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "500")} by {resource_group,region,name}.as_count() +
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "503")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "500")} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "503")} by {resource_group,region,name}.as_count()
) / (
avg:azure.cosmosdb.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() +
avg:azure.documentdb_databaseaccounts.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
)
* 100, 0) * 100, 0)
) > ${var.cosmos_db_5xx_request_rate_threshold_critical} ) > ${var.cosmos_db_5xx_request_rate_threshold_critical}
EOF EOF
@ -72,16 +116,16 @@ resource "datadog_monitor" "cosmos_db_5xx_requests" {
silenced = "${var.cosmos_db_5xx_requests_silenced}" silenced = "${var.cosmos_db_5xx_requests_silenced}"
notify_no_data = false notify_no_data = false
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:cosmos_db", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_5xx_request_rate_extra_tags}"]
} }
resource "datadog_monitor" "cosmos_db_success_no_data" { resource "datadog_monitor" "cosmos_db_success_no_data" {
@ -89,8 +133,9 @@ resource "datadog_monitor" "cosmos_db_success_no_data" {
message = "${coalesce(var.cosmos_db_no_request_message, var.message)}" message = "${coalesce(var.cosmos_db_no_request_message, var.message)}"
query = <<EOF query = <<EOF
avg(last_5m): ( ${var.cosmos_db_no_request_time_aggregator}(${var.cosmos_db_no_request_timeframe}): (
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name} avg:azure.cosmosdb.total_requests${module.filter-tags.query_alert} by {resource_group,region,name} +
avg:azure.documentdb_databaseaccounts.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}
) < 0 ) < 0
EOF EOF
@ -99,28 +144,31 @@ resource "datadog_monitor" "cosmos_db_success_no_data" {
silenced = "${var.cosmos_db_no_request_silenced}" silenced = "${var.cosmos_db_no_request_silenced}"
notify_no_data = true notify_no_data = true
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:cosmos_db", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_no_request_extra_tags}"]
} }
resource "datadog_monitor" "cosmos_db_ru_utilization" { resource "datadog_monitor" "cosmos_db_ru_utilization" {
count = "${length(var.cosmos_db_ru_utilization_collection)}" count = "${length(var.cosmos_db_ru_utilization_collections)}"
name = "[${var.environment}] Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collection),count.index)} RU utilization is high {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" name = "[${var.environment}] Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collections),count.index)} RU utilization is high {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
message = "${coalesce(var.cosmos_db_ru_utilization_message, var.message)}" message = "${coalesce(var.cosmos_db_ru_utilization_message, var.message)}"
query = <<EOF query = <<EOF
avg(last_5m): ( ${var.cosmos_db_ru_utilization_time_aggregator}(${var.cosmos_db_ru_utilization_timeframe}): (
avg:azure.cosmosdb.total_request_units{${data.template_file.filter.rendered},collectionname:${element(keys(var.cosmos_db_ru_utilization_collection),count.index)}} by {resource_group,region,name} / (
${element(values(var.cosmos_db_ru_utilization_collection),count.index)} avg:azure.cosmosdb.total_request_units${format(module.filter-tags-collection.query_alert,lower(element(keys(var.cosmos_db_ru_utilization_collections),count.index)))} by {resource_group,region,name,collectionname} +
avg:azure.documentdb_databaseaccounts.total_request_units${format(module.filter-tags-collection.query_alert,lower(element(keys(var.cosmos_db_ru_utilization_collections),count.index)))} by {resource_group,region,name,collectionname}
) /
${element(values(var.cosmos_db_ru_utilization_collections),count.index)}
) * 100 > ${var.cosmos_db_ru_utilization_rate_threshold_critical} ) * 100 > ${var.cosmos_db_ru_utilization_rate_threshold_critical}
EOF EOF
@ -134,14 +182,14 @@ resource "datadog_monitor" "cosmos_db_ru_utilization" {
silenced = "${var.cosmos_db_ru_utilization_silenced}" silenced = "${var.cosmos_db_ru_utilization_silenced}"
notify_no_data = false notify_no_data = false
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:cosmos_db", "collection:${element(keys(var.cosmos_db_ru_utilization_collection),count.index)}", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_ru_utilization_extra_tags}"]
} }

View File

@ -0,0 +1,4 @@
# The RU utilization monitor is declared with `count`, so the splat form
# exports the ids of all its instances.
output "cosmos_db_ru_utilization_id" {
  value       = "${datadog_monitor.cosmos_db_ru_utilization.*.id}"
  description = "id for monitor cosmos_db_ru_utilization"
}

View File

@ -1,3 +1,8 @@
# Exposes the id of the "Cosmos DB is down" status monitor.
output "cosmos_db_status_id" {
  value       = "${datadog_monitor.cosmos_db_status.id}"
  description = "id for monitor cosmos_db_status"
}
output "cosmos_db_4xx_requests_id" { output "cosmos_db_4xx_requests_id" {
description = "id for monitor cosmos_db_4xx_requests" description = "id for monitor cosmos_db_4xx_requests"
value = "${datadog_monitor.cosmos_db_4xx_requests.id}" value = "${datadog_monitor.cosmos_db_4xx_requests.id}"
@ -12,8 +17,3 @@ output "cosmos_db_success_no_data_id" {
description = "id for monitor cosmos_db_success_no_data" description = "id for monitor cosmos_db_success_no_data"
value = "${datadog_monitor.cosmos_db_success_no_data.id}" value = "${datadog_monitor.cosmos_db_success_no_data.id}"
} }
output "cosmos_db_ru_utilization_id" {
description = "id for monitor cosmos_db_ru_utilization"
value = "${datadog_monitor.cosmos_db_ru_utilization.id}"
}

View File

@ -22,11 +22,13 @@ Creates DataDog monitors with the following checks:
| Name | Description | Type | Default | Required | | Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:| |------|-------------|:----:|:-----:|:-----:|
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
| environment | Architecture environment | string | - | yes | | environment | Architecture environment | string | - | yes |
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when a monitor is triggered | string | - | yes | | message | Message sent when a monitor is triggered | string | - | yes |
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
| status_extra_tags | Extra tags for Datalake Store status monitor | list | `<list>` | no |
| status_message | Custom message for Datalake Store status monitor | string | `` | no | | status_message | Custom message for Datalake Store status monitor | string | `` | no |
| status_silenced | Groups to mute for Datalake Store status monitor | map | `<map>` | no | | status_silenced | Groups to mute for Datalake Store status monitor | map | `<map>` | no |
| status_time_aggregator | Monitor aggregator for Datalake Store status [available values: min, max or avg] | string | `max` | no | | status_time_aggregator | Monitor aggregator for Datalake Store status [available values: min, max or avg] | string | `max` | no |

View File

@ -17,11 +17,16 @@ variable "message" {
description = "Message sent when a monitor is triggered" description = "Message sent when a monitor is triggered"
} }
variable "delay" { variable "evaluation_delay" {
description = "Delay in seconds for the metric evaluation" description = "Delay in seconds for the metric evaluation"
default = 900 default = 900
} }
# Seconds to wait before a newly seen resource starts being monitored.
variable "new_host_delay" {
  default     = 300
  description = "Delay in seconds before monitor new resource"
}
# Azure Datalake Store specific variables # Azure Datalake Store specific variables
variable "status_silenced" { variable "status_silenced" {
description = "Groups to mute for Datalake Store status monitor" description = "Groups to mute for Datalake Store status monitor"
@ -43,5 +48,11 @@ variable "status_time_aggregator" {
variable "status_timeframe" { variable "status_timeframe" {
description = "Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
default = "last_15m" default = "last_5m"
}
# Additional tags appended to the Datalake Store status monitor's tag list.
# (The previous description carried a copy-pasted "available values" timeframe
# suffix that does not apply to a list of tags.)
variable "status_extra_tags" {
  description = "Extra tags for Datalake Store status monitor"
  type        = "list"
  default     = []
}

View File

@ -0,0 +1,8 @@
# Shared filter-tags helper instantiated for the "datalakestore" resource;
# its query_alert output is interpolated into the monitor queries.
module "filter-tags" {
  source = "../../../common/filter-tags"

  resource                 = "datalakestore"
  environment              = "${var.environment}"
  filter_tags_custom       = "${var.filter_tags_custom}"
  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
}

View File

@ -1,19 +1,11 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_servicebus:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "datalakestore_status" { resource "datadog_monitor" "datalakestore_status" {
name = "[${var.environment}] Datalake Store is down" name = "[${var.environment}] Datalake Store is down"
message = "${coalesce(var.status_message, var.message)}" message = "${coalesce(var.status_message, var.message)}"
query = <<EOF query = <<EOF
${var.status_time_aggregator}(${var.status_timeframe}): ( ${var.status_time_aggregator}(${var.status_timeframe}): (
avg:azure.datalakestore_accounts.status{${data.template_file.filter.rendered}} by {resource_group,region,name} avg:azure.datalakestore_accounts.status${module.filter-tags.query_alert} by {resource_group,region,name}
) != 1 ) < 1
EOF EOF
type = "metric alert" type = "metric alert"
@ -21,14 +13,14 @@ EOF
silenced = "${var.status_silenced}" silenced = "${var.status_silenced}"
notify_no_data = true notify_no_data = true
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = false require_full_window = false
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:datalakestore", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"]
} }

View File

@ -927,7 +927,7 @@ variable "servicebus_status_time_aggregator" {
variable "servicebus_status_timeframe" { variable "servicebus_status_timeframe" {
description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
default = "last_15m" default = "last_5m"
} }
variable "servicebus_no_active_connections_silenced" { variable "servicebus_no_active_connections_silenced" {
@ -951,7 +951,7 @@ variable "servicebus_no_active_connections_time_aggregator" {
variable "servicebus_no_active_connections_timeframe" { variable "servicebus_no_active_connections_timeframe" {
description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
type = "string" type = "string"
default = "last_15m" default = "last_5m"
} }
variable "servicebus_server_errors_message" { variable "servicebus_server_errors_message" {
@ -1599,6 +1599,36 @@ variable "streamanalytics_runtime_errors_threshold_critical" {
} }
# Azure CosmosDB specific variables # Azure CosmosDB specific variables
# --- Cosmos DB status monitor settings ---

# Groups for which the status monitor is muted.
variable "cosmos_db_status_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for Cosmos DB status monitor"
}

# Additional tags attached to the status monitor.
variable "cosmos_db_status_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Cosmos DB status monitor"
}

# Monitor-specific notification message; empty by default.
variable "cosmos_db_status_message" {
  type        = "string"
  default     = ""
  description = "Custom message for Cosmos DB status monitor"
}

# Time aggregation function used in the status query.
variable "cosmos_db_status_time_aggregator" {
  type        = "string"
  default     = "max"
  description = "Monitor aggregator for Cosmos DB status [available values: min, max or avg]"
}

# Evaluation window of the status query.
variable "cosmos_db_status_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
variable "cosmos_db_4xx_requests_message" { variable "cosmos_db_4xx_requests_message" {
description = "Custom message for Cosmos DB 4xx requests monitor" description = "Custom message for Cosmos DB 4xx requests monitor"
type = "string" type = "string"
@ -1621,6 +1651,24 @@ variable "cosmos_db_4xx_request_rate_threshold_warning" {
default = 50 default = 50
} }
# Additional tags attached to the Cosmos DB 4xx requests monitor.
variable "cosmos_db_4xx_request_extra_tags" {
  description = "Extra tags for Cosmos DB 4xx requests monitor"
  type        = "list"
  default     = []
}

# Time aggregation function for the 4xx requests query.
# The default is "sum", so the description lists it as an available value.
variable "cosmos_db_4xx_request_time_aggregator" {
  description = "Monitor aggregator for Cosmos DB 4xx requests [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}

# Evaluation window of the 4xx requests query.
variable "cosmos_db_4xx_request_timeframe" {
  description = "Monitor timeframe for Cosmos DB 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}
variable "cosmos_db_5xx_requests_message" { variable "cosmos_db_5xx_requests_message" {
description = "Custom message for Cosmos DB 5xx requests monitor" description = "Custom message for Cosmos DB 5xx requests monitor"
type = "string" type = "string"
@ -1643,6 +1691,24 @@ variable "cosmos_db_5xx_request_rate_threshold_warning" {
default = 50 default = 50
} }
# Additional tags attached to the Cosmos DB 5xx requests monitor.
# NOTE(review): the 4xx counterpart is named "..._request_extra_tags" (no "_rate");
# the name is kept as-is here because callers already reference it.
variable "cosmos_db_5xx_request_rate_extra_tags" {
  description = "Extra tags for Cosmos DB 5xx requests monitor"
  type        = "list"
  default     = []
}

# Time aggregation function for the 5xx requests query.
# The default is "sum", so the description lists it as an available value.
variable "cosmos_db_5xx_request_time_aggregator" {
  description = "Monitor aggregator for Cosmos DB 5xx requests [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}

# Evaluation window of the 5xx requests query.
variable "cosmos_db_5xx_request_timeframe" {
  description = "Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}
variable "cosmos_db_no_request_message" { variable "cosmos_db_no_request_message" {
description = "Custom message for Cosmos DB no request monitor" description = "Custom message for Cosmos DB no request monitor"
type = "string" type = "string"
@ -1655,6 +1721,24 @@ variable "cosmos_db_no_request_silenced" {
default = {} default = {}
} }
# --- Cosmos DB "no request" monitor settings ---

# Additional tags attached to the monitor.
variable "cosmos_db_no_request_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Cosmos DB no request monitor"
}

# Time aggregation function used in the query.
variable "cosmos_db_no_request_time_aggregator" {
  type        = "string"
  default     = "max"
  description = "Monitor aggregator for Cosmos DB no request [available values: min, max or avg]"
}

# Evaluation window of the query.
variable "cosmos_db_no_request_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for Cosmos DB no request [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
variable "cosmos_db_ru_utilization_message" { variable "cosmos_db_ru_utilization_message" {
description = "Custom message for Cosmos DB collection RU utilization monitor" description = "Custom message for Cosmos DB collection RU utilization monitor"
type = "string" type = "string"
@ -1677,7 +1761,25 @@ variable "cosmos_db_ru_utilization_rate_threshold_warning" {
default = 80 default = 80
} }
variable "cosmos_db_ru_utilization_collection" { variable "cosmos_db_ru_utilization_extra_tags" {
description = "Extra tags for Cosmos DB collection RU utilization monitor"
type = "list"
default = []
}
# Time aggregation function used in the RU utilization query.
variable "cosmos_db_ru_utilization_time_aggregator" {
  type        = "string"
  default     = "avg"
  description = "Monitor aggregator for Cosmos DB RU utilization [available values: min, max or avg]"
}

# Evaluation window of the RU utilization query.
variable "cosmos_db_ru_utilization_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Monitor timeframe for Cosmos DB RU utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
variable "cosmos_db_ru_utilization_collections" {
description = "Group to associate Cosmos DB collection to RU max" description = "Group to associate Cosmos DB collection to RU max"
type = "map" type = "map"
} }
@ -1703,7 +1805,13 @@ variable "datalakestore_status_time_aggregator" {
variable "datalakestore_status_timeframe" { variable "datalakestore_status_timeframe" {
description = "Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
default = "last_15m" default = "last_5m"
}
# Additional tags forwarded to the Datalake Store status monitor.
# (The previous description carried a copy-pasted "available values" timeframe
# suffix that does not apply to a list of tags.)
variable "datalakestore_status_extra_tags" {
  description = "Extra tags for Datalake Store status monitor"
  type        = "list"
  default     = []
}
variable "keyvault_status_silenced" { variable "keyvault_status_silenced" {
@ -1726,7 +1834,13 @@ variable "keyvault_status_time_aggregator" {
variable "keyvault_status_timeframe" { variable "keyvault_status_timeframe" {
description = "Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
default = "last_15m" default = "last_5m"
}
# Additional tags forwarded to the Key Vault status monitor.
variable "keyvault_status_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Key Vault status monitor"
}
variable "keyvault_api_result_silenced" { variable "keyvault_api_result_silenced" {
@ -1741,9 +1855,15 @@ variable "keyvault_api_result_message" {
default = "" default = ""
} }
# Time aggregation function for the Key Vault API result query.
# The default is "sum", so the description lists it as an available value.
variable "keyvault_api_result_time_aggregator" {
  description = "Monitor aggregator for Key Vault API result [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}
variable "keyvault_api_result_timeframe" { variable "keyvault_api_result_timeframe" {
description = "Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
default = "last_30m" default = "last_5m"
} }
variable "keyvault_api_result_threshold_critical" { variable "keyvault_api_result_threshold_critical" {
@ -1755,3 +1875,48 @@ variable "keyvault_api_result_threshold_warning" {
description = "Warning threshold for Key Vault API result rate" description = "Warning threshold for Key Vault API result rate"
default = 30 default = 30
} }
# Additional tags forwarded to the Key Vault API result monitor.
variable "keyvault_api_result_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Key Vault API result monitor"
}
# --- Key Vault API latency monitor settings ---

# Groups for which the latency monitor is muted.
variable "keyvault_api_latency_silenced" {
  description = "Groups to mute for Key Vault API latency monitor"
  type        = "map"
  default     = {}
}

# Monitor-specific notification message; empty by default.
variable "keyvault_api_latency_message" {
  description = "Custom message for Key Vault API latency monitor"
  type        = "string"
  default     = ""
}

# Time aggregation function used in the latency query.
variable "keyvault_api_latency_time_aggregator" {
  description = "Monitor aggregator for Key Vault API latency [available values: min, max or avg]"
  type        = "string"
  default     = "min"
}

# Evaluation window of the latency query (type declared explicitly, like the
# other string settings in this file).
variable "keyvault_api_latency_timeframe" {
  description = "Monitor timeframe for Key Vault API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}

# Latency thresholds; the monitor title reports these values in milliseconds,
# so the descriptions name the unit instead of calling it a "rate".
variable "keyvault_api_latency_threshold_critical" {
  description = "Critical threshold for Key Vault API latency (in milliseconds)"
  default     = 100
}

variable "keyvault_api_latency_threshold_warning" {
  description = "Warning threshold for Key Vault API latency (in milliseconds)"
  default     = 80
}

# Additional tags forwarded to the latency monitor.
variable "keyvault_api_latency_extra_tags" {
  description = "Extra tags for Key Vault API latency monitor"
  type        = "list"
  default     = []
}

View File

@ -16,23 +16,27 @@ module "datadog-monitors-cloud-azure-keyvault" {
Creates DataDog monitors with the following checks: Creates DataDog monitors with the following checks:
- Key Vault is down
- Key Vault API result rate is low - Key Vault API result rate is low
- Key Vault is down
## Inputs ## Inputs
| Name | Description | Type | Default | Required | | Name | Description | Type | Default | Required |
|------|-------------|:----:|:-----:|:-----:| |------|-------------|:----:|:-----:|:-----:|
| api_result_extra_tags | Extra tags for Key Vault API result monitor | list | `<list>` | no |
| api_result_message | Custom message for Key Vault API result monitor | string | `` | no | | api_result_message | Custom message for Key Vault API result monitor | string | `` | no |
| api_result_silenced | Groups to mute for Key Vault API result monitor | map | `<map>` | no | | api_result_silenced | Groups to mute for Key Vault API result monitor | map | `<map>` | no |
| api_result_threshold_critical | Critical threshold for Key Vault API result rate | string | `10` | no | | api_result_threshold_critical | Critical threshold for Key Vault API result rate | string | `10` | no |
| api_result_threshold_warning | Warning threshold for Key Vault API result rate | string | `30` | no | | api_result_threshold_warning | Warning threshold for Key Vault API result rate | string | `30` | no |
| api_result_time_aggregator | Monitor aggregator for Key Vault API result [available values: min, max or avg] | string | `sum` | no |
| api_result_timeframe | Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_30m` | no | | api_result_timeframe | Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_30m` | no |
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
| environment | Architecture environment | string | - | yes | | environment | Architecture environment | string | - | yes |
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when a monitor is triggered | string | - | yes | | message | Message sent when a monitor is triggered | string | - | yes |
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
| status_extra_tags | Extra tags for Key Vault status monitor | list | `<list>` | no |
| status_message | Custom message for Key Vault status monitor | string | `` | no | | status_message | Custom message for Key Vault status monitor | string | `` | no |
| status_silenced | Groups to mute for Key Vault status monitor | map | `<map>` | no | | status_silenced | Groups to mute for Key Vault status monitor | map | `<map>` | no |
| status_time_aggregator | Monitor aggregator for Key Vault status [available values: min, max or avg] | string | `max` | no | | status_time_aggregator | Monitor aggregator for Key Vault status [available values: min, max or avg] | string | `max` | no |

View File

@ -17,11 +17,16 @@ variable "message" {
description = "Message sent when a monitor is triggered" description = "Message sent when a monitor is triggered"
} }
variable "delay" { variable "evaluation_delay" {
description = "Delay in seconds for the metric evaluation" description = "Delay in seconds for the metric evaluation"
default = 900 default = 900
} }
# Seconds to wait before a newly seen resource starts being monitored.
variable "new_host_delay" {
  default     = 300
  description = "Delay in seconds before monitor new resource"
}
# Azure Key Vault specific variables # Azure Key Vault specific variables
variable "status_silenced" { variable "status_silenced" {
description = "Groups to mute for Key Vault status monitor" description = "Groups to mute for Key Vault status monitor"
@ -43,7 +48,13 @@ variable "status_time_aggregator" {
variable "status_timeframe" { variable "status_timeframe" {
description = "Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
default = "last_15m" default = "last_5m"
}
# Additional tags appended to the Key Vault status monitor's tag list.
variable "status_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Key Vault status monitor"
}
variable "api_result_silenced" { variable "api_result_silenced" {
@ -58,9 +69,15 @@ variable "api_result_message" {
default = "" default = ""
} }
# Time aggregation function for the Key Vault API result query.
# The default is "sum", so the description lists it as an available value.
variable "api_result_time_aggregator" {
  description = "Monitor aggregator for Key Vault API result [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}
variable "api_result_timeframe" { variable "api_result_timeframe" {
description = "Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
default = "last_30m" default = "last_5m"
} }
variable "api_result_threshold_critical" { variable "api_result_threshold_critical" {
@ -72,3 +89,48 @@ variable "api_result_threshold_warning" {
description = "Warning threshold for Key Vault API result rate" description = "Warning threshold for Key Vault API result rate"
default = 30 default = 30
} }
# Additional tags appended to the Key Vault API result monitor's tag list.
variable "api_result_extra_tags" {
  type        = "list"
  default     = []
  description = "Extra tags for Key Vault API result monitor"
}
# --- Key Vault API latency monitor settings ---

# Groups for which the latency monitor is muted.
variable "api_latency_silenced" {
  description = "Groups to mute for Key Vault API latency monitor"
  type        = "map"
  default     = {}
}

# Monitor-specific notification message; empty by default.
variable "api_latency_message" {
  description = "Custom message for Key Vault API latency monitor"
  type        = "string"
  default     = ""
}

# Time aggregation function used in the latency query.
variable "api_latency_time_aggregator" {
  description = "Monitor aggregator for Key Vault API latency [available values: min, max or avg]"
  type        = "string"
  default     = "min"
}

# Evaluation window of the latency query (type declared explicitly, like the
# other string settings in this file).
variable "api_latency_timeframe" {
  description = "Monitor timeframe for Key Vault API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
  type        = "string"
  default     = "last_5m"
}

# Latency thresholds; the monitor title reports these values in milliseconds,
# so the descriptions name the unit instead of calling it a "rate".
variable "api_latency_threshold_critical" {
  description = "Critical threshold for Key Vault API latency (in milliseconds)"
  default     = 100
}

variable "api_latency_threshold_warning" {
  description = "Warning threshold for Key Vault API latency (in milliseconds)"
  default     = 80
}

# Additional tags appended to the latency monitor's tag list.
variable "api_latency_extra_tags" {
  description = "Extra tags for Key Vault API latency monitor"
  type        = "list"
  default     = []
}

View File

@ -0,0 +1,19 @@
# Shared filter-tags helper instantiated for the "keyvault" resource;
# its query_alert output is interpolated into the monitor queries.
module "filter-tags" {
  source = "../../../common/filter-tags"

  resource                 = "keyvault"
  environment              = "${var.environment}"
  filter_tags_custom       = "${var.filter_tags_custom}"
  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
}
# Filter variant that additionally carries a "statuscode:%s" tag. The literal
# "%s" is a placeholder: the monitor query fills it in with format(), e.g. "200".
module "filter-tags-statuscode" {
  source = "../../../common/filter-tags"

  environment = "${var.environment}"

  # This module monitors Key Vault; "cosmosdb" was a copy-paste leftover and is
  # inconsistent with the sibling "filter-tags" module.
  resource = "keyvault"

  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"

  # NOTE(review): statuscode:%s is supplied both here and via extra_tags —
  # confirm against common/filter-tags that it is not emitted twice.
  filter_tags_custom = "${var.filter_tags_custom},statuscode:%s"
  extra_tags         = ["statuscode:%s"]
}

View File

@ -1,36 +1,28 @@
data "template_file" "filter" {
template = "$${filter}"
vars {
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_servicebus:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
}
}
resource "datadog_monitor" "keyvault_status" { resource "datadog_monitor" "keyvault_status" {
name = "[${var.environment}] Key Vault is down" name = "[${var.environment}] Key Vault is down"
message = "${coalesce(var.status_message, var.message)}" message = "${coalesce(var.status_message, var.message)}"
query = <<EOF query = <<EOF
${var.status_time_aggregator}(${var.status_timeframe}): ( ${var.status_time_aggregator}(${var.status_timeframe}): (
avg:azure.keyvault_vaults.status{${data.template_file.filter.rendered}} by {resource_group,region,name} avg:azure.keyvault_vaults.status${module.filter-tags.query_alert} by {resource_group,region,name}
) != 1 ) < 1
EOF EOF
type = "metric alert" type = "metric alert"
silenced = "${var.status_silenced}" silenced = "${var.status_silenced}"
notify_no_data = true notify_no_data = true
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = false require_full_window = false
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:keyvault", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:keyvault", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"]
} }
resource "datadog_monitor" "keyvault_api_result" { resource "datadog_monitor" "keyvault_api_result" {
@ -38,9 +30,9 @@ resource "datadog_monitor" "keyvault_api_result" {
message = "${coalesce(var.status_message, var.message)}" message = "${coalesce(var.status_message, var.message)}"
query = <<EOF query = <<EOF
sum(${var.api_result_timeframe}): ( ${var.api_result_time_aggregator}(${var.api_result_timeframe}): (
avg:azure.keyvault_vaults.service_api_result{${data.template_file.filter.rendered}} by {name,resource_group,region}.as_count() / avg:azure.keyvault_vaults.service_api_result${format(module.filter-tags-statuscode.query_alert, "200")} by {name,resource_group,region}.as_count() /
avg:azure.keyvault_vaults.service_api_hit{${data.template_file.filter.rendered}} by {name,resource_group,region}.as_count() avg:azure.keyvault_vaults.service_api_result${module.filter-tags.query_alert} by {name,resource_group,region}.as_count()
) * 100 < ${var.api_result_threshold_critical} ) * 100 < ${var.api_result_threshold_critical}
EOF EOF
@ -53,15 +45,47 @@ resource "datadog_monitor" "keyvault_api_result" {
silenced = "${var.api_result_silenced}" silenced = "${var.api_result_silenced}"
notify_no_data = true notify_no_data = false
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = false require_full_window = false
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:keyvault", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:keyvault", "team:claranet", "created-by:terraform", "${var.api_result_extra_tags}"]
}
# Metric alert on Key Vault API latency, evaluated per name/resource_group/region:
# triggers when the aggregated latency exceeds the critical threshold.
resource "datadog_monitor" "keyvault_api_latency" {
  name = "[${var.environment}] Key Vault API latency is high {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}ms){{/is_warning}}"

  # Use this monitor's own custom message and fall back to the global one.
  # (Previously read var.status_message, so api_latency_message was never used.)
  message = "${coalesce(var.api_latency_message, var.message)}"

  query = <<EOF
${var.api_latency_time_aggregator}(${var.api_latency_timeframe}):
avg:azure.keyvault_vaults.service_api_latency${module.filter-tags.query_alert} by {name,resource_group,region}
> ${var.api_latency_threshold_critical}
EOF

  thresholds {
    critical = "${var.api_latency_threshold_critical}"
    warning  = "${var.api_latency_threshold_warning}"
  }

  type = "metric alert"

  silenced = "${var.api_latency_silenced}"

  notify_no_data      = true
  evaluation_delay    = "${var.evaluation_delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = false
  new_host_delay      = "${var.new_host_delay}"

  tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:keyvault", "team:claranet", "created-by:terraform", "${var.api_latency_extra_tags}"]
}

View File

@ -390,36 +390,59 @@ module "streamanalytics" {
module "cosmosdb" { module "cosmosdb" {
source = "./cosmosdb" source = "./cosmosdb"
environment = "${var.environment}" environment = "${var.environment}"
message = "${var.message}" message = "${var.message}"
delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
new_host_delay = "${var.new_host_delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}" filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}" filter_tags_custom = "${var.filter_tags_custom}"
status_message = "${var.cosmos_db_status_message}"
status_silenced = "${var.cosmos_db_status_silenced}"
status_extra_tags = "${var.cosmos_db_status_extra_tags}"
status_time_aggregator = "${var.cosmos_db_status_time_aggregator}"
status_timeframe = "${var.cosmos_db_status_timeframe}"
cosmos_db_4xx_request_rate_threshold_critical = "${var.cosmos_db_4xx_request_rate_threshold_critical}" cosmos_db_4xx_request_rate_threshold_critical = "${var.cosmos_db_4xx_request_rate_threshold_critical}"
cosmos_db_4xx_request_rate_threshold_warning = "${var.cosmos_db_4xx_request_rate_threshold_warning}" cosmos_db_4xx_request_rate_threshold_warning = "${var.cosmos_db_4xx_request_rate_threshold_warning}"
cosmos_db_4xx_requests_message = "${var.cosmos_db_4xx_requests_message}" cosmos_db_4xx_requests_message = "${var.cosmos_db_4xx_requests_message}"
cosmos_db_4xx_requests_silenced = "${var.cosmos_db_4xx_requests_silenced}" cosmos_db_4xx_requests_silenced = "${var.cosmos_db_4xx_requests_silenced}"
cosmos_db_4xx_request_extra_tags = "${var.cosmos_db_4xx_request_extra_tags}"
cosmos_db_4xx_request_time_aggregator = "${var.cosmos_db_4xx_request_time_aggregator}"
cosmos_db_4xx_request_timeframe = "${var.cosmos_db_4xx_request_timeframe}"
cosmos_db_5xx_request_rate_threshold_critical = "${var.cosmos_db_5xx_request_rate_threshold_critical}" cosmos_db_5xx_request_rate_threshold_critical = "${var.cosmos_db_5xx_request_rate_threshold_critical}"
cosmos_db_5xx_request_rate_threshold_warning = "${var.cosmos_db_5xx_request_rate_threshold_warning}" cosmos_db_5xx_request_rate_threshold_warning = "${var.cosmos_db_5xx_request_rate_threshold_warning}"
cosmos_db_5xx_requests_message = "${var.cosmos_db_5xx_requests_message}" cosmos_db_5xx_requests_message = "${var.cosmos_db_5xx_requests_message}"
cosmos_db_5xx_requests_silenced = "${var.cosmos_db_5xx_requests_silenced}" cosmos_db_5xx_requests_silenced = "${var.cosmos_db_5xx_requests_silenced}"
cosmos_db_5xx_request_rate_extra_tags = "${var.cosmos_db_5xx_request_rate_extra_tags}"
cosmos_db_5xx_request_time_aggregator = "${var.cosmos_db_5xx_request_time_aggregator}"
cosmos_db_5xx_request_timeframe = "${var.cosmos_db_5xx_request_timeframe}"
cosmos_db_no_request_message = "${var.cosmos_db_no_request_message}"
cosmos_db_no_request_silenced = "${var.cosmos_db_no_request_silenced}"
cosmos_db_no_request_extra_tags = "${var.cosmos_db_no_request_extra_tags}"
cosmos_db_no_request_time_aggregator = "${var.cosmos_db_no_request_time_aggregator}"
cosmos_db_no_request_timeframe = "${var.cosmos_db_no_request_timeframe}"
cosmos_db_ru_utilization_rate_threshold_critical = "${var.cosmos_db_ru_utilization_rate_threshold_critical}" cosmos_db_ru_utilization_rate_threshold_critical = "${var.cosmos_db_ru_utilization_rate_threshold_critical}"
cosmos_db_ru_utilization_rate_threshold_warning = "${var.cosmos_db_ru_utilization_rate_threshold_warning}" cosmos_db_ru_utilization_rate_threshold_warning = "${var.cosmos_db_ru_utilization_rate_threshold_warning}"
cosmos_db_ru_utilization_message = "${var.cosmos_db_ru_utilization_message}" cosmos_db_ru_utilization_message = "${var.cosmos_db_ru_utilization_message}"
cosmos_db_ru_utilization_silenced = "${var.cosmos_db_ru_utilization_silenced}" cosmos_db_ru_utilization_silenced = "${var.cosmos_db_ru_utilization_silenced}"
cosmos_db_ru_utilization_collection = "${var.cosmos_db_ru_utilization_collection}" cosmos_db_ru_utilization_extra_tags = "${var.cosmos_db_ru_utilization_extra_tags}"
cosmos_db_ru_utilization_time_aggregator = "${var.cosmos_db_ru_utilization_time_aggregator}"
cosmos_db_ru_utilization_timeframe = "${var.cosmos_db_ru_utilization_timeframe}"
cosmos_db_ru_utilization_collections = "${var.cosmos_db_ru_utilization_collections}"
} }
module "datalakestore" { module "datalakestore" {
source = "./datalakestore" source = "./datalakestore"
environment = "${var.environment}" environment = "${var.environment}"
message = "${var.message}" message = "${var.message}"
delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
new_host_delay = "${var.new_host_delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}" filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}" filter_tags_custom = "${var.filter_tags_custom}"
@ -428,14 +451,16 @@ module "datalakestore" {
status_message = "${var.datalakestore_status_message}" status_message = "${var.datalakestore_status_message}"
status_timeframe = "${var.datalakestore_status_timeframe}" status_timeframe = "${var.datalakestore_status_timeframe}"
status_time_aggregator = "${var.datalakestore_status_time_aggregator}" status_time_aggregator = "${var.datalakestore_status_time_aggregator}"
status_extra_tags = "${var.datalakestore_status_extra_tags}"
} }
module "keyvault" { module "keyvault" {
source = "./keyvault" source = "./keyvault"
environment = "${var.environment}" environment = "${var.environment}"
message = "${var.message}" message = "${var.message}"
delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
new_host_delay = "${var.new_host_delay}"
filter_tags_use_defaults = "${var.filter_tags_use_defaults}" filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
filter_tags_custom = "${var.filter_tags_custom}" filter_tags_custom = "${var.filter_tags_custom}"
@ -444,10 +469,22 @@ module "keyvault" {
status_message = "${var.keyvault_status_message}" status_message = "${var.keyvault_status_message}"
status_timeframe = "${var.keyvault_status_timeframe}" status_timeframe = "${var.keyvault_status_timeframe}"
status_time_aggregator = "${var.keyvault_status_time_aggregator}" status_time_aggregator = "${var.keyvault_status_time_aggregator}"
status_extra_tags = "${var.keyvault_status_extra_tags}"
api_result_enabled = "${var.keyvault_api_result_enabled}"
api_result_silenced = "${var.keyvault_api_result_silenced}" api_result_silenced = "${var.keyvault_api_result_silenced}"
api_result_message = "${var.keyvault_api_result_message}" api_result_message = "${var.keyvault_api_result_message}"
api_result_timeframe = "${var.keyvault_api_result_timeframe}" api_result_timeframe = "${var.keyvault_api_result_timeframe}"
api_result_time_aggregator = "${var.keyvault_api_result_time_aggregator}"
api_result_threshold_critical = "${var.keyvault_api_result_threshold_critical}" api_result_threshold_critical = "${var.keyvault_api_result_threshold_critical}"
api_result_threshold_warning = "${var.keyvault_api_result_threshold_warning}" api_result_threshold_warning = "${var.keyvault_api_result_threshold_warning}"
api_result_extra_tags = "${var.keyvault_api_result_extra_tags}"
api_latency_silenced = "${var.keyvault_api_latency_silenced}"
api_latency_message = "${var.keyvault_api_latency_message}"
api_latency_timeframe = "${var.keyvault_api_latency_timeframe}"
api_latency_time_aggregator = "${var.keyvault_api_latency_time_aggregator}"
api_latency_threshold_critical = "${var.keyvault_api_latency_threshold_critical}"
api_latency_threshold_warning = "${var.keyvault_api_latency_threshold_warning}"
api_latency_extra_tags = "${var.keyvault_api_latency_extra_tags}"
} }

View File

@ -16,10 +16,10 @@ module "datadog-monitors-cloud-azure-servicebus" {
Creates DataDog monitors with the following checks: Creates DataDog monitors with the following checks:
- Service Bus is down
- Service Bus has no active connection - Service Bus has no active connection
- Service Bus user errors rate is high - Service Bus is down
- Service Bus server errors rate is high - Service Bus server errors rate is high
- Service Bus user errors rate is high
## Inputs ## Inputs
@ -30,6 +30,7 @@ Creates DataDog monitors with the following checks:
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| message | Message sent when an alert is triggered | string | - | yes | | message | Message sent when an alert is triggered | string | - | yes |
| new_host_delay | Delay in seconds before monitoring a new resource | string | `300` | no |
| no_active_connections_message | Custom message for Service Bus no active connections monitor | string | `` | no | | no_active_connections_message | Custom message for Service Bus no active connections monitor | string | `` | no |
| no_active_connections_silenced | Groups to mute for Service Bus no active connections monitor | map | `<map>` | no | | no_active_connections_silenced | Groups to mute for Service Bus no active connections monitor | map | `<map>` | no |
| no_active_connections_time_aggregator | Monitor aggregator for Service Bus no active connections [available values: min, max or avg] | string | `max` | no | | no_active_connections_time_aggregator | Monitor aggregator for Service Bus no active connections [available values: min, max or avg] | string | `max` | no |
@ -39,7 +40,6 @@ Creates DataDog monitors with the following checks:
| server_errors_threshold_critical | Critical threshold for Service Bus server errors monitor | string | `90` | no | | server_errors_threshold_critical | Critical threshold for Service Bus server errors monitor | string | `90` | no |
| server_errors_threshold_warning | Warning threshold for Service Bus server errors monitor | string | `50` | no | | server_errors_threshold_warning | Warning threshold for Service Bus server errors monitor | string | `50` | no |
| server_errors_timeframe | Monitor timeframe for Service Bus server errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | | server_errors_timeframe | Monitor timeframe for Service Bus server errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
| new_host_delay | Delay in seconds before monitoring a new resource | string | `300` | no |
| status_extra_tags | Extra tags for Service Bus status monitor | list | `<list>` | no | | status_extra_tags | Extra tags for Service Bus status monitor | list | `<list>` | no |
| status_message | Custom message for Service Bus status monitor | string | `` | no | | status_message | Custom message for Service Bus status monitor | string | `` | no |
| status_silenced | Groups to mute for Service Bus status monitor | map | `<map>` | no | | status_silenced | Groups to mute for Service Bus status monitor | map | `<map>` | no |

View File

@ -56,7 +56,7 @@ variable "status_time_aggregator" {
variable "status_timeframe" { variable "status_timeframe" {
description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
default = "last_15m" default = "last_5m"
} }
variable "no_active_connections_silenced" { variable "no_active_connections_silenced" {
@ -80,7 +80,7 @@ variable "no_active_connections_time_aggregator" {
variable "no_active_connections_timeframe" { variable "no_active_connections_timeframe" {
description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
type = "string" type = "string"
default = "last_15m" default = "last_5m"
} }
variable "server_errors_message" { variable "server_errors_message" {

View File

@ -31,7 +31,7 @@ resource "datadog_monitor" "service_bus_no_active_connections" {
query = <<EOF query = <<EOF
${var.no_active_connections_time_aggregator}(${var.no_active_connections_timeframe}): ( ${var.no_active_connections_time_aggregator}(${var.no_active_connections_timeframe}): (
avg:azure.servicebus_namespaces.active_connections_preview{${data.template_file.filter.rendered}} by {resource_group,region,name} avg:azure.servicebus_namespaces.active_connections_preview${module.filter-tags.query_alert} by {resource_group,region,name}
) < 1 ) < 1
EOF EOF
@ -40,14 +40,14 @@ resource "datadog_monitor" "service_bus_no_active_connections" {
silenced = "${var.no_active_connections_silenced}" silenced = "${var.no_active_connections_silenced}"
notify_no_data = false notify_no_data = false
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"]
} }
@ -58,8 +58,8 @@ resource "datadog_monitor" "service_bus_user_errors" {
query = <<EOF query = <<EOF
sum(${var.user_errors_timeframe}): (default( sum(${var.user_errors_timeframe}): (default(
avg:azure.servicebus_namespaces.user_errors.preview{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() / avg:azure.servicebus_namespaces.user_errors.preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.servicebus_namespaces.incoming_requests_preview{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() avg:azure.servicebus_namespaces.incoming_requests_preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
* 100, 0) * 100, 0)
) > ${var.user_errors_threshold_critical} ) > ${var.user_errors_threshold_critical}
EOF EOF
@ -74,14 +74,14 @@ resource "datadog_monitor" "service_bus_user_errors" {
silenced = "${var.user_errors_silenced}" silenced = "${var.user_errors_silenced}"
notify_no_data = false notify_no_data = false
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"]
} }
@ -92,8 +92,8 @@ resource "datadog_monitor" "service_bus_server_errors" {
query = <<EOF query = <<EOF
sum(${var.server_errors_timeframe}): (default( sum(${var.server_errors_timeframe}): (default(
avg:azure.servicebus_namespaces.server_errors.preview{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() / avg:azure.servicebus_namespaces.server_errors.preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
avg:azure.servicebus_namespaces.incoming_requests_preview{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() avg:azure.servicebus_namespaces.incoming_requests_preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
* 100, 0) * 100, 0)
) > ${var.server_errors_threshold_critical} ) > ${var.server_errors_threshold_critical}
EOF EOF
@ -108,14 +108,14 @@ resource "datadog_monitor" "service_bus_server_errors" {
silenced = "${var.server_errors_silenced}" silenced = "${var.server_errors_silenced}"
notify_no_data = false notify_no_data = false
evaluation_delay = "${var.delay}" evaluation_delay = "${var.evaluation_delay}"
renotify_interval = 0 renotify_interval = 0
notify_audit = false notify_audit = false
timeout_h = 0 timeout_h = 0
include_tags = true include_tags = true
locked = false locked = false
require_full_window = true require_full_window = true
new_host_delay = "${var.delay}" new_host_delay = "${var.new_host_delay}"
tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"] tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"]
} }