MON-237 Monitors improvements and best practices fixes
This commit is contained in:
parent
a3d9384681
commit
56d40bcb43
@ -18,33 +18,52 @@ Creates DataDog monitors with the following checks:
|
||||
|
||||
- Cosmos DB 4xx requests rate is high
|
||||
- Cosmos DB 5xx requests rate is high
|
||||
- Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collections),count.index)} RU utilization is high
|
||||
- Cosmos DB has no request
|
||||
- Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collection),count.index)} RU utilization is high
|
||||
- Cosmos DB is down
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| cosmos_db_4xx_request_extra_tags | Extra tags for Cosmos DB 4xx requests monitor | list | `<list>` | no |
|
||||
| cosmos_db_4xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 4xx requests monitor | string | `80` | no |
|
||||
| cosmos_db_4xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 4xx requests monitor | string | `50` | no |
|
||||
| cosmos_db_4xx_request_time_aggregator | Monitor aggregator for Cosmos DB 4xx requests [available values: min, max or avg] | string | `sum` | no |
|
||||
| cosmos_db_4xx_request_timeframe | Monitor timeframe for Cosmos DB 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| cosmos_db_4xx_requests_message | Custom message for Cosmos DB 4xx requests monitor | string | `` | no |
|
||||
| cosmos_db_4xx_requests_silenced | Groups to mute for Cosmos DB 4xx requests monitor | map | `<map>` | no |
|
||||
| cosmos_db_5xx_request_rate_extra_tags | Extra tags for Cosmos DB 5xx requests monitor | list | `<list>` | no |
|
||||
| cosmos_db_5xx_request_rate_threshold_critical | Critical threshold for Cosmos DB 5xx requests monitor | string | `80` | no |
|
||||
| cosmos_db_5xx_request_rate_threshold_warning | Warning threshold for Cosmos DB 5xx requests monitor | string | `50` | no |
|
||||
| cosmos_db_5xx_request_time_aggregator | Monitor aggregator for Cosmos DB 5xx requests [available values: min, max or avg] | string | `sum` | no |
|
||||
| cosmos_db_5xx_request_timeframe | Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| cosmos_db_5xx_requests_message | Custom message for Cosmos DB 5xx requests monitor | string | `` | no |
|
||||
| cosmos_db_5xx_requests_silenced | Groups to mute for Cosmos DB 5xx requests monitor | map | `<map>` | no |
|
||||
| cosmos_db_no_request_extra_tags | Extra tags for Cosmos DB no request monitor | list | `<list>` | no |
|
||||
| cosmos_db_no_request_message | Custom message for Cosmos DB no request monitor | string | `` | no |
|
||||
| cosmos_db_no_request_silenced | Groups to mute for Cosmos DB no request monitor | map | `<map>` | no |
|
||||
| cosmos_db_ru_utilization_collection | Group to associate Cosmos DB collection to RU max | map | - | yes |
|
||||
| cosmos_db_no_request_time_aggregator | Monitor aggregator for Cosmos DB no request [available values: min, max or avg] | string | `max` | no |
|
||||
| cosmos_db_no_request_timeframe | Monitor timeframe for Cosmos DB no request [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| cosmos_db_ru_utilization_collections | Group to associate Cosmos DB collection to RU max | map | - | yes |
|
||||
| cosmos_db_ru_utilization_extra_tags | Extra tags for Cosmos DB collection RU utilization monitor | list | `<list>` | no |
|
||||
| cosmos_db_ru_utilization_message | Custom message for Cosmos DB collection RU utilization monitor | string | `` | no |
|
||||
| cosmos_db_ru_utilization_rate_threshold_critical | Critical threshold for Cosmos DB collection RU utilization monitor | string | `90` | no |
|
||||
| cosmos_db_ru_utilization_rate_threshold_warning | Warning threshold for Cosmos DB collection RU utilization monitor | string | `80` | no |
|
||||
| cosmos_db_ru_utilization_silenced | Groups to mute for Cosmos DB collection RU utilization monitor | map | `<map>` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| cosmos_db_ru_utilization_time_aggregator | Monitor aggregator for Cosmos DB RU utilization [available values: min, max or avg] | string | `min` | no |
|
||||
| cosmos_db_ru_utilization_timeframe | Monitor timeframe for Cosmos DB RU utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
|
||||
| status_extra_tags | Extra tags for Cosmos DB status monitor | list | `<list>` | no |
|
||||
| status_message | Custom message for Cosmos DB status monitor | string | `` | no |
|
||||
| status_silenced | Groups to mute for Cosmos DB status monitor | map | `<map>` | no |
|
||||
| status_time_aggregator | Monitor aggregator for Cosmos DB status [available values: min, max or avg] | string | `max` | no |
|
||||
| status_timeframe | Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
@ -53,6 +72,7 @@ Creates DataDog monitors with the following checks:
|
||||
| cosmos_db_4xx_requests_id | id for monitor cosmos_db_4xx_requests |
|
||||
| cosmos_db_5xx_requests_id | id for monitor cosmos_db_5xx_requests |
|
||||
| cosmos_db_ru_utilization_id | id for monitor cosmos_db_ru_utilization |
|
||||
| cosmos_db_status_id | id for monitor cosmos_db_status |
|
||||
| cosmos_db_success_no_data_id | id for monitor cosmos_db_success_no_data |
|
||||
|
||||
Related documentation
|
||||
|
||||
@ -17,12 +17,47 @@ variable "message" {
|
||||
description = "Message sent when a monitor is triggered"
|
||||
}
|
||||
|
||||
variable "delay" {
|
||||
variable "evaluation_delay" {
|
||||
description = "Delay in seconds for the metric evaluation"
|
||||
default = 900
|
||||
}
|
||||
|
||||
variable "new_host_delay" {
|
||||
description = "Delay in seconds before monitor new resource"
|
||||
default = 300
|
||||
}
|
||||
|
||||
# Azure CosmosDB specific variables
|
||||
variable "status_silenced" {
|
||||
description = "Groups to mute for Cosmos DB status monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "status_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB status monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "status_message" {
|
||||
description = "Custom message for Cosmos DB status monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "status_time_aggregator" {
|
||||
description = "Monitor aggregator for Cosmos DB status [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "status_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_4xx_requests_message" {
|
||||
description = "Custom message for Cosmos DB 4xx requests monitor"
|
||||
type = "string"
|
||||
@ -45,6 +80,24 @@ variable "cosmos_db_4xx_request_rate_threshold_warning" {
|
||||
default = 50
|
||||
}
|
||||
|
||||
variable "cosmos_db_4xx_request_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB 4xx requests monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cosmos_db_4xx_request_time_aggregator" {
|
||||
description = "Monitor aggregator for Cosmos DB 4xx requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "sum"
|
||||
}
|
||||
|
||||
variable "cosmos_db_4xx_request_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_5xx_requests_message" {
|
||||
description = "Custom message for Cosmos DB 5xx requests monitor"
|
||||
type = "string"
|
||||
@ -67,6 +120,24 @@ variable "cosmos_db_5xx_request_rate_threshold_warning" {
|
||||
default = 50
|
||||
}
|
||||
|
||||
variable "cosmos_db_5xx_request_rate_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB 5xx requests monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cosmos_db_5xx_request_time_aggregator" {
|
||||
description = "Monitor aggregator for Cosmos DB 5xx requests [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "sum"
|
||||
}
|
||||
|
||||
variable "cosmos_db_5xx_request_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_no_request_message" {
|
||||
description = "Custom message for Cosmos DB no request monitor"
|
||||
type = "string"
|
||||
@ -79,6 +150,24 @@ variable "cosmos_db_no_request_silenced" {
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "cosmos_db_no_request_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB no request monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cosmos_db_no_request_time_aggregator" {
|
||||
description = "Monitor aggregator for Cosmos DB no request [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "cosmos_db_no_request_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB no request [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_message" {
|
||||
description = "Custom message for Cosmos DB collection RU utilization monitor"
|
||||
type = "string"
|
||||
@ -101,7 +190,25 @@ variable "cosmos_db_ru_utilization_rate_threshold_warning" {
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_collection" {
|
||||
variable "cosmos_db_ru_utilization_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB collection RU utilization monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_time_aggregator" {
|
||||
description = "Monitor aggregator for Cosmos DB RU utilization [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB RU utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_collections" {
|
||||
description = "Group to associate Cosmos DB collection to RU max"
|
||||
type = "map"
|
||||
}
|
||||
|
||||
30
cloud/azure/cosmosdb/modules.tf
Normal file
30
cloud/azure/cosmosdb/modules.tf
Normal file
@ -0,0 +1,30 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
resource = "cosmosdb"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
}
|
||||
|
||||
module "filter-tags-statuscode" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
resource = "cosmosdb"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom},statuscode:%s"
|
||||
|
||||
extra_tags = ["statuscode:%s"]
|
||||
}
|
||||
|
||||
module "filter-tags-collection" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
resource = "cosmosdb"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom},collectionname:%s"
|
||||
|
||||
extra_tags = ["collectionname:%s"]
|
||||
}
|
||||
@ -1,9 +1,33 @@
|
||||
data "template_file" "filter" {
|
||||
template = "$${filter}"
|
||||
resource "datadog_monitor" "cosmos_db_status" {
|
||||
name = "[${var.environment}] Cosmos DB is down"
|
||||
message = "${coalesce(var.status_message, var.message)}"
|
||||
|
||||
vars {
|
||||
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_cosmosdb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
|
||||
query = <<EOF
|
||||
${var.status_time_aggregator}(${var.status_timeframe}):(
|
||||
avg:azure.cosmosdb.status${module.filter-tags.query_alert} by {resource_group,region,name} +
|
||||
avg:azure.documentdb_databaseaccounts.status${module.filter-tags.query_alert} by {resource_group,region,name})
|
||||
< 1
|
||||
EOF
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
thresholds {
|
||||
critical = 1
|
||||
}
|
||||
|
||||
silenced = "${var.status_silenced}"
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "cosmos_db_4xx_requests" {
|
||||
@ -11,18 +35,32 @@ resource "datadog_monitor" "cosmos_db_4xx_requests" {
|
||||
message = "${coalesce(var.cosmos_db_4xx_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m): (default(
|
||||
( avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:400} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:403} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:404} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:408} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:409} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:410} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:412} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:413} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:429} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:449} by {resource_group,region,name}.as_count() ) /
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
|
||||
${var.cosmos_db_4xx_request_time_aggregator}(${var.cosmos_db_4xx_request_timeframe}): (default(
|
||||
(
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "400")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "401")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "403")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "404")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "408")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "409")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "412")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "413")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "429")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "449")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "400")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "401")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "403")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "404")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "408")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "409")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "412")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "413")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "429")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "449")} by {resource_group,region,name}.as_count()
|
||||
) / (
|
||||
avg:azure.cosmosdb.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
|
||||
)
|
||||
* 100, 0)
|
||||
) > ${var.cosmos_db_4xx_request_rate_threshold_critical}
|
||||
EOF
|
||||
@ -37,16 +75,16 @@ resource "datadog_monitor" "cosmos_db_4xx_requests" {
|
||||
silenced = "${var.cosmos_db_4xx_requests_silenced}"
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:cosmos_db", "team:azure", "provider:azure"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_4xx_request_extra_tags}"]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "cosmos_db_5xx_requests" {
|
||||
@ -54,10 +92,16 @@ resource "datadog_monitor" "cosmos_db_5xx_requests" {
|
||||
message = "${coalesce(var.cosmos_db_5xx_requests_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
sum(last_5m): (default(
|
||||
( avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:500} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered},statuscode:503} by {resource_group,region,name}.as_count() ) /
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
|
||||
${var.cosmos_db_5xx_request_time_aggregator}(${var.cosmos_db_5xx_request_timeframe}): (default(
|
||||
(
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "500")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.cosmosdb.total_requests${format(module.filter-tags-statuscode.query_alert, "503")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "500")} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${format(module.filter-tags-statuscode.query_alert, "503")} by {resource_group,region,name}.as_count()
|
||||
) / (
|
||||
avg:azure.cosmosdb.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
|
||||
)
|
||||
* 100, 0)
|
||||
) > ${var.cosmos_db_5xx_request_rate_threshold_critical}
|
||||
EOF
|
||||
@ -72,16 +116,16 @@ resource "datadog_monitor" "cosmos_db_5xx_requests" {
|
||||
silenced = "${var.cosmos_db_5xx_requests_silenced}"
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:cosmos_db", "team:azure", "provider:azure"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_5xx_request_rate_extra_tags}"]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "cosmos_db_success_no_data" {
|
||||
@ -89,8 +133,9 @@ resource "datadog_monitor" "cosmos_db_success_no_data" {
|
||||
message = "${coalesce(var.cosmos_db_no_request_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
avg(last_5m): (
|
||||
avg:azure.cosmosdb.total_requests{${data.template_file.filter.rendered}} by {resource_group,region,name}
|
||||
${var.cosmos_db_no_request_time_aggregator}(${var.cosmos_db_no_request_timeframe}): (
|
||||
avg:azure.cosmosdb.total_requests${module.filter-tags.query_alert} by {resource_group,region,name} +
|
||||
avg:azure.documentdb_databaseaccounts.total_requests${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) < 0
|
||||
EOF
|
||||
|
||||
@ -99,28 +144,31 @@ resource "datadog_monitor" "cosmos_db_success_no_data" {
|
||||
silenced = "${var.cosmos_db_no_request_silenced}"
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:cosmos_db", "team:azure", "provider:azure"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_no_request_extra_tags}"]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "cosmos_db_ru_utilization" {
|
||||
count = "${length(var.cosmos_db_ru_utilization_collection)}"
|
||||
count = "${length(var.cosmos_db_ru_utilization_collections)}"
|
||||
|
||||
name = "[${var.environment}] Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collection),count.index)} RU utilization is high {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
name = "[${var.environment}] Cosmos DB collection ${element(keys(var.cosmos_db_ru_utilization_collections),count.index)} RU utilization is high {{#is_alert}}{{comparator}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{comparator}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.cosmos_db_ru_utilization_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
avg(last_5m): (
|
||||
avg:azure.cosmosdb.total_request_units{${data.template_file.filter.rendered},collectionname:${element(keys(var.cosmos_db_ru_utilization_collection),count.index)}} by {resource_group,region,name} /
|
||||
${element(values(var.cosmos_db_ru_utilization_collection),count.index)}
|
||||
${var.cosmos_db_ru_utilization_time_aggregator}(${var.cosmos_db_ru_utilization_timeframe}): (
|
||||
(
|
||||
avg:azure.cosmosdb.total_request_units${format(module.filter-tags-collection.query_alert,lower(element(keys(var.cosmos_db_ru_utilization_collections),count.index)))} by {resource_group,region,name,collectionname} +
|
||||
avg:azure.documentdb_databaseaccounts.total_request_units${format(module.filter-tags-collection.query_alert,lower(element(keys(var.cosmos_db_ru_utilization_collections),count.index)))} by {resource_group,region,name,collectionname}
|
||||
) /
|
||||
${element(values(var.cosmos_db_ru_utilization_collections),count.index)}
|
||||
) * 100 > ${var.cosmos_db_ru_utilization_rate_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -134,14 +182,14 @@ resource "datadog_monitor" "cosmos_db_ru_utilization" {
|
||||
silenced = "${var.cosmos_db_ru_utilization_silenced}"
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:cosmos_db", "collection:${element(keys(var.cosmos_db_ru_utilization_collection),count.index)}", "team:azure", "provider:azure"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:cosmos_db", "team:claranet", "created-by:terraform", "${var.cosmos_db_ru_utilization_extra_tags}"]
|
||||
}
|
||||
|
||||
4
cloud/azure/cosmosdb/outputs-custom.tf
Normal file
4
cloud/azure/cosmosdb/outputs-custom.tf
Normal file
@ -0,0 +1,4 @@
|
||||
output "cosmos_db_ru_utilization_id" {
|
||||
description = "id for monitor cosmos_db_ru_utilization"
|
||||
value = "${datadog_monitor.cosmos_db_ru_utilization.*.id}"
|
||||
}
|
||||
@ -1,3 +1,8 @@
|
||||
output "cosmos_db_status_id" {
|
||||
description = "id for monitor cosmos_db_status"
|
||||
value = "${datadog_monitor.cosmos_db_status.id}"
|
||||
}
|
||||
|
||||
output "cosmos_db_4xx_requests_id" {
|
||||
description = "id for monitor cosmos_db_4xx_requests"
|
||||
value = "${datadog_monitor.cosmos_db_4xx_requests.id}"
|
||||
@ -12,8 +17,3 @@ output "cosmos_db_success_no_data_id" {
|
||||
description = "id for monitor cosmos_db_success_no_data"
|
||||
value = "${datadog_monitor.cosmos_db_success_no_data.id}"
|
||||
}
|
||||
|
||||
output "cosmos_db_ru_utilization_id" {
|
||||
description = "id for monitor cosmos_db_ru_utilization"
|
||||
value = "${datadog_monitor.cosmos_db_ru_utilization.id}"
|
||||
}
|
||||
|
||||
@ -22,11 +22,13 @@ Creates DataDog monitors with the following checks:
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
|
||||
| status_extra_tags | Extra tags for Datalake Store status monitor | list | `<list>` | no |
|
||||
| status_message | Custom message for Datalake Store status monitor | string | `` | no |
|
||||
| status_silenced | Groups to mute for Datalake Store status monitor | map | `<map>` | no |
|
||||
| status_time_aggregator | Monitor aggregator for Datalake Store status [available values: min, max or avg] | string | `max` | no |
|
||||
|
||||
@ -17,11 +17,16 @@ variable "message" {
|
||||
description = "Message sent when a monitor is triggered"
|
||||
}
|
||||
|
||||
variable "delay" {
|
||||
variable "evaluation_delay" {
|
||||
description = "Delay in seconds for the metric evaluation"
|
||||
default = 900
|
||||
}
|
||||
|
||||
variable "new_host_delay" {
|
||||
description = "Delay in seconds before monitor new resource"
|
||||
default = 300
|
||||
}
|
||||
|
||||
# Azure Datalake Store specific variables
|
||||
variable "status_silenced" {
|
||||
description = "Groups to mute for Datalake Store status monitor"
|
||||
@ -43,5 +48,11 @@ variable "status_time_aggregator" {
|
||||
|
||||
variable "status_timeframe" {
|
||||
description = "Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_15m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "status_extra_tags" {
|
||||
description = "Extra tags for Datalake Store status monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
8
cloud/azure/datalakestore/modules.tf
Normal file
8
cloud/azure/datalakestore/modules.tf
Normal file
@ -0,0 +1,8 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
resource = "datalakestore"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
}
|
||||
@ -1,19 +1,11 @@
|
||||
data "template_file" "filter" {
|
||||
template = "$${filter}"
|
||||
|
||||
vars {
|
||||
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_servicebus:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
|
||||
}
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "datalakestore_status" {
|
||||
name = "[${var.environment}] Datalake Store is down"
|
||||
message = "${coalesce(var.status_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
${var.status_time_aggregator}(${var.status_timeframe}): (
|
||||
avg:azure.datalakestore_accounts.status{${data.template_file.filter.rendered}} by {resource_group,region,name}
|
||||
) != 1
|
||||
avg:azure.datalakestore_accounts.status${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) < 1
|
||||
EOF
|
||||
|
||||
type = "metric alert"
|
||||
@ -21,14 +13,14 @@ EOF
|
||||
silenced = "${var.status_silenced}"
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:datalakestore", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"]
|
||||
}
|
||||
|
||||
@ -927,7 +927,7 @@ variable "servicebus_status_time_aggregator" {
|
||||
|
||||
variable "servicebus_status_timeframe" {
|
||||
description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_15m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "servicebus_no_active_connections_silenced" {
|
||||
@ -951,7 +951,7 @@ variable "servicebus_no_active_connections_time_aggregator" {
|
||||
variable "servicebus_no_active_connections_timeframe" {
|
||||
description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_15m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "servicebus_server_errors_message" {
|
||||
@ -1599,6 +1599,36 @@ variable "streamanalytics_runtime_errors_threshold_critical" {
|
||||
}
|
||||
|
||||
# Azure CosmosDB specific variables
|
||||
variable "cosmos_db_status_silenced" {
|
||||
description = "Groups to mute for Cosmos DB status monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "cosmos_db_status_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB status monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cosmos_db_status_message" {
|
||||
description = "Custom message for Cosmos DB status monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cosmos_db_status_time_aggregator" {
|
||||
description = "Monitor aggregator for Cosmos DB status [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "cosmos_db_status_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_4xx_requests_message" {
|
||||
description = "Custom message for Cosmos DB 4xx requests monitor"
|
||||
type = "string"
|
||||
@ -1621,6 +1651,24 @@ variable "cosmos_db_4xx_request_rate_threshold_warning" {
|
||||
default = 50
|
||||
}
|
||||
|
||||
variable "cosmos_db_4xx_request_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB 4xx requests monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
# Time aggregation function for the Cosmos DB 4xx requests monitor; forwarded
# to the cosmosdb module under the same name.
variable "cosmos_db_4xx_request_time_aggregator" {
  # The documented value list includes "sum" because it is the default below.
  description = "Monitor aggregator for Cosmos DB 4xx requests [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}
|
||||
|
||||
variable "cosmos_db_4xx_request_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB 4xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_5xx_requests_message" {
|
||||
description = "Custom message for Cosmos DB 5xx requests monitor"
|
||||
type = "string"
|
||||
@ -1643,6 +1691,24 @@ variable "cosmos_db_5xx_request_rate_threshold_warning" {
|
||||
default = 50
|
||||
}
|
||||
|
||||
variable "cosmos_db_5xx_request_rate_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB 5xx requests monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
# Time aggregation function for the Cosmos DB 5xx requests monitor; forwarded
# to the cosmosdb module under the same name.
variable "cosmos_db_5xx_request_time_aggregator" {
  # The documented value list includes "sum" because it is the default below.
  description = "Monitor aggregator for Cosmos DB 5xx requests [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}
|
||||
|
||||
variable "cosmos_db_5xx_request_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB 5xx requests [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_no_request_message" {
|
||||
description = "Custom message for Cosmos DB no request monitor"
|
||||
type = "string"
|
||||
@ -1655,6 +1721,24 @@ variable "cosmos_db_no_request_silenced" {
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "cosmos_db_no_request_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB no request monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cosmos_db_no_request_time_aggregator" {
|
||||
description = "Monitor aggregator for Cosmos DB no request [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "cosmos_db_no_request_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB no request [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_message" {
|
||||
description = "Custom message for Cosmos DB collection RU utilization monitor"
|
||||
type = "string"
|
||||
@ -1677,7 +1761,25 @@ variable "cosmos_db_ru_utilization_rate_threshold_warning" {
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_collection" {
|
||||
variable "cosmos_db_ru_utilization_extra_tags" {
|
||||
description = "Extra tags for Cosmos DB collection RU utilization monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_time_aggregator" {
|
||||
description = "Monitor aggregator for Cosmos DB RU utilization [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_timeframe" {
|
||||
description = "Monitor timeframe for Cosmos DB RU utilization [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "cosmos_db_ru_utilization_collections" {
|
||||
description = "Group to associate Cosmos DB collection to RU max"
|
||||
type = "map"
|
||||
}
|
||||
@ -1703,7 +1805,13 @@ variable "datalakestore_status_time_aggregator" {
|
||||
|
||||
variable "datalakestore_status_timeframe" {
|
||||
description = "Monitor timeframe for Datalake Store status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_15m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
# Extra tags for the Datalake Store status monitor; forwarded to the
# datalakestore module as status_extra_tags.
variable "datalakestore_status_extra_tags" {
  description = "Extra tags for Datalake Store status monitor"
  type        = "list"
  default     = []
}
|
||||
|
||||
variable "keyvault_status_silenced" {
|
||||
@ -1726,7 +1834,13 @@ variable "keyvault_status_time_aggregator" {
|
||||
|
||||
variable "keyvault_status_timeframe" {
|
||||
description = "Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_15m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "keyvault_status_extra_tags" {
|
||||
description = "Extra tags for Key Vault status monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "keyvault_api_result_silenced" {
|
||||
@ -1741,9 +1855,15 @@ variable "keyvault_api_result_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
# Time aggregation function for the Key Vault API result monitor; forwarded to
# the keyvault module as api_result_time_aggregator.
variable "keyvault_api_result_time_aggregator" {
  # The documented value list includes "sum" because it is the default below.
  description = "Monitor aggregator for Key Vault API result [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}
|
||||
|
||||
variable "keyvault_api_result_timeframe" {
|
||||
description = "Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_30m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "keyvault_api_result_threshold_critical" {
|
||||
@ -1755,3 +1875,48 @@ variable "keyvault_api_result_threshold_warning" {
|
||||
description = "Warning threshold for Key Vault API result rate"
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "keyvault_api_result_extra_tags" {
|
||||
description = "Extra tags for Key Vault API result monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "keyvault_api_latency_silenced" {
|
||||
description = "Groups to mute for Key Vault API latency monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "keyvault_api_latency_message" {
|
||||
description = "Custom message for Key Vault API latency monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "keyvault_api_latency_time_aggregator" {
|
||||
description = "Monitor aggregator for Key Vault API latency [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "keyvault_api_latency_timeframe" {
|
||||
description = "Monitor timeframe for Key Vault API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "keyvault_api_latency_threshold_critical" {
|
||||
description = "Critical threshold for Key Vault API latency rate"
|
||||
default = 100
|
||||
}
|
||||
|
||||
variable "keyvault_api_latency_threshold_warning" {
|
||||
description = "Warning threshold for Key Vault API latency rate"
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "keyvault_api_latency_extra_tags" {
|
||||
description = "Extra tags for Key Vault API latency monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
@ -16,23 +16,27 @@ module "datadog-monitors-cloud-azure-keyvault" {
|
||||
|
||||
Creates DataDog monitors with the following checks:
|
||||
|
||||
- Key Vault is down
|
||||
- Key Vault API result rate is low
|
||||
- Key Vault is down
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| api_result_extra_tags | Extra tags for Key Vault API result monitor | list | `<list>` | no |
|
||||
| api_result_message | Custom message for Key Vault API result monitor | string | `` | no |
|
||||
| api_result_silenced | Groups to mute for Key Vault API result monitor | map | `<map>` | no |
|
||||
| api_result_threshold_critical | Critical threshold for Key Vault API result rate | string | `10` | no |
|
||||
| api_result_threshold_warning | Warning threshold for Key Vault API result rate | string | `30` | no |
|
||||
| api_result_time_aggregator | Monitor aggregator for Key Vault API result [available values: min, max, sum or avg] | string | `sum` | no |
|
||||
| api_result_timeframe | Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_30m` | no |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
|
||||
| status_extra_tags | Extra tags for Key Vault status monitor | list | `<list>` | no |
|
||||
| status_message | Custom message for Key Vault status monitor | string | `` | no |
|
||||
| status_silenced | Groups to mute for Key Vault status monitor | map | `<map>` | no |
|
||||
| status_time_aggregator | Monitor aggregator for Key Vault status [available values: min, max or avg] | string | `max` | no |
|
||||
|
||||
@ -17,11 +17,16 @@ variable "message" {
|
||||
description = "Message sent when a monitor is triggered"
|
||||
}
|
||||
|
||||
variable "delay" {
|
||||
variable "evaluation_delay" {
|
||||
description = "Delay in seconds for the metric evaluation"
|
||||
default = 900
|
||||
}
|
||||
|
||||
variable "new_host_delay" {
|
||||
description = "Delay in seconds before monitor new resource"
|
||||
default = 300
|
||||
}
|
||||
|
||||
# Azure Key Vault specific variables
|
||||
variable "status_silenced" {
|
||||
description = "Groups to mute for Key Vault status monitor"
|
||||
@ -43,7 +48,13 @@ variable "status_time_aggregator" {
|
||||
|
||||
variable "status_timeframe" {
|
||||
description = "Monitor timeframe for Key Vault status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_15m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "status_extra_tags" {
|
||||
description = "Extra tags for Key Vault status monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "api_result_silenced" {
|
||||
@ -58,9 +69,15 @@ variable "api_result_message" {
|
||||
default = ""
|
||||
}
|
||||
|
||||
# Time aggregation function placed at the head of the Key Vault API result
# monitor query.
variable "api_result_time_aggregator" {
  # The documented value list includes "sum" because it is the default below.
  description = "Monitor aggregator for Key Vault API result [available values: min, max, sum or avg]"
  type        = "string"
  default     = "sum"
}
|
||||
|
||||
variable "api_result_timeframe" {
|
||||
description = "Monitor timeframe for Key Vault API result [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_30m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "api_result_threshold_critical" {
|
||||
@ -72,3 +89,48 @@ variable "api_result_threshold_warning" {
|
||||
description = "Warning threshold for Key Vault API result rate"
|
||||
default = 30
|
||||
}
|
||||
|
||||
variable "api_result_extra_tags" {
|
||||
description = "Extra tags for Key Vault API result monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "api_latency_silenced" {
|
||||
description = "Groups to mute for Key Vault API latency monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "api_latency_message" {
|
||||
description = "Custom message for Key Vault API latency monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "api_latency_time_aggregator" {
|
||||
description = "Monitor aggregator for Key Vault API latency [available values: min, max or avg]"
|
||||
type = "string"
|
||||
default = "min"
|
||||
}
|
||||
|
||||
variable "api_latency_timeframe" {
|
||||
description = "Monitor timeframe for Key Vault API latency [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "api_latency_threshold_critical" {
|
||||
description = "Critical threshold for Key Vault API latency rate"
|
||||
default = 100
|
||||
}
|
||||
|
||||
variable "api_latency_threshold_warning" {
|
||||
description = "Warning threshold for Key Vault API latency rate"
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "api_latency_extra_tags" {
|
||||
description = "Extra tags for Key Vault API latency monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
19
cloud/azure/keyvault/modules.tf
Normal file
19
cloud/azure/keyvault/modules.tf
Normal file
@ -0,0 +1,19 @@
|
||||
module "filter-tags" {
|
||||
source = "../../../common/filter-tags"
|
||||
|
||||
environment = "${var.environment}"
|
||||
resource = "keyvault"
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
}
|
||||
|
||||
# Second filter-tags instance whose filter carries a "statuscode:%s"
# placeholder. The Key Vault API result monitor fills it in with
# format(module.filter-tags-statuscode.query_alert, "200").
module "filter-tags-statuscode" {
  source = "../../../common/filter-tags"

  environment = "${var.environment}"

  # Must name the same resource as the sibling "filter-tags" module so both
  # sides of the API result ratio select the same Key Vault resources.
  # NOTE(review): presumably drives the default dd_azure_<resource> tag --
  # confirm against common/filter-tags.
  resource = "keyvault"

  filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
  filter_tags_custom       = "${var.filter_tags_custom},statuscode:%s"

  extra_tags = ["statuscode:%s"]
}
|
||||
@ -1,36 +1,28 @@
|
||||
data "template_file" "filter" {
|
||||
template = "$${filter}"
|
||||
|
||||
vars {
|
||||
filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_azure_servicebus:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}"
|
||||
}
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "keyvault_status" {
|
||||
name = "[${var.environment}] Key Vault is down"
|
||||
message = "${coalesce(var.status_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
${var.status_time_aggregator}(${var.status_timeframe}): (
|
||||
avg:azure.keyvault_vaults.status{${data.template_file.filter.rendered}} by {resource_group,region,name}
|
||||
) != 1
|
||||
EOF
|
||||
avg:azure.keyvault_vaults.status${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) < 1
|
||||
EOF
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
silenced = "${var.status_silenced}"
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:keyvault", "team:azure", "provider:azure"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:keyvault", "team:claranet", "created-by:terraform", "${var.status_extra_tags}"]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "keyvault_api_result" {
|
||||
@ -38,9 +30,9 @@ resource "datadog_monitor" "keyvault_api_result" {
|
||||
message = "${coalesce(var.status_message, var.message)}"
|
||||
|
||||
query = <<EOF
|
||||
sum(${var.api_result_timeframe}): (
|
||||
avg:azure.keyvault_vaults.service_api_result{${data.template_file.filter.rendered}} by {name,resource_group,region}.as_count() /
|
||||
avg:azure.keyvault_vaults.service_api_hit{${data.template_file.filter.rendered}} by {name,resource_group,region}.as_count()
|
||||
${var.api_result_time_aggregator}(${var.api_result_timeframe}): (
|
||||
avg:azure.keyvault_vaults.service_api_result${format(module.filter-tags-statuscode.query_alert, "200")} by {name,resource_group,region}.as_count() /
|
||||
avg:azure.keyvault_vaults.service_api_result${module.filter-tags.query_alert} by {name,resource_group,region}.as_count()
|
||||
) * 100 < ${var.api_result_threshold_critical}
|
||||
EOF
|
||||
|
||||
@ -53,15 +45,47 @@ resource "datadog_monitor" "keyvault_api_result" {
|
||||
|
||||
silenced = "${var.api_result_silenced}"
|
||||
|
||||
notify_no_data = true
|
||||
evaluation_delay = "${var.delay}"
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = false
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:keyvault", "team:azure", "provider:azure"]
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:keyvault", "team:claranet", "created-by:terraform", "${var.api_result_extra_tags}"]
|
||||
}
|
||||
|
||||
# Metric alert on Key Vault service API latency, grouped by vault name,
# resource group and region. Triggers when the aggregated latency over the
# configured timeframe exceeds the critical threshold.
resource "datadog_monitor" "keyvault_api_latency" {
  name = "[${var.environment}] Key Vault API latency is high {{#is_alert}}{{{comparator}}} {{threshold}}ms ({{value}}ms){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}ms ({{value}}ms){{/is_warning}}"

  # Use this monitor's own custom message and fall back to the shared one.
  # The original read var.status_message, leaving api_latency_message unused.
  message = "${coalesce(var.api_latency_message, var.message)}"

  query = <<EOF
    ${var.api_latency_time_aggregator}(${var.api_latency_timeframe}):
      avg:azure.keyvault_vaults.service_api_latency${module.filter-tags.query_alert} by {name,resource_group,region}
    > ${var.api_latency_threshold_critical}
EOF

  thresholds {
    critical = "${var.api_latency_threshold_critical}"
    warning  = "${var.api_latency_threshold_warning}"
  }

  type = "metric alert"

  silenced = "${var.api_latency_silenced}"

  notify_no_data      = true
  evaluation_delay    = "${var.evaluation_delay}"
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = false
  new_host_delay      = "${var.new_host_delay}"

  tags = ["env:${var.environment}", "type:cloud", "provider:azure", "resource:keyvault", "team:claranet", "created-by:terraform", "${var.api_latency_extra_tags}"]
}
|
||||
|
||||
@ -390,36 +390,59 @@ module "streamanalytics" {
|
||||
module "cosmosdb" {
|
||||
source = "./cosmosdb"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
|
||||
status_message = "${var.cosmos_db_status_message}"
|
||||
status_silenced = "${var.cosmos_db_status_silenced}"
|
||||
status_extra_tags = "${var.cosmos_db_status_extra_tags}"
|
||||
status_time_aggregator = "${var.cosmos_db_status_time_aggregator}"
|
||||
status_timeframe = "${var.cosmos_db_status_timeframe}"
|
||||
|
||||
cosmos_db_4xx_request_rate_threshold_critical = "${var.cosmos_db_4xx_request_rate_threshold_critical}"
|
||||
cosmos_db_4xx_request_rate_threshold_warning = "${var.cosmos_db_4xx_request_rate_threshold_warning}"
|
||||
cosmos_db_4xx_requests_message = "${var.cosmos_db_4xx_requests_message}"
|
||||
cosmos_db_4xx_requests_silenced = "${var.cosmos_db_4xx_requests_silenced}"
|
||||
cosmos_db_4xx_request_extra_tags = "${var.cosmos_db_4xx_request_extra_tags}"
|
||||
cosmos_db_4xx_request_time_aggregator = "${var.cosmos_db_4xx_request_time_aggregator}"
|
||||
cosmos_db_4xx_request_timeframe = "${var.cosmos_db_4xx_request_timeframe}"
|
||||
|
||||
cosmos_db_5xx_request_rate_threshold_critical = "${var.cosmos_db_5xx_request_rate_threshold_critical}"
|
||||
cosmos_db_5xx_request_rate_threshold_warning = "${var.cosmos_db_5xx_request_rate_threshold_warning}"
|
||||
cosmos_db_5xx_requests_message = "${var.cosmos_db_5xx_requests_message}"
|
||||
cosmos_db_5xx_requests_silenced = "${var.cosmos_db_5xx_requests_silenced}"
|
||||
cosmos_db_5xx_request_rate_extra_tags = "${var.cosmos_db_5xx_request_rate_extra_tags}"
|
||||
cosmos_db_5xx_request_time_aggregator = "${var.cosmos_db_5xx_request_time_aggregator}"
|
||||
cosmos_db_5xx_request_timeframe = "${var.cosmos_db_5xx_request_timeframe}"
|
||||
|
||||
cosmos_db_no_request_message = "${var.cosmos_db_no_request_message}"
|
||||
cosmos_db_no_request_silenced = "${var.cosmos_db_no_request_silenced}"
|
||||
cosmos_db_no_request_extra_tags = "${var.cosmos_db_no_request_extra_tags}"
|
||||
cosmos_db_no_request_time_aggregator = "${var.cosmos_db_no_request_time_aggregator}"
|
||||
cosmos_db_no_request_timeframe = "${var.cosmos_db_no_request_timeframe}"
|
||||
|
||||
cosmos_db_ru_utilization_rate_threshold_critical = "${var.cosmos_db_ru_utilization_rate_threshold_critical}"
|
||||
cosmos_db_ru_utilization_rate_threshold_warning = "${var.cosmos_db_ru_utilization_rate_threshold_warning}"
|
||||
cosmos_db_ru_utilization_message = "${var.cosmos_db_ru_utilization_message}"
|
||||
cosmos_db_ru_utilization_silenced = "${var.cosmos_db_ru_utilization_silenced}"
|
||||
cosmos_db_ru_utilization_collection = "${var.cosmos_db_ru_utilization_collection}"
|
||||
cosmos_db_ru_utilization_extra_tags = "${var.cosmos_db_ru_utilization_extra_tags}"
|
||||
cosmos_db_ru_utilization_time_aggregator = "${var.cosmos_db_ru_utilization_time_aggregator}"
|
||||
cosmos_db_ru_utilization_timeframe = "${var.cosmos_db_ru_utilization_timeframe}"
|
||||
cosmos_db_ru_utilization_collections = "${var.cosmos_db_ru_utilization_collections}"
|
||||
}
|
||||
|
||||
module "datalakestore" {
|
||||
source = "./datalakestore"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
@ -428,14 +451,16 @@ module "datalakestore" {
|
||||
status_message = "${var.datalakestore_status_message}"
|
||||
status_timeframe = "${var.datalakestore_status_timeframe}"
|
||||
status_time_aggregator = "${var.datalakestore_status_time_aggregator}"
|
||||
status_extra_tags = "${var.datalakestore_status_extra_tags}"
|
||||
}
|
||||
|
||||
module "keyvault" {
|
||||
source = "./keyvault"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
delay = "${var.delay}"
|
||||
environment = "${var.environment}"
|
||||
message = "${var.message}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
filter_tags_use_defaults = "${var.filter_tags_use_defaults}"
|
||||
filter_tags_custom = "${var.filter_tags_custom}"
|
||||
@ -444,10 +469,22 @@ module "keyvault" {
|
||||
status_message = "${var.keyvault_status_message}"
|
||||
status_timeframe = "${var.keyvault_status_timeframe}"
|
||||
status_time_aggregator = "${var.keyvault_status_time_aggregator}"
|
||||
status_extra_tags = "${var.keyvault_status_extra_tags}"
|
||||
|
||||
api_result_enabled = "${var.keyvault_api_result_enabled}"
|
||||
api_result_silenced = "${var.keyvault_api_result_silenced}"
|
||||
api_result_message = "${var.keyvault_api_result_message}"
|
||||
api_result_timeframe = "${var.keyvault_api_result_timeframe}"
|
||||
api_result_time_aggregator = "${var.keyvault_api_result_time_aggregator}"
|
||||
api_result_threshold_critical = "${var.keyvault_api_result_threshold_critical}"
|
||||
api_result_threshold_warning = "${var.keyvault_api_result_threshold_warning}"
|
||||
api_result_extra_tags = "${var.keyvault_api_result_extra_tags}"
|
||||
|
||||
api_latency_silenced = "${var.keyvault_api_latency_silenced}"
|
||||
api_latency_message = "${var.keyvault_api_latency_message}"
|
||||
api_latency_timeframe = "${var.keyvault_api_latency_timeframe}"
|
||||
api_latency_time_aggregator = "${var.keyvault_api_latency_time_aggregator}"
|
||||
api_latency_threshold_critical = "${var.keyvault_api_latency_threshold_critical}"
|
||||
api_latency_threshold_warning = "${var.keyvault_api_latency_threshold_warning}"
|
||||
api_latency_extra_tags = "${var.keyvault_api_latency_extra_tags}"
|
||||
}
|
||||
|
||||
@ -16,10 +16,10 @@ module "datadog-monitors-cloud-azure-servicebus" {
|
||||
|
||||
Creates DataDog monitors with the following checks:
|
||||
|
||||
- Service Bus is down
|
||||
- Service Bus has no active connection
|
||||
- Service Bus user errors rate is high
|
||||
- Service Bus is down
|
||||
- Service Bus server errors rate is high
|
||||
- Service Bus user errors rate is high
|
||||
|
||||
## Inputs
|
||||
|
||||
@ -30,6 +30,7 @@ Creates DataDog monitors with the following checks:
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| message | Message sent when an alert is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
|
||||
| no_active_connections_message | Custom message for Service Bus no active connections monitor | string | `` | no |
|
||||
| no_active_connections_silenced | Groups to mute for Service Bus no active connections monitor | map | `<map>` | no |
|
||||
| no_active_connections_time_aggregator | Monitor aggregator for Service Bus no active connections [available values: min, max or avg] | string | `max` | no |
|
||||
@ -39,7 +40,6 @@ Creates DataDog monitors with the following checks:
|
||||
| server_errors_threshold_critical | Critical threshold for Service Bus server errors monitor | string | `90` | no |
|
||||
| server_errors_threshold_warning | Warning threshold for Service Bus server errors monitor | string | `50` | no |
|
||||
| server_errors_timeframe | Monitor timeframe for Service Bus server errors [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no |
|
||||
| new_host_delay | Delay in seconds before monitor new resource | string | `300` | no |
|
||||
| status_extra_tags | Extra tags for Service Bus status monitor | list | `<list>` | no |
|
||||
| status_message | Custom message for Service Bus status monitor | string | `` | no |
|
||||
| status_silenced | Groups to mute for Service Bus status monitor | map | `<map>` | no |
|
||||
|
||||
@ -56,7 +56,7 @@ variable "status_time_aggregator" {
|
||||
|
||||
variable "status_timeframe" {
|
||||
description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
default = "last_15m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "no_active_connections_silenced" {
|
||||
@ -80,7 +80,7 @@ variable "no_active_connections_time_aggregator" {
|
||||
variable "no_active_connections_timeframe" {
|
||||
description = "Monitor timeframe for Service Bus status [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
|
||||
type = "string"
|
||||
default = "last_15m"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "server_errors_message" {
|
||||
|
||||
@ -31,7 +31,7 @@ resource "datadog_monitor" "service_bus_no_active_connections" {
|
||||
|
||||
query = <<EOF
|
||||
${var.no_active_connections_time_aggregator}(${var.no_active_connections_timeframe}): (
|
||||
avg:azure.servicebus_namespaces.active_connections_preview{${data.template_file.filter.rendered}} by {resource_group,region,name}
|
||||
avg:azure.servicebus_namespaces.active_connections_preview${module.filter-tags.query_alert} by {resource_group,region,name}
|
||||
) < 1
|
||||
EOF
|
||||
|
||||
@ -40,14 +40,14 @@ resource "datadog_monitor" "service_bus_no_active_connections" {
|
||||
silenced = "${var.no_active_connections_silenced}"
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"]
|
||||
}
|
||||
@ -58,8 +58,8 @@ resource "datadog_monitor" "service_bus_user_errors" {
|
||||
|
||||
query = <<EOF
|
||||
sum(${var.user_errors_timeframe}): (default(
|
||||
avg:azure.servicebus_namespaces.user_errors.preview{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
|
||||
avg:azure.servicebus_namespaces.incoming_requests_preview{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
|
||||
avg:azure.servicebus_namespaces.user_errors.preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
|
||||
avg:azure.servicebus_namespaces.incoming_requests_preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
|
||||
* 100, 0)
|
||||
) > ${var.user_errors_threshold_critical}
|
||||
EOF
|
||||
@ -74,14 +74,14 @@ resource "datadog_monitor" "service_bus_user_errors" {
|
||||
silenced = "${var.user_errors_silenced}"
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"]
|
||||
}
|
||||
@ -92,8 +92,8 @@ resource "datadog_monitor" "service_bus_server_errors" {
|
||||
|
||||
query = <<EOF
|
||||
sum(${var.server_errors_timeframe}): (default(
|
||||
avg:azure.servicebus_namespaces.server_errors.preview{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count() /
|
||||
avg:azure.servicebus_namespaces.incoming_requests_preview{${data.template_file.filter.rendered}} by {resource_group,region,name}.as_count()
|
||||
avg:azure.servicebus_namespaces.server_errors.preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_count() /
|
||||
avg:azure.servicebus_namespaces.incoming_requests_preview${module.filter-tags.query_alert} by {resource_group,region,name}.as_count()
|
||||
* 100, 0)
|
||||
) > ${var.server_errors_threshold_critical}
|
||||
EOF
|
||||
@ -108,14 +108,14 @@ resource "datadog_monitor" "service_bus_server_errors" {
|
||||
silenced = "${var.server_errors_silenced}"
|
||||
|
||||
notify_no_data = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
require_full_window = true
|
||||
new_host_delay = "${var.delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
tags = ["env:${var.environment}", "resource:servicebus", "team:azure", "provider:azure"]
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user