diff --git a/databases/mongodb/inputs.tf b/databases/mongodb/inputs.tf index 54ae5a7..5dbc37f 100644 --- a/databases/mongodb/inputs.tf +++ b/databases/mongodb/inputs.tf @@ -24,6 +24,11 @@ variable "filter_tags_custom" { default = "*" } +variable "mongodb_desired_servers_count" { + description = "Number of servers that should be instantiated for this cluster" + default = 3 +} + variable "mongodb_primary_timeframe" { description = "Monitor timeframe for MongoDB wrong state for primary node [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" @@ -33,7 +38,13 @@ variable "mongodb_primary_timeframe" { variable "mongodb_secondary_timeframe" { description = "Monitor timeframe for MongoDB wrong state for secondaries nodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" type = "string" - default = "last_1m" + default = "last_5m" +} + +variable "mongodb_server_count_timeframe" { + description = "Monitor timeframe for MongoDB server count [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" } variable "mongodb_replication_timeframe" { @@ -64,6 +75,12 @@ variable "mongodb_secondary_silenced" { default = {} } +variable "mongodb_server_count_silenced" { + description = "Groups to mute for Mongodb server count monitor" + type = "map" + default = {} +} + variable "mongodb_replication_silenced" { description = "Groups to mute for Mongodb replication lag monitor" type = "map" @@ -82,6 +99,12 @@ variable "mongodb_secondary_message" { default = "" } +variable "mongodb_server_count_message" { + description = "Custom message for MongoDB server count monitor" + type = "string" + default = "" +} + variable "mongodb_replication_message" { description = "Custom message for MongoDB replication monitor" type = "string" @@ -100,6 +123,12 @@ variable "mongodb_secondary_aggregator" { default = "max" } 
+variable "mongodb_server_count_aggregator" { + description = "Monitor aggregator for Mongodb server count [available values: min, max]" + type = "string" + default = "max" +} + variable "mongodb_replication_aggregator" { description = "Monitor aggregator for Mongodb replication lag [available values: min, max, sum or avg]" type = "string" diff --git a/databases/mongodb/monitors-mongo.tf b/databases/mongodb/monitors-mongo.tf index ccaaec3..5f54331 100644 --- a/databases/mongodb/monitors-mongo.tf +++ b/databases/mongodb/monitors-mongo.tf @@ -32,22 +32,55 @@ resource "datadog_monitor" "mongodb_primary" { } resource "datadog_monitor" "mongodb_secondary" { - name = "[${var.environment}] MongoDB secondary state" + name = "[${var.environment}] MongoDB secondary missing" message = "${coalesce(var.mongodb_secondary_message, var.message)}" query = <= 6 + ${var.mongodb_desired_servers_count} - + sum:mongodb.replset.health{${data.template_file.filter.rendered}} by {replset_name} + > 1 EOF thresholds { - critical = 6 - warning = 3 + critical = 1 + warning = 0 } type = "metric alert" - notify_no_data = true + notify_no_data = false + renotify_interval = 0 + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + notify_audit = false + timeout_h = 0 + include_tags = true + require_full_window = true + + silenced = "${var.mongodb_secondary_silenced}" + + tags = ["env:${var.environment}", "resource:mongodb"] +} + +resource "datadog_monitor" "mongodb_server_count" { + name = "[${var.environment}] MongoDB too much servers or wrong monitoring config" + message = "${coalesce(var.mongodb_server_count_message, var.message)}" + + query = < 99 + EOF + + thresholds { + critical = 99 + warning = "${var.mongodb_desired_servers_count}" + } + + type = "metric alert" + + notify_no_data = false renotify_interval = 0 evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}"