diff --git a/databases/mongodb/README.md b/databases/mongodb/README.md index 193de0e..cabe5bf 100644 --- a/databases/mongodb/README.md +++ b/databases/mongodb/README.md @@ -16,7 +16,10 @@ module "datadog-monitors-databases-mongodb" { Creates DataDog monitors with the following checks: -- Member down in the replica set +- MongoDB primary state +- MongoDB secondary missing +- MongoDB too much servers or wrong monitoring config +- MongoDB replication lag ## Inputs @@ -27,63 +30,36 @@ Creates DataDog monitors with the following checks: | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | -| mongodb_replicaset_message | Custom message for Mongodb replicaset monitor | string | `` | no | -| mongodb_replicaset_silenced | Groups to mute for Mongodb replicaset monitor | map | `` | no | -| mongodb_replicaset_time_aggregator | Monitor aggregator for Mongodb replicaset [available values: min, max or avg] | string | `max` | no | -| mongodb_replicaset_timeframe | Monitor timeframe for Mongodb replicaset [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| mongodb_desired_servers_count | Number of servers that should be instanciated for this cluster | string | `3` | no | +| mongodb_lag_critical | Critical replication lag in s | string | `5` | no | +| mongodb_lag_warning | Warn replication lag in s | string | `2` | no | +| mongodb_primary_aggregator | Monitor aggregator for MongoDB primary state [available values: min, max] | string | `max` | no | +| mongodb_primary_message | Custom message for MongoDB primary monitor | string | `` | no | +| mongodb_primary_silenced | Groups to mute for MongoDB primary state monitor | map | `` | no | +| mongodb_primary_timeframe | Monitor timeframe for MongoDB 
wrong state for primary node [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1m` | no | +| mongodb_replication_aggregator | Monitor aggregator for MongoDB replication lag [available values: min, max, sum or avg] | string | `avg` | no | +| mongodb_replication_message | Custom message for MongoDB replication monitor | string | `` | no | +| mongodb_replication_silenced | Groups to mute for MongoDB replication lag monitor | map | `` | no | +| mongodb_replication_timeframe | Monitor timeframe for MongoDB replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_1m` | no | +| mongodb_secondary_aggregator | Monitor aggregator for MongoDB secondary state [available values: min, max] | string | `max` | no | +| mongodb_secondary_message | Custom message for MongoDB secondary monitor | string | `` | no | +| mongodb_secondary_silenced | Groups to mute for MongoDB secondary state monitor | map | `` | no | +| mongodb_secondary_timeframe | Monitor timeframe for MongoDB wrong state for secondaries nodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| mongodb_server_count_aggregator | Monitor aggregator for MongoDB server count [available values: min, max] | string | `min` | no | +| mongodb_server_count_message | Custom message for MongoDB server count | string | `` | no | +| mongodb_server_count_silenced | Groups to mute for MongoDB server count monitor | map | `` | no | +| mongodb_server_count_timeframe | Monitor timeframe for MongoDB wrong server count [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_15m` | no | ## Outputs | Name | Description | |------|-------------| -| mongodb_replicaset_state_id | id for monitor mongodb_replicaset_state | +| mongodb_primary_id | id for monitor mongodb_primary | +| mongodb_replication_id | id 
for monitor mongodb_replication | +| mongodb_secondary_id | id for monitor mongodb_secondary | +| mongodb_server_count_id | id for monitor mongodb_server_count | ## Related documentation DataDog documentation: [https://docs.datadoghq.com/integrations/mongo/](https://docs.datadoghq.com/integrations/mongo/) - -## Custom settings - -### Prepare your ReplicaSet - -Add a user to your ReplicaSet (on the primary instance) - - -``` -use admin -db.auth("admin", "admin-password") ## This is optional is you don't have any admin password -db.createUser({"user":"datadog", "pwd": "{{PASSWORD}}", "roles" : [ {role: 'read', db: 'admin' }, {role: 'clusterMonitor', db: 'admin'}, {role: 'read', db: 'local' }]}) -``` - -### Configure your Datadog agent - -Add this file conf.d/mongo.yaml - -``` - -init_config: - -instances: - - server: mongodb://datadog:password@[MONGO_URI] - tags: - - mytag1 - - mytag2 - - server: mongodb://datadog:password@[MONGO_URI] - tags: - - mytag1 - - mytag2 -``` - -### Monitor ReplicaSet Health - -Name: [environment] Replica Set heath for {{ replset_name }} - -This monitor will check the health of your ReplicaSet - -Metrics are : - -1: The replicaSet is OK -0: The replicaSet is KO - -This monitor will trigger an alert for each ReplicaSet. 
- +MongoDB documentation: [https://docs.mongodb.com/manual/administration/monitoring/](https://docs.mongodb.com/manual/administration/monitoring/) diff --git a/databases/mongodb/inputs.tf b/databases/mongodb/inputs.tf index 0df8a02..3800ae5 100644 --- a/databases/mongodb/inputs.tf +++ b/databases/mongodb/inputs.tf @@ -24,26 +24,113 @@ variable "filter_tags_custom" { default = "*" } -variable "mongodb_replicaset_silenced" { - description = "Groups to mute for Mongodb replicaset monitor" +variable "mongodb_desired_servers_count" { + description = "Number of servers that should be instanciated for this cluster" + default = 3 +} + +variable "mongodb_primary_timeframe" { + description = "Monitor timeframe for MongoDB wrong state for primary node [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_1m" +} + +variable "mongodb_secondary_timeframe" { + description = "Monitor timeframe for MongoDB wrong state for secondaries nodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_5m" +} + +variable "mongodb_server_count_timeframe" { + description = "Monitor timeframe for MongoDB wrong server count [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + +variable "mongodb_replication_timeframe" { + description = "Monitor timeframe for MongoDB replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_1m" +} + +variable "mongodb_lag_warning" { + description = "Warn replication lag in s" + default = 2 +} + +variable "mongodb_lag_critical" { + description = "Critical replication lag in s" + default = 5 +} + +variable "mongodb_primary_silenced" { + description = "Groups to mute for MongoDB primary state monitor" type = "map" default = {} } -variable 
"mongodb_replicaset_message" { - description = "Custom message for Mongodb replicaset monitor" +variable "mongodb_secondary_silenced" { + description = "Groups to mute for MongoDB secondary state monitor" + type = "map" + default = {} +} + +variable "mongodb_server_count_silenced" { + description = "Groups to mute for MongoDB server count monitor" + type = "map" + default = {} +} + +variable "mongodb_replication_silenced" { + description = "Groups to mute for MongoDB replication lag monitor" + type = "map" + default = {} +} + +variable "mongodb_primary_message" { + description = "Custom message for MongoDB primary monitor" type = "string" default = "" } -variable "mongodb_replicaset_time_aggregator" { - description = "Monitor aggregator for Mongodb replicaset [available values: min, max or avg]" +variable "mongodb_secondary_message" { + description = "Custom message for MongoDB secondary monitor" + type = "string" + default = "" +} + +variable "mongodb_server_count_message" { + description = "Custom message for MongoDB server count" + type = "string" + default = "" +} + +variable "mongodb_replication_message" { + description = "Custom message for MongoDB replication monitor" + type = "string" + default = "" +} + +variable "mongodb_primary_aggregator" { + description = "Monitor aggregator for MongoDB primary state [available values: min, max]" type = "string" default = "max" } -variable "mongodb_replicaset_timeframe" { - description = "Monitor timeframe for Mongodb replicaset [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" +variable "mongodb_secondary_aggregator" { + description = "Monitor aggregator for MongoDB secondary state [available values: min, max]" type = "string" - default = "last_5m" + default = "max" +} + +variable "mongodb_server_count_aggregator" { + description = "Monitor aggregator for MongoDB server count [available values: min, max]" + type = "string" + default = "min" +} + +variable 
"mongodb_replication_aggregator" { + description = "Monitor aggregator for MongoDB replication lag [available values: min, max, sum or avg]" + type = "string" + default = "avg" } diff --git a/databases/mongodb/monitors-mongo.tf b/databases/mongodb/monitors-mongo.tf index aaa2549..5f54331 100644 --- a/databases/mongodb/monitors-mongo.tf +++ b/databases/mongodb/monitors-mongo.tf @@ -2,18 +2,17 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_monitoring_mongodb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_mongodb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } -resource "datadog_monitor" "mongodb_replicaset_state" { - name = "[${var.environment}] Member down in the replica set" - message = "${coalesce(var.mongodb_replicaset_message, var.message)}" +resource "datadog_monitor" "mongodb_primary" { + name = "[${var.environment}] MongoDB primary state" + message = "${coalesce(var.mongodb_primary_message, var.message)}" query = <= 2 EOF type = "metric alert" @@ -27,7 +26,100 @@ resource "datadog_monitor" "mongodb_replicaset_state" { include_tags = true require_full_window = true - silenced = "${var.mongodb_replicaset_silenced}" + silenced = "${var.mongodb_primary_silenced}" + + tags = ["env:${var.environment}", "resource:mongodb"] +} + +resource "datadog_monitor" "mongodb_secondary" { + name = "[${var.environment}] MongoDB secondary missing" + message = "${coalesce(var.mongodb_secondary_message, var.message)}" + + query = < 1 + EOF + + thresholds { + critical = 1 + warning = 0 + } + + type = "metric alert" + + notify_no_data = false + renotify_interval = 0 + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + notify_audit = false + timeout_h = 0 + include_tags = true + require_full_window = true + + silenced = 
"${var.mongodb_secondary_silenced}" + + tags = ["env:${var.environment}", "resource:mongodb"] +} + +resource "datadog_monitor" "mongodb_server_count" { + name = "[${var.environment}] MongoDB too much servers or wrong monitoring config" + message = "${coalesce(var.mongodb_server_count_message, var.message)}" + + query = < 99 + EOF + + thresholds { /* NOTE(review): critical is fixed at 99 while warning follows mongodb_desired_servers_count - confirm 99 is an intentional ceiling */ + critical = 99 + warning = "${var.mongodb_desired_servers_count}" + } + + type = "metric alert" + + notify_no_data = false + renotify_interval = 0 + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + notify_audit = false + timeout_h = 0 + include_tags = true + require_full_window = true + + silenced = "${var.mongodb_server_count_silenced}" + + tags = ["env:${var.environment}", "resource:mongodb"] +} + +resource "datadog_monitor" "mongodb_replication" { + name = "[${var.environment}] MongoDB replication lag" + message = "${coalesce(var.mongodb_replication_message, var.message)}" + + query = < ${var.mongodb_lag_critical} + EOF + + thresholds { + critical = "${var.mongodb_lag_critical}" + warning = "${var.mongodb_lag_warning}" + } + + type = "metric alert" + + notify_no_data = false + renotify_interval = 0 + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + notify_audit = false + timeout_h = 0 + include_tags = true + require_full_window = true + + silenced = "${var.mongodb_replication_silenced}" tags = ["env:${var.environment}", "resource:mongodb"] } diff --git a/databases/mongodb/outputs.tf b/databases/mongodb/outputs.tf index 39ac537..8355291 100644 --- a/databases/mongodb/outputs.tf +++ b/databases/mongodb/outputs.tf @@ -1,4 +1,19 @@ -output "mongodb_replicaset_state_id" { - description = "id for monitor mongodb_replicaset_state" - value = "${datadog_monitor.mongodb_replicaset_state.id}" +output "mongodb_primary_id" { + description = "id for monitor mongodb_primary" + value = "${datadog_monitor.mongodb_primary.id}" +} + +output "mongodb_secondary_id" { + description = "id for monitor 
mongodb_secondary" + value = "${datadog_monitor.mongodb_secondary.id}" +} + +output "mongodb_server_count_id" { + description = "id for monitor mongodb_server_count" + value = "${datadog_monitor.mongodb_server_count.id}" +} + +output "mongodb_replication_id" { + description = "id for monitor mongodb_replication" + value = "${datadog_monitor.mongodb_replication.id}" }