diff --git a/databases/mongodb/README.md b/databases/mongodb/README.md index fc521cd..8dfb57b 100644 --- a/databases/mongodb/README.md +++ b/databases/mongodb/README.md @@ -1,58 +1,21 @@ AWS MongoDB Service DataDog monitors ========================================== -Link to integration documentation : - -[https://docs.datadoghq.com/integrations/mongo/](https://docs.datadoghq.com/integrations/mongo/) - - -**Prepare your ReplicaSet** : - -Add a user to your ReplicaSet (on the primary instance) - - -``` -use admin -db.auth("admin", "admin-password") ## This is optional is you don't have any admin password -db.createUser({"user":"datadog", "pwd": "{{PASSWORD}}", "roles" : [ {role: 'read', db: 'admin' }, {role: 'clusterMonitor', db: 'admin'}, {role: 'read', db: 'local' }]}) -``` - -**Configure your Datadog agent** - -Add this file conf.d/mongo.yaml - -``` - -init_config: - -instances: - - server: mongodb://datadog:password@[MONGO_URI] - tags: - - mytag1 - - mytag2 - - server: mongodb://datadog:password@[MONGO_URI] - tags: - - mytag1 - - mytag2 -``` - -**Monitor ReplicaSet Health** - -Name: [environment] Replica Set heath for {{ replset_name }} - -This monitor will check the health of your ReplicaSet - -Metrics are : - -1: The replicaSet is OK -0: The replicaSet is KO - -This monitor will trigger an alert for each ReplicaSet. 
How to use this module ---------------------- +Add a user to MongoDB (on the primary instance) : + +``` +use admin +db.auth("admin", "admin-password") ## This is optional if you don't have any admin password +db.createUser({"user":"datadog", "pwd": "{{PASSWORD}}", "roles" : [ {role: 'read', db: 'admin' }, {role: 'clusterMonitor', db: 'admin'}, {role: 'read', db: 'local' }]}) +``` + +Add a module in your Terraform project : + ``` module "datadog-monitors-aws-mongodb" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//databases/mongodb?ref={revision}" @@ -63,22 +26,86 @@ module "datadog-monitors-aws-mongodb" { } ``` +Configure your Datadog agent for Kubernetes with this config : + +``` +datadog: + confd: + mongo.yaml: |- + ad_identifiers: + - mongodb + init_config: + instances: + - server: mongodb://datadog:password@%%host%%/admin + tags: + - dd_monitoring:enabled + - dd_mongodb:enabled + - env:prod +``` + + Purpose ------- Creates a DataDog monitors with the following checks : -* Mongodb ReplicaSet status +* MongoDB Primary status +* MongoDB Secondaries status +* MongoDB replication lag + +**Monitor MongoDB Primary** + +Name: [environment] MongoDB primary state + +This monitor will check the health of the Primary node + +This monitor will trigger an alert if there's no primary or if the primary state is wrong. 
+ + +**Monitor MongoDB Secondary** + +Name: [environment] MongoDB secondary state + +This monitor will check the health of the secondary nodes + +This monitor will trigger an alert if a secondary is missing or if there's a wrong state + + +**Monitor MongoDB Replication lag** + +Name: [environment] MongoDB replication lag + +This monitor will check the replication lag + +This monitor will trigger an alert if the replication lag is too high + Inputs ------ | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| delay | Delay in seconds for the metric evaluation | string | `15` | no | | environment | Architecture Environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when an alert is triggered | string | - | yes | -| mongodb_replicaset_message | Custom message for Mongodb replicaset monitor | string | `` | no | -| mongodb_replicaset_silenced | Groups to mute for Mongodb replicaset monitor | map | `` | no | -| mongodb_replicaset_timeframe | Monitor timeframe for Mongodb replicaset [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `last_5m` | no | +| mongodb_primary_message | Message sent when an alert is triggered on primary state | string | `` | no | +| mongodb_secondary_message | Message sent when an alert is triggered on secondary state | string | `` | no | +| mongodb_replication_message | Message sent when an alert is triggered on replication lag | string | `` | no | +| mongodb_lag_warning | Replication lag in seconds to trigger a warning alert | string | `2` | no | +| mongodb_lag_critical | Replication lag in seconds to trigger a critical alert | string | `5` | no | +| mongodb_primary_silenced | Groups to mute for Mongodb primary state monitor | map | `` | no | +| 
mongodb_secondary_silenced | Groups to mute for Mongodb secondary state monitor | map | `` | no | +| mongodb_replication_silenced | Groups to mute for Mongodb replication lag monitor | map | `` | no | +| mongodb_primary_aggregator | Monitor aggregator for Mongodb state on primary node [available values: min, max] | string | `max` | no | +| mongodb_secondary_aggregator | Monitor aggregator for Mongodb state for secondaries [available values: min, max] | string | `max` | no | +| mongodb_replication_aggregator | Monitor aggregator for Mongodb replication lag [available values: min, max, sum or avg] | string | `avg` | no | +| mongodb_primary_timeframe | Time frame for MongoDB primary state [available values: `last_#m` (1, 5, 10, 15, or 30)] | string | `last_1m` | no | +| mongodb_secondary_timeframe | Time frame for MongoDB secondary state [available values: `last_#m` (1, 5, 10, 15, or 30)] | string | `last_1m` | no | +| mongodb_replication_timeframe | Time frame for MongoDB replication lag [available values: `last_#m` (1, 5, 10, 15, or 30)] | string | `last_1m` | no | + + +Related documentation +--------------------- + +[https://docs.datadoghq.com/integrations/mongo/](https://docs.datadoghq.com/integrations/mongo/) + diff --git a/databases/mongodb/inputs.tf b/databases/mongodb/inputs.tf index 46cf5ee..54ae5a7 100644 --- a/databases/mongodb/inputs.tf +++ b/databases/mongodb/inputs.tf @@ -24,20 +24,84 @@ variable "filter_tags_custom" { default = "*" } -variable "mongodb_replicaset_silenced" { - description = "Groups to mute for Mongodb replicaset monitor" +variable "mongodb_primary_timeframe" { + description = "Monitor timeframe for MongoDB wrong state for primary node [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_1m" +} + +variable "mongodb_secondary_timeframe" { + description = "Monitor timeframe for MongoDB wrong state for secondaries nodes [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = 
"string" + default = "last_1m" +} + +variable "mongodb_replication_timeframe" { + description = "Monitor timeframe for MongoDB replication lag [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_1m" +} + +variable "mongodb_lag_warning" { + description = "Warn replication lag in s" + default = 2 +} + +variable "mongodb_lag_critical" { + description = "Critical replication lag in s" + default = 5 +} + +variable "mongodb_primary_silenced" { + description = "Groups to mute for Mongodb primary state monitor" type = "map" default = {} } -variable "mongodb_replicaset_message" { - description = "Custom message for Mongodb replicaset monitor" +variable "mongodb_secondary_silenced" { + description = "Groups to mute for Mongodb secondary state monitor" + type = "map" + default = {} +} + +variable "mongodb_replication_silenced" { + description = "Groups to mute for Mongodb replication lag monitor" + type = "map" + default = {} +} + +variable "mongodb_primary_message" { + description = "Custom message for MongoDB primary monitor" type = "string" default = "" } -variable "mongodb_replicaset_timeframe" { - description = "Monitor timeframe for Mongodb replicaset [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" +variable "mongodb_secondary_message" { + description = "Custom message for MongoDB secondary monitor" type = "string" - default = "last_5m" + default = "" +} + +variable "mongodb_replication_message" { + description = "Custom message for MongoDB replication monitor" + type = "string" + default = "" +} + +variable "mongodb_primary_aggregator" { + description = "Monitor aggregator for Mongodb primary state [available values: min, max]" + type = "string" + default = "max" +} + +variable "mongodb_secondary_aggregator" { + description = "Monitor aggregator for Mongodb secondary state [available values: min, max]" + type = "string" + default = "max" +} + +variable 
"mongodb_replication_aggregator" { + description = "Monitor aggregator for Mongodb replication lag [available values: min, max, sum or avg]" + type = "string" + default = "avg" } diff --git a/databases/mongodb/monitors-mongo.tf b/databases/mongodb/monitors-mongo.tf index 5357aeb..ccaaec3 100644 --- a/databases/mongodb/monitors-mongo.tf +++ b/databases/mongodb/monitors-mongo.tf @@ -2,18 +2,17 @@ data "template_file" "filter" { template = "$${filter}" vars { - filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_monitoring_mongodb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" + filter = "${var.filter_tags_use_defaults == "true" ? format("dd_monitoring:enabled,dd_mongodb:enabled,env:%s", var.environment) : "${var.filter_tags_custom}"}" } } -resource "datadog_monitor" "mongodb_replicaset_state" { - name = "[${var.environment}] Member down in the replica set" - message = "${coalesce(var.mongodb_replicaset_message, var.message)}" +resource "datadog_monitor" "mongodb_primary" { + name = "[${var.environment}] MongoDB primary state" + message = "${coalesce(var.mongodb_primary_message, var.message)}" query = <= 2 EOF type = "metric alert" @@ -27,7 +26,67 @@ resource "datadog_monitor" "mongodb_replicaset_state" { include_tags = true require_full_window = true - silenced = "${var.mongodb_replicaset_silenced}" + silenced = "${var.mongodb_primary_silenced}" + + tags = ["env:${var.environment}", "resource:mongodb"] +} + +resource "datadog_monitor" "mongodb_secondary" { + name = "[${var.environment}] MongoDB secondary state" + message = "${coalesce(var.mongodb_secondary_message, var.message)}" + + query = <= 6 + EOF + + thresholds { + critical = 6 + warning = 3 + } + + type = "metric alert" + + notify_no_data = true + renotify_interval = 0 + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + notify_audit = false + timeout_h = 0 + include_tags = true + require_full_window = true + + silenced = 
"${var.mongodb_secondary_silenced}" + + tags = ["env:${var.environment}", "resource:mongodb"] +} + +resource "datadog_monitor" "mongodb_replication" { + name = "[${var.environment}] MongoDB replication lag" + message = "${coalesce(var.mongodb_replication_message, var.message)}" + + query = < ${var.mongodb_lag_critical} + EOF + + thresholds { + critical = "${var.mongodb_lag_critical}" + warning = "${var.mongodb_lag_warning}" + } + + type = "metric alert" + + notify_no_data = false + renotify_interval = 0 + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + notify_audit = false + timeout_h = 0 + include_tags = true + require_full_window = true + + silenced = "${var.mongodb_replication_silenced}" tags = ["env:${var.environment}", "resource:mongodb"] }