From 0e71ff250611d0bf25805834548401db9d2070aa Mon Sep 17 00:00:00 2001 From: Matthieu Bourgain Date: Tue, 24 Dec 2019 14:00:04 +0100 Subject: [PATCH] MON-547 add MySQL replication monitors --- database/mysql/README.md | 16 +++++++ database/mysql/inputs.tf | 77 ++++++++++++++++++++++++++++++++ database/mysql/monitors-mysql.tf | 59 ++++++++++++++++++++++++ database/mysql/outputs.tf | 9 ++++ 4 files changed, 161 insertions(+) diff --git a/database/mysql/README.md b/database/mysql/README.md index 29cf74c..44fa28d 100644 --- a/database/mysql/README.md +++ b/database/mysql/README.md @@ -25,6 +25,8 @@ Creates DataDog monitors with the following checks: - Mysql server does not respond - Mysql Slow queries - Mysql threads changed abnormally +- Mysql replication lag +- Mysql replication status ## Inputs @@ -102,6 +104,18 @@ Creates DataDog monitors with the following checks: | mysql\_threads\_threshold\_critical | Maximum critical acceptable number of threads | string | `"1"` | no | | mysql\_threads\_time\_aggregator | Monitor time aggregator for MySQL threads monitor [available values: min, max or avg] | string | `"avg"` | no | | mysql\_threads\_timeframe | Monitor timeframe for MySQL threads monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_4h"` | no | +| mysql\_replication\_lag\_enabled | Flag to enable mysql replication lag monitor | string | `"false"` | no | +| mysql\_replication\_lag\_extra\_tags | Extra tags for MySQL replication lag monitor | list(string) | `[]` | no | +| mysql\_replication\_lag\_message | Custom message for MySQL replication lag monitor | string | `""` | no | +| mysql\_replication\_lag\_threshold\_warning | Maximum warning acceptable seconds of replication lag | string | `"100"` | no | +| mysql\_replication\_lag\_threshold\_critical | Maximum critical acceptable seconds of replication lag | string | `"200"` | no | +| mysql\_replication\_lag\_time\_aggregator | Monitor time 
aggregator for MySQL replication lag monitor [available values: min, max or avg] | string | `"min"` | no | +| mysql\_replication\_lag\_timeframe | Monitor timeframe for MySQL replication lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_15m"` | no | +| mysql\_replication\_status\_enabled | Flag to enable mysql replication status monitor | string | `"false"` | no | +| mysql\_replication\_status\_extra\_tags | Extra tags for MySQL replication status monitor | list(string) | `[]` | no | +| mysql\_replication\_status\_message | Custom message for MySQL replication status monitor | string | `""` | no | +| mysql\_replication\_status\_time\_aggregator | Monitor time aggregator for MySQL replication status monitor [available values: min, max or avg] | string | `"min"` | no | +| mysql\_replication\_status\_timeframe | Monitor timeframe for MySQL replication status monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no | | new\_host\_delay | Delay in seconds for the metric evaluation | string | `"300"` | no | | notify\_no\_data | Will raise no data alert if set to true | string | `"true"` | no | | prefix\_slug | Prefix string to prepend between brackets on every monitors names | string | `""` | no | @@ -118,6 +132,8 @@ Creates DataDog monitors with the following checks: | mysql\_questions\_anomaly\_id | id for monitor mysql_questions_anomaly | | mysql\_slow\_id | id for monitor mysql_slow | | mysql\_threads\_anomaly\_id | id for monitor mysql_threads_anomaly | +| mysql\_replication\_lag\_id | id for monitor mysql_replication_lag | +| mysql\_replication\_status\_id | id for monitor mysql_replication_status | ## Related documentation diff --git a/database/mysql/inputs.tf b/database/mysql/inputs.tf index d091611..1de13b3 100644 --- a/database/mysql/inputs.tf +++ b/database/mysql/inputs.tf @@ -460,3 +460,80 @@ variable 
"mysql_questions_timeframe" { default = "last_4h" } +################################# +### MySQL replication lag ### +################################# + +variable "mysql_replication_lag_enabled" { + description = "Flag to enable mysql replication lag monitor" + type = string + default = "false" +} + +variable "mysql_replication_lag_extra_tags" { + description = "Extra tags for MySQL replication lag monitor" + type = list(string) + default = [] +} + +variable "mysql_replication_lag_message" { + description = "Custom message for MySQL replication lag monitor" + type = string + default = "" +} + +variable "mysql_replication_lag_threshold_warning" { + default = 100 + description = "Maximum warning acceptable seconds of replication lag" +} + +variable "mysql_replication_lag_threshold_critical" { + default = 200 + description = "Maximum critical acceptable seconds of replication lag" +} + +variable "mysql_replication_lag_time_aggregator" { + description = "Monitor time aggregator for MySQL replication lag monitor [available values: min, max or avg]" + type = string + default = "min" +} + +variable "mysql_replication_lag_timeframe" { + description = "Monitor timeframe for MySQL replication lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = string + default = "last_15m" +} + +################################### +### MySQL replication status ### +################################### + +variable "mysql_replication_status_enabled" { + description = "Flag to enable mysql replication status monitor" + type = string + default = "false" +} + +variable "mysql_replication_status_extra_tags" { + description = "Extra tags for MySQL replication status monitor" + type = list(string) + default = [] +} + +variable "mysql_replication_status_message" { + description = "Custom message for MySQL replication status monitor" + type = string + default = "" +} + +variable "mysql_replication_status_time_aggregator" { + description = 
"Monitor time aggregator for MySQL replication status monitor [available values: min, max or avg]" + type = string + default = "min" +} + +variable "mysql_replication_status_timeframe" { + description = "Monitor timeframe for MySQL replication status monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = string + default = "last_5m" +} diff --git a/database/mysql/monitors-mysql.tf b/database/mysql/monitors-mysql.tf index 23e1261..5219da4 100644 --- a/database/mysql/monitors-mysql.tf +++ b/database/mysql/monitors-mysql.tf @@ -288,3 +288,62 @@ EOQ } } +resource "datadog_monitor" "mysql_replication_lag" { + count = var.mysql_replication_lag_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Mysql replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" + message = coalesce(var.mysql_replication_lag_message, var.message) + type = "query alert" + + query = < ${var.mysql_replication_lag_threshold_critical} +EOQ + + thresholds = { + warning = var.mysql_replication_lag_threshold_warning + critical = var.mysql_replication_lag_threshold_critical + } + + evaluation_delay = var.evaluation_delay + new_host_delay = var.new_host_delay + notify_no_data = false + renotify_interval = 0 + require_full_window = false + timeout_h = 0 + include_tags = true + + tags = concat(["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform"], var.mysql_replication_lag_extra_tags) + + lifecycle { + ignore_changes = [silenced] + } +} + +resource "datadog_monitor" "mysql_replication_status" { + count = var.mysql_replication_status_enabled == "true" ? 1 : 0 + name = "${var.prefix_slug == "" ? 
"" : "[${var.prefix_slug}]"}[${var.environment}] Mysql replication status changed abnormally" + message = coalesce(var.mysql_replication_status_message, var.message) + type = "metric alert" + + query = <