From 0e22a0548937a0385cbe2996d5de00f6bc8b2a78 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Mon, 27 Aug 2018 16:50:49 +0200 Subject: [PATCH] MON-142 add some monitors for mysql --- database/mysql/inputs.tf | 370 ++++++++++++++++++++++++++++++- database/mysql/monitors-mysql.tf | 227 ++++++++++++++++++- 2 files changed, 574 insertions(+), 23 deletions(-) diff --git a/database/mysql/inputs.tf b/database/mysql/inputs.tf index 35ed6df..0b150c9 100644 --- a/database/mysql/inputs.tf +++ b/database/mysql/inputs.tf @@ -28,6 +28,36 @@ variable "filter_tags_custom" { } # MySQL specific + +################################# +### MySQL availability ### +################################# + +variable "mysql_availability_silenced" { + description = "Groups to mute for Mysql availability monitor" + type = "map" + default = {} +} + +variable "mysql_availability_extra_tags" { + description = "Extra tags for Mysql availability monitor" + type = "list" + default = [] +} + +variable "mysql_availability_message" { + description = "Custom message for Mysql availability monitor" + type = "string" + default = "" +} + +variable "mysql_availability_threshold_critical" { + description = "Mysql availability monitor (critical threshold)" + type = "string" + default = 1.1754943508222875e-38 +} + + ################################# ### MySQL connections ### ################################# @@ -42,8 +72,20 @@ variable "mysql_connection_threshold_warning" { description = "Maximum warning acceptable percent of connections" } +variable "mysql_connection_time_aggregator" { + description = "Monitor time aggregator for MySQL connection monitor [available values: min, max or avg]" + type = "string" + default = "avg" +} + +variable "mysql_connection_timeframe" { + description = "Monitor timeframe for MySQL connection monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_10m" +} + variable "mysql_connection_silenced" { - 
description = "Groups to mute mysql connection monitor" + description = "Groups to mute MySQL connection monitor" type = "map" default = {} } @@ -60,34 +102,340 @@ variable "mysql_connection_extra_tags" { default = [] } +################################# +### MySQL aborted connects ### +################################# + +variable "mysql_aborted_threshold_critical" { + default = 10 + description = "Maximum critical acceptable percent of aborted connects" +} + +variable "mysql_aborted_threshold_warning" { + default = 5 + description = "Maximum warning acceptable percent of aborted connects" +} + +variable "mysql_aborted_time_aggregator" { + description = "Monitor time aggregator for MySQL aborted connects monitor [available values: min, max or avg]" + type = "string" + default = "avg" +} + +variable "mysql_aborted_timeframe" { + description = "Monitor timeframe for MySQL aborted connects monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_10m" +} + +variable "mysql_aborted_silenced" { + description = "Groups to mute MySQL aborted connects monitor" + type = "map" + default = {} +} + +variable "mysql_aborted_message" { + description = "Custom message for MySQL aborted connects monitor" + type = "string" + default = "" +} + +variable "mysql_aborted_extra_tags" { + description = "Extra tags for MySQL aborted connects monitor" + type = "list" + default = [] +} + +################################# +### MySQL slow queries ### +################################# + +variable "mysql_slow_threshold_critical" { + default = 20 + description = "Maximum critical acceptable percent of slow queries" +} + +variable "mysql_slow_threshold_warning" { + default = 5 + description = "Maximum warning acceptable percent of slow queries" +} + +variable "mysql_slow_time_aggregator" { + description = "Monitor time aggregator for MySQL slow queries monitor [available values: min, max or avg]" + type = "string" + 
default = "avg" +} + +variable "mysql_slow_timeframe" { + description = "Monitor timeframe for MySQL slow queries monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_15m" +} + +variable "mysql_slow_silenced" { + description = "Groups to mute MySQL slow queries monitor" + type = "map" + default = {} +} + +variable "mysql_slow_message" { + description = "Custom message for MySQL slow queries monitor" + type = "string" + default = "" +} + +variable "mysql_slow_extra_tags" { + description = "Extra tags for MySQL slow queries monitor" + type = "list" + default = [] +} + +################################# +# MySQL innodb pool efficiency # +################################# + +variable "mysql_pool_efficiency_threshold_critical" { + default = 20 + description = "Maximum critical acceptable percent of innodb buffer pool efficiency" +} + +variable "mysql_pool_efficiency_threshold_warning" { + default = 1 + description = "Maximum warning acceptable percent of innodb buffer pool efficiency" +} + +variable "mysql_pool_efficiency_time_aggregator" { + description = "Monitor time aggregator for MySQL innodb buffer pool efficiency monitor [available values: min, max, sum or avg]" + type = "string" + default = "sum" +} + +variable "mysql_pool_efficiency_timeframe" { + description = "Monitor timeframe for MySQL innodb buffer pool efficiency monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_1h" +} + +variable "mysql_pool_efficiency_silenced" { + description = "Groups to mute MySQL innodb buffer pool efficiency monitor" + type = "map" + default = {} +} + +variable "mysql_pool_efficiency_message" { + description = "Custom message for MySQL innodb buffer pool efficiency monitor" + type = "string" + default = "" +} + +variable "mysql_pool_efficiency_extra_tags" { + description = "Extra tags for MySQL innodb buffer pool 
efficiency monitor" + type = "list" + default = [] +} + +################################# +# MySQL innodb pool utilization # +################################# + +variable "mysql_pool_utilization_threshold_critical" { + default = 90 + description = "Maximum critical acceptable percent of innodb buffer pool utilization" +} + +variable "mysql_pool_utilization_threshold_warning" { + default = 75 + description = "Maximum warning acceptable percent of innodb buffer pool utilization" +} + +variable "mysql_pool_utilization_time_aggregator" { + description = "Monitor time aggregator for MySQL innodb buffer pool utilization monitor [available values: min, max or avg]" + type = "string" + default = "min" +} + +variable "mysql_pool_utilization_timeframe" { + description = "Monitor timeframe for MySQL innodb buffer pool utilization monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_4h" +} + +variable "mysql_pool_utilization_silenced" { + description = "Groups to mute MySQL innodb buffer pool utilization monitor" + type = "map" + default = {} +} + +variable "mysql_pool_utilization_message" { + description = "Custom message for MySQL innodb buffer pool utilization monitor" + type = "string" + default = "" +} + +variable "mysql_pool_utilization_extra_tags" { + description = "Extra tags for MySQL innodb buffer pool utilization monitor" + type = "list" + default = [] +} + ################################# ### MySQL threads ### ################################# -variable "mysql_thread_threshold_critical" { - default = 500 +variable "mysql_threads_threshold_critical" { + default = 1 description = "Maximum critical acceptable number of threads" } -variable "mysql_thread_threshold_warning" { - default = 400 - description = "Maximum warning acceptable number of threads" +variable "mysql_threads_detection_algorithm" { + description = "Anomaly Detection Algorithm used" + type = "string" + default = 
"basic" } -variable "mysql_thread_silenced" { +variable "mysql_threads_deviations" { + description = "Deviations to detect the anomaly" + type = "string" + default = 2 +} + +variable "mysql_threads_direction" { + description = "Direction of the anomaly. It can be both, below or above." + type = "string" + default = "above" +} + +variable "mysql_threads_alert_window" { + description = "Alert window." + type = "string" + default = "last_15m" +} + +variable "mysql_threads_interval" { + description = "Interval." + type = "string" + default = 60 +} + +variable "mysql_threads_count_default_zero" { + description = "Count default zero." + type = "string" + default = "true" +} + +variable "mysql_threads_seasonality" { + description = "Seasonality of the algorithm" + type = "string" + default = "daily" +} + +variable "mysql_threads_time_aggregator" { + description = "Monitor time aggregator for MySQL threads monitor [available values: min, max or avg]" + type = "string" + default = "avg" +} + +variable "mysql_threads_timeframe" { + description = "Monitor timeframe for MySQL threads monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_4h" +} + +variable "mysql_threads_silenced" { description = "Groups to mute mysql threads monitor" type = "map" default = {} } -variable "mysql_thread_message" { - description = "Custom message for MySQL thread monitor" +variable "mysql_threads_message" { + description = "Custom message for MySQL threads monitor" type = "string" default = "" } -variable "mysql_thread_extra_tags" { - description = "Extra tags for MySQL thread monitor" +variable "mysql_threads_extra_tags" { + description = "Extra tags for MySQL threads monitor" + type = "list" + default = [] +} + +################################# +### MySQL queries ### +################################# + +variable "mysql_queries_threshold_critical" { + default = 1 + description = "Maximum critical acceptable number 
of queries" +} + +variable "mysql_queries_detection_algorithm" { + description = "Anomaly Detection Algorithm used" + type = "string" + default = "agile" +} + +variable "mysql_queries_deviations" { + description = "Deviations to detect the anomaly" + type = "string" + default = 2 +} + +variable "mysql_queries_direction" { + description = "Direction of the anomaly. It can be both, below or above." + type = "string" + default = "both" +} + +variable "mysql_queries_alert_window" { + description = "Alert window." + type = "string" + default = "last_15m" +} + +variable "mysql_queries_interval" { + description = "Interval." + type = "string" + default = 60 +} + +variable "mysql_queries_count_default_zero" { + description = "Count default zero." + type = "string" + default = "true" +} + +variable "mysql_queries_seasonality" { + description = "Seasonality of the algorithm" + type = "string" + default = "daily" +} + +variable "mysql_queries_time_aggregator" { + description = "Monitor time aggregator for MySQL queries monitor [available values: min, max or avg]" + type = "string" + default = "avg" +} + +variable "mysql_queries_timeframe" { + description = "Monitor timeframe for MySQL queries monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]" + type = "string" + default = "last_4h" +} + +variable "mysql_queries_silenced" { + description = "Groups to mute mysql queries monitor" + type = "map" + default = {} +} + +variable "mysql_queries_message" { + description = "Custom message for MySQL queries monitor" + type = "string" + default = "" +} + +variable "mysql_queries_extra_tags" { + description = "Extra tags for MySQL queries monitor" type = "list" default = [] } diff --git a/database/mysql/monitors-mysql.tf b/database/mysql/monitors-mysql.tf index 5eae56f..acd8d42 100644 --- a/database/mysql/monitors-mysql.tf +++ b/database/mysql/monitors-mysql.tf @@ -1,10 +1,40 @@ -resource "datadog_monitor" "mysql_connection_too_high" { 
+resource "datadog_monitor" "mysql_availability" { + name = "[${var.environment}] Mysql server does not respond" + message = "${coalesce(var.mysql_availability_message, var.message)}" + + type = "service check" + + query = < ${var.mysql_connection_threshold_critical} @@ -29,23 +59,24 @@ resource "datadog_monitor" "mysql_connection_too_high" { tags = ["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform", "${var.mysql_connection_extra_tags}"] } -resource "datadog_monitor" "mysql_thread_too_high" { - name = "[${var.environment}] Mysql threads {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" - message = "${coalesce(var.mysql_thread_message, var.message)}" +resource "datadog_monitor" "mysql_aborted" { + name = "[${var.environment}] Mysql Aborted connects {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.mysql_aborted_message, var.message)}" type = "metric alert" query = < ${var.mysql_thread_threshold_critical} + ${var.mysql_aborted_time_aggregator}(${var.mysql_aborted_timeframe}): ( + avg:mysql.net.aborted_connects${module.filter-tags.query_alert} by {server} / + avg:mysql.performance.queries_connected${module.filter-tags.query_alert} by {server} + ) * 100 > ${var.mysql_aborted_threshold_critical} EOF evaluation_delay = "${var.evaluation_delay}" new_host_delay = "${var.new_host_delay}" thresholds { - warning = "${var.mysql_thread_threshold_warning}" - critical = "${var.mysql_thread_threshold_critical}" + warning = "${var.mysql_aborted_threshold_warning}" + critical = "${var.mysql_aborted_threshold_critical}" } notify_no_data = false @@ -54,7 +85,179 @@ resource "datadog_monitor" "mysql_thread_too_high" { timeout_h = 0 include_tags = true - silenced = "${var.mysql_thread_silenced}" + 
silenced = "${var.mysql_aborted_silenced}" - tags = ["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform", "${var.mysql_thread_extra_tags}"] + tags = ["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform", "${var.mysql_aborted_extra_tags}"] } + +resource "datadog_monitor" "mysql_slow" { + name = "[${var.environment}] Mysql Slow queries {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.mysql_slow_message, var.message)}" + type = "metric alert" + + query = < ${var.mysql_slow_threshold_critical} + EOF + + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + + thresholds { + warning = "${var.mysql_slow_threshold_warning}" + critical = "${var.mysql_slow_threshold_critical}" + } + + notify_no_data = false + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + silenced = "${var.mysql_slow_silenced}" + + tags = ["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform", "${var.mysql_slow_extra_tags}"] +} + +resource "datadog_monitor" "mysql_pool_efficiency" { + name = "[${var.environment}] Mysql Innodb buffer pool efficiency {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.mysql_pool_efficiency_message, var.message)}" + type = "metric alert" + + query = < ${var.mysql_pool_efficiency_threshold_critical} + EOF + + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + + thresholds { + warning = "${var.mysql_pool_efficiency_threshold_warning}" + critical = "${var.mysql_pool_efficiency_threshold_critical}" + } + + notify_no_data 
= false + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + silenced = "${var.mysql_pool_efficiency_silenced}" + + tags = ["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform", "${var.mysql_pool_efficiency_extra_tags}"] +} + +resource "datadog_monitor" "mysql_pool_utilization" { + name = "[${var.environment}] Mysql Innodb buffer pool utilization {{#is_alert}}{{{comparator}}} {{threshold}} ({{value}}){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}} ({{value}}){{/is_warning}}" + message = "${coalesce(var.mysql_pool_utilization_message, var.message)}" + type = "metric alert" + + query = < ${var.mysql_pool_utilization_threshold_critical} + EOF + + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + + thresholds { + warning = "${var.mysql_pool_utilization_threshold_warning}" + critical = "${var.mysql_pool_utilization_threshold_critical}" + } + + notify_no_data = false + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + silenced = "${var.mysql_pool_utilization_silenced}" + + tags = ["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform", "${var.mysql_pool_utilization_extra_tags}"] +} + +resource "datadog_monitor" "mysql_threads_anomaly" { + name = "[${var.environment}] Mysql threads changed abnormally" + message = "${coalesce(var.mysql_threads_message, var.message)}" + type = "metric alert" + + query = <= ${var.mysql_threads_threshold_critical} + EOF + + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + + thresholds { + critical = "${var.mysql_threads_threshold_critical}" + critical_recovery = 0 + } + + notify_no_data = false + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + silenced = "${var.mysql_threads_silenced}" + + 
tags = ["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform", "${var.mysql_threads_extra_tags}"] +} + +resource "datadog_monitor" "mysql_queries_anomaly" { + name = "[${var.environment}] Mysql queries changed abnormally" + message = "${coalesce(var.mysql_queries_message, var.message)}" + type = "metric alert" + + query = <= ${var.mysql_queries_threshold_critical} + EOF + + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" + + thresholds { + critical = "${var.mysql_queries_threshold_critical}" + critical_recovery = 0 + } + + notify_no_data = false + renotify_interval = 0 + require_full_window = true + timeout_h = 0 + include_tags = true + + silenced = "${var.mysql_queries_silenced}" + + tags = ["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform", "${var.mysql_queries_extra_tags}"] +} \ No newline at end of file