Merge branch 'MON-547-MySQL_replication_monitor' into 'master'

MON-547 Add MySQL replication monitors

See merge request claranet/pt-monitoring/projects/datadog/terraform/monitors!151
This commit is contained in:
Quentin Manfroi 2020-01-03 12:15:26 +01:00
commit e29e6f9b0b
4 changed files with 162 additions and 0 deletions

View File

@ -22,6 +22,8 @@ Creates DataDog monitors with the following checks:
- Mysql Innodb buffer pool efficiency - Mysql Innodb buffer pool efficiency
- Mysql Innodb buffer pool utilization - Mysql Innodb buffer pool utilization
- Mysql queries changed abnormally - Mysql queries changed abnormally
- Mysql replication lag (disabled by default)
- Mysql replication status changed abnormally (disabled by default)
- Mysql server does not respond - Mysql server does not respond
- Mysql Slow queries - Mysql Slow queries
- Mysql threads changed abnormally - Mysql threads changed abnormally
@ -82,6 +84,18 @@ Creates DataDog monitors with the following checks:
| mysql\_questions\_threshold\_critical | Maximum critical acceptable number of queries | string | `"1"` | no | | mysql\_questions\_threshold\_critical | Maximum critical acceptable number of queries | string | `"1"` | no |
| mysql\_questions\_time\_aggregator | Monitor time aggregator for MySQL queries monitor [available values: min, max or avg] | string | `"avg"` | no | | mysql\_questions\_time\_aggregator | Monitor time aggregator for MySQL queries monitor [available values: min, max or avg] | string | `"avg"` | no |
| mysql\_questions\_timeframe | Monitor timeframe for MySQL queries monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_4h"` | no | | mysql\_questions\_timeframe | Monitor timeframe for MySQL queries monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_4h"` | no |
| mysql\_replication\_lag\_enabled | Flag to enable mysql replication lag monitor | string | `"false"` | no |
| mysql\_replication\_lag\_extra\_tags | Extra tags for MySQL replication lag monitor | list(string) | `[]` | no |
| mysql\_replication\_lag\_message | Custom message for MySQL replication lag monitor | string | `""` | no |
| mysql\_replication\_lag\_threshold\_critical | Maximum critical acceptable seconds of replication lag | string | `"200"` | no |
| mysql\_replication\_lag\_threshold\_warning | Maximum warning acceptable seconds of replication lag | string | `"100"` | no |
| mysql\_replication\_lag\_time\_aggregator | Monitor time aggregator for MySQL replication lag monitor [available values: min, max or avg] | string | `"min"` | no |
| mysql\_replication\_lag\_timeframe | Monitor timeframe for MySQL replication lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_15m"` | no |
| mysql\_replication\_status\_enabled | Flag to enable mysql replication status monitor | string | `"false"` | no |
| mysql\_replication\_status\_extra\_tags | Extra tags for MySQL replication status monitor | list(string) | `[]` | no |
| mysql\_replication\_status\_message | Custom message for MySQL replication status monitor | string | `""` | no |
| mysql\_replication\_status\_time\_aggregator | Monitor time aggregator for MySQL replication status monitor [available values: min, max or avg] | string | `"min"` | no |
| mysql\_replication\_status\_timeframe | Monitor timeframe for MySQL replication status monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no |
| mysql\_slow\_enabled | Flag to enable MySQL slow queries monitor | string | `"true"` | no | | mysql\_slow\_enabled | Flag to enable MySQL slow queries monitor | string | `"true"` | no |
| mysql\_slow\_extra\_tags | Extra tags for MySQL slow queries monitor | list(string) | `[]` | no | | mysql\_slow\_extra\_tags | Extra tags for MySQL slow queries monitor | list(string) | `[]` | no |
| mysql\_slow\_message | Custom message for MySQL slow queries monitor | string | `""` | no | | mysql\_slow\_message | Custom message for MySQL slow queries monitor | string | `""` | no |
@ -116,6 +130,8 @@ Creates DataDog monitors with the following checks:
| mysql\_pool\_efficiency\_id | id for monitor mysql_pool_efficiency | | mysql\_pool\_efficiency\_id | id for monitor mysql_pool_efficiency |
| mysql\_pool\_utilization\_id | id for monitor mysql_pool_utilization | | mysql\_pool\_utilization\_id | id for monitor mysql_pool_utilization |
| mysql\_questions\_anomaly\_id | id for monitor mysql_questions_anomaly | | mysql\_questions\_anomaly\_id | id for monitor mysql_questions_anomaly |
| mysql\_replication\_lag\_id | id for monitor mysql_replication_lag |
| mysql\_replication\_status\_id | id for monitor mysql_replication_status |
| mysql\_slow\_id | id for monitor mysql_slow | | mysql\_slow\_id | id for monitor mysql_slow |
| mysql\_threads\_anomaly\_id | id for monitor mysql_threads_anomaly | | mysql\_threads\_anomaly\_id | id for monitor mysql_threads_anomaly |

View File

@ -460,3 +460,80 @@ variable "mysql_questions_timeframe" {
default = "last_4h" default = "last_4h"
} }
#################################
### MySQL replication lag ###
#################################
# Opt-in switch for the replication lag monitor. The value is a string rather
# than a bool: the monitor resource compares it against the literal "true".
variable "mysql_replication_lag_enabled" {
  type        = string
  default     = "false"
  description = "Flag to enable mysql replication lag monitor"
}
# Additional tags for the replication lag monitor. They are concatenated after
# the monitor's built-in tag list; the empty default adds nothing.
variable "mysql_replication_lag_extra_tags" {
  type        = list(string)
  default     = []
  description = "Extra tags for MySQL replication lag monitor"
}
# Per-monitor notification message for the replication lag monitor. The monitor
# passes this and var.message to coalesce(), so the empty default falls back to
# the module-wide message.
variable "mysql_replication_lag_message" {
  type        = string
  default     = ""
  description = "Custom message for MySQL replication lag monitor"
}
# Warning threshold for the replication lag monitor, in seconds of lag.
# Declared as a number so that a non-numeric value is rejected when the
# variable is evaluated instead of reaching the monitor definition. Numeric
# strings such as "100" are still accepted through Terraform's automatic
# string-to-number conversion.
variable "mysql_replication_lag_threshold_warning" {
  description = "Maximum warning acceptable seconds of replication lag"
  type        = number
  default     = 100
}
# Critical threshold for the replication lag monitor, in seconds of lag.
# The same value is used both as the monitor's critical threshold and as the
# comparison value interpolated into the monitor query, so it is declared as a
# number: a non-numeric value is rejected up front rather than producing a
# malformed query. Numeric strings such as "200" are still accepted through
# Terraform's automatic string-to-number conversion.
variable "mysql_replication_lag_threshold_critical" {
  description = "Maximum critical acceptable seconds of replication lag"
  type        = number
  default     = 200
}
# Time aggregation function placed at the front of the replication lag query,
# i.e. the `<aggregator>(<timeframe>):...` part.
variable "mysql_replication_lag_time_aggregator" {
  type        = string
  default     = "min"
  description = "Monitor time aggregator for MySQL replication lag monitor [available values: min, max or avg]"
}
# Evaluation window of the replication lag query; it is interpolated inside the
# parentheses that follow the time aggregator.
variable "mysql_replication_lag_timeframe" {
  type        = string
  default     = "last_15m"
  description = "Monitor timeframe for MySQL replication lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
###################################
### MySQL replication status ###
###################################
# Opt-in switch for the replication status monitor. The value is a string
# rather than a bool: the monitor resource compares it against the literal
# "true".
variable "mysql_replication_status_enabled" {
  type        = string
  default     = "false"
  description = "Flag to enable mysql replication status monitor"
}
# Additional tags for the replication status monitor. They are concatenated
# after the monitor's built-in tag list; the empty default adds nothing.
variable "mysql_replication_status_extra_tags" {
  type        = list(string)
  default     = []
  description = "Extra tags for MySQL replication status monitor"
}
# Per-monitor notification message for the replication status monitor. The
# monitor passes this and var.message to coalesce(), so the empty default falls
# back to the module-wide message.
variable "mysql_replication_status_message" {
  type        = string
  default     = ""
  description = "Custom message for MySQL replication status monitor"
}
# Time aggregation function placed at the front of the replication status
# query, i.e. the `<aggregator>(<timeframe>):...` part.
variable "mysql_replication_status_time_aggregator" {
  type        = string
  default     = "min"
  description = "Monitor time aggregator for MySQL replication status monitor [available values: min, max or avg]"
}
# Evaluation window of the replication status query; it is interpolated inside
# the parentheses that follow the time aggregator.
variable "mysql_replication_status_timeframe" {
  type        = string
  default     = "last_5m"
  description = "Monitor timeframe for MySQL replication status monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}

View File

@ -288,3 +288,62 @@ EOQ
} }
} }
# Replication lag monitor: watches mysql.replication.seconds_behind_master,
# grouped by the `server` tag. It is created only when
# var.mysql_replication_lag_enabled is the string "true"; otherwise count is 0.
resource "datadog_monitor" "mysql_replication_lag" {
  count = var.mysql_replication_lag_enabled == "true" ? 1 : 0

  type = "query alert"
  name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Mysql replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"

  # The monitor-specific message takes precedence; the module-wide message is
  # the fallback.
  message = coalesce(var.mysql_replication_lag_message, var.message)

  # The comparison value in the query and the critical threshold below both come
  # from var.mysql_replication_lag_threshold_critical, so they always agree.
  query = <<EOQ
${var.mysql_replication_lag_time_aggregator}(${var.mysql_replication_lag_timeframe}):avg:mysql.replication.seconds_behind_master${module.filter-tags.query_alert} by {server} > ${var.mysql_replication_lag_threshold_critical}
EOQ

  thresholds = {
    critical = var.mysql_replication_lag_threshold_critical
    warning  = var.mysql_replication_lag_threshold_warning
  }

  # Evaluation timing, driven by module-wide settings.
  evaluation_delay    = var.evaluation_delay
  new_host_delay      = var.new_host_delay
  require_full_window = false

  # Notification behaviour.
  notify_no_data    = false
  renotify_interval = 0
  timeout_h         = 0
  include_tags      = true

  tags = concat(["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform"], var.mysql_replication_lag_extra_tags)

  # Changes to `silenced` are ignored so they do not show up as a diff on the
  # next plan.
  lifecycle {
    ignore_changes = [silenced]
  }
}
# Replication status monitor: triggers when the aggregated value of
# mysql.replication.slave_running, grouped by the `server` tag, is below 1.
# It is created only when var.mysql_replication_status_enabled is the string
# "true"; otherwise count is 0.
resource "datadog_monitor" "mysql_replication_status" {
  count = var.mysql_replication_status_enabled == "true" ? 1 : 0

  name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Mysql replication status changed abnormally"

  # The monitor-specific message takes precedence; the module-wide message is
  # the fallback.
  message = coalesce(var.mysql_replication_status_message, var.message)
  type    = "metric alert"

  query = <<EOQ
${var.mysql_replication_status_time_aggregator}(${var.mysql_replication_status_timeframe}):avg:mysql.replication.slave_running${module.filter-tags.query_alert} by {server} < 1
EOQ

  # Must match the literal comparison value used in the query above.
  thresholds = {
    critical = 1
  }

  # Honour the module-wide evaluation delay, as the replication lag monitor
  # does. Without it, this monitor ignored var.evaluation_delay.
  evaluation_delay    = var.evaluation_delay
  new_host_delay      = var.new_host_delay
  require_full_window = true

  # Notification behaviour.
  notify_no_data    = false
  renotify_interval = 0
  notify_audit      = false
  timeout_h         = 0
  include_tags      = true
  locked            = false

  tags = concat(["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform"], var.mysql_replication_status_extra_tags)

  # Changes to `silenced` are ignored so they do not show up as a diff on the
  # next plan.
  lifecycle {
    ignore_changes = [silenced]
  }
}

View File

@ -28,6 +28,16 @@ output "mysql_questions_anomaly_id" {
value = datadog_monitor.mysql_questions_anomaly.*.id value = datadog_monitor.mysql_questions_anomaly.*.id
} }
# Exposes the id of the replication lag monitor. The resource uses `count`, so
# the splat expression yields a list, which is empty when the monitor is
# disabled.
output "mysql_replication_lag_id" {
  value       = datadog_monitor.mysql_replication_lag.*.id
  description = "id for monitor mysql_replication_lag"
}
# Exposes the id of the replication status monitor. The resource uses `count`,
# so the splat expression yields a list, which is empty when the monitor is
# disabled.
output "mysql_replication_status_id" {
  value       = datadog_monitor.mysql_replication_status.*.id
  description = "id for monitor mysql_replication_status"
}
output "mysql_slow_id" { output "mysql_slow_id" {
description = "id for monitor mysql_slow" description = "id for monitor mysql_slow"
value = datadog_monitor.mysql_slow.*.id value = datadog_monitor.mysql_slow.*.id