MON-547 add MySQL replication monitors
This commit is contained in:
parent
dda61c2353
commit
0e71ff2506
@ -25,6 +25,8 @@ Creates DataDog monitors with the following checks:
|
||||
- Mysql server does not respond
|
||||
- Mysql Slow queries
|
||||
- Mysql threads changed abnormally
|
||||
- Mysql replication lag
|
||||
- Mysql replication status
|
||||
|
||||
## Inputs
|
||||
|
||||
@ -102,6 +104,18 @@ Creates DataDog monitors with the following checks:
|
||||
| mysql\_threads\_threshold\_critical | Maximum critical acceptable number of threads | string | `"1"` | no |
|
||||
| mysql\_threads\_time\_aggregator | Monitor time aggregator for MySQL threads monitor [available values: min, max or avg] | string | `"avg"` | no |
|
||||
| mysql\_threads\_timeframe | Monitor timeframe for MySQL threads monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_4h"` | no |
|
||||
| mysql\_replication\_lag\_enabled | Flag to enable mysql replication lag monitor | string | `"false"` | no |
|
||||
| mysql\_replication\_lag\_extra\_tags | Extra tags for MySQL replication lag monitor | list(string) | `[]` | no |
|
||||
| mysql\_replication\_lag\_message | Custom message for MySQL replication lag monitor | string | `""` | no |
|
||||
| mysql\_replication\_lag\_threshold\_warning | Maximum warning acceptable seconds of replication lag | string | `"100"` | no |
|
||||
| mysql\_replication\_lag\_threshold\_critical | Maximum critical acceptable seconds of replication lag | string | `"200"` | no |
|
||||
| mysql\_replication\_lag\_time\_aggregator | Monitor time aggregator for MySQL replication lag monitor [available values: min, max or avg] | string | `"min"` | no |
|
||||
| mysql\_replication\_lag\_timeframe | Monitor timeframe for MySQL replication lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_15m"` | no |
|
||||
| mysql\_replication\_status\_enabled | Flag to enable mysql replication status monitor | string | `"false"` | no |
|
||||
| mysql\_replication\_status\_extra\_tags | Extra tags for MySQL replication status monitor | list(string) | `[]` | no |
|
||||
| mysql\_replication\_status\_message | Custom message for MySQL replication status monitor | string | `""` | no |
|
||||
| mysql\_replication\_status\_time\_aggregator | Monitor time aggregator for MySQL replication status monitor [available values: min, max or avg] | string | `"min"` | no |
|
||||
| mysql\_replication\_status\_timeframe | Monitor timeframe for MySQL replication status monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no |
|
||||
| new\_host\_delay | Delay in seconds for the metric evaluation | string | `"300"` | no |
|
||||
| notify\_no\_data | Will raise no data alert if set to true | string | `"true"` | no |
|
||||
| prefix\_slug | Prefix string to prepend between brackets on every monitors names | string | `""` | no |
|
||||
@ -118,6 +132,8 @@ Creates DataDog monitors with the following checks:
|
||||
| mysql\_questions\_anomaly\_id | id for monitor mysql_questions_anomaly |
|
||||
| mysql\_slow\_id | id for monitor mysql_slow |
|
||||
| mysql\_threads\_anomaly\_id | id for monitor mysql_threads_anomaly |
|
||||
| mysql\_replication\_lag\_id | id for monitor mysql_replication_lag |
|
||||
| mysql\_replication\_status\_id | id for monitor mysql_replication_status |
|
||||
|
||||
## Related documentation
|
||||
|
||||
|
||||
@ -460,3 +460,80 @@ variable "mysql_questions_timeframe" {
|
||||
default = "last_4h"
|
||||
}
|
||||
|
||||
#################################
|
||||
### MySQL replication lag ###
|
||||
#################################
|
||||
|
||||
# Opt-in switch for the replication lag monitor. Kept as a string because the
# monitor's `count` expression compares it against the literal "true".
variable "mysql_replication_lag_enabled" {
  type        = string
  default     = "false"
  description = "Flag to enable mysql replication lag monitor"
}
|
||||
|
||||
# Additional tags appended to the fixed tag list of the replication lag monitor.
variable "mysql_replication_lag_extra_tags" {
  type        = list(string)
  default     = []
  description = "Extra tags for MySQL replication lag monitor"
}
|
||||
|
||||
# Per-monitor notification message. The monitor passes this to coalesce()
# ahead of var.message, so the empty default falls through to var.message.
variable "mysql_replication_lag_message" {
  type        = string
  default     = ""
  description = "Custom message for MySQL replication lag monitor"
}
|
||||
|
||||
# Warning threshold (seconds of replication lag) for the replication lag monitor.
# An explicit string type matches the documented input type and rejects
# non-scalar values at plan time; numeric inputs are converted automatically.
variable "mysql_replication_lag_threshold_warning" {
  description = "Maximum warning acceptable seconds of replication lag"
  type        = string
  default     = "100"
}
|
||||
|
||||
# Critical threshold (seconds of replication lag). The value is interpolated
# into the monitor query and also used as the `critical` threshold.
# An explicit string type matches the documented input type and rejects
# non-scalar values at plan time; numeric inputs are converted automatically.
variable "mysql_replication_lag_threshold_critical" {
  description = "Maximum critical acceptable seconds of replication lag"
  type        = string
  default     = "200"
}
|
||||
|
||||
# Aggregation function placed at the front of the replication lag query.
variable "mysql_replication_lag_time_aggregator" {
  type        = string
  default     = "min"
  description = "Monitor time aggregator for MySQL replication lag monitor [available values: min, max or avg]"
}
|
||||
|
||||
# Evaluation window interpolated into the replication lag query.
variable "mysql_replication_lag_timeframe" {
  type        = string
  default     = "last_15m"
  description = "Monitor timeframe for MySQL replication lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
|
||||
|
||||
###################################
|
||||
### MySQL replication status ###
|
||||
###################################
|
||||
|
||||
# Opt-in switch for the replication status monitor. Kept as a string because
# the monitor's `count` expression compares it against the literal "true".
variable "mysql_replication_status_enabled" {
  type        = string
  default     = "false"
  description = "Flag to enable mysql replication status monitor"
}
|
||||
|
||||
# Additional tags appended to the fixed tag list of the replication status monitor.
variable "mysql_replication_status_extra_tags" {
  type        = list(string)
  default     = []
  description = "Extra tags for MySQL replication status monitor"
}
|
||||
|
||||
# Per-monitor notification message. The monitor passes this to coalesce()
# ahead of var.message, so the empty default falls through to var.message.
variable "mysql_replication_status_message" {
  type        = string
  default     = ""
  description = "Custom message for MySQL replication status monitor"
}
|
||||
|
||||
# Aggregation function placed at the front of the replication status query.
variable "mysql_replication_status_time_aggregator" {
  type        = string
  default     = "min"
  description = "Monitor time aggregator for MySQL replication status monitor [available values: min, max or avg]"
}
|
||||
|
||||
# Evaluation window interpolated into the replication status query.
variable "mysql_replication_status_timeframe" {
  type        = string
  default     = "last_5m"
  description = "Monitor timeframe for MySQL replication status monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
|
||||
|
||||
@ -288,3 +288,62 @@ EOQ
|
||||
}
|
||||
}
|
||||
|
||||
# Replication lag monitor: evaluates mysql.replication.seconds_behind_master,
# grouped by the `server` tag, against the configured warning/critical
# thresholds. Instantiated only when var.mysql_replication_lag_enabled is the
# string "true".
resource "datadog_monitor" "mysql_replication_lag" {
  count = var.mysql_replication_lag_enabled == "true" ? 1 : 0

  type = "query alert"
  name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Mysql replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"

  # Monitor-specific message first; coalesce() falls back to the module-wide one.
  message = coalesce(var.mysql_replication_lag_message, var.message)

  # The critical value is interpolated here and repeated in `thresholds` below.
  query = <<EOQ
${var.mysql_replication_lag_time_aggregator}(${var.mysql_replication_lag_timeframe}):avg:mysql.replication.seconds_behind_master${module.filter-tags.query_alert} by {server} > ${var.mysql_replication_lag_threshold_critical}
EOQ

  thresholds = {
    critical = var.mysql_replication_lag_threshold_critical
    warning  = var.mysql_replication_lag_threshold_warning
  }

  # Evaluation timing.
  evaluation_delay    = var.evaluation_delay
  new_host_delay      = var.new_host_delay
  require_full_window = false

  # Notification behaviour: missing data never alerts for this monitor.
  notify_no_data    = false
  renotify_interval = 0
  timeout_h         = 0
  include_tags      = true

  tags = concat(["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform"], var.mysql_replication_lag_extra_tags)

  # Changes to `silenced` are ignored so they do not show up as drift.
  lifecycle {
    ignore_changes = [silenced]
  }
}
|
||||
|
||||
# Replication status monitor: fires when mysql.replication.slave_running,
# grouped by the `server` tag, drops below 1. Instantiated only when
# var.mysql_replication_status_enabled is the string "true".
# NOTE(review): unlike the replication lag monitor, no `evaluation_delay` is
# set here — confirm whether that is intentional.
resource "datadog_monitor" "mysql_replication_status" {
  count = var.mysql_replication_status_enabled == "true" ? 1 : 0

  type = "metric alert"
  name = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Mysql replication status changed abnormally"

  # Monitor-specific message first; coalesce() falls back to the module-wide one.
  message = coalesce(var.mysql_replication_status_message, var.message)

  # The literal 1 in the query is repeated as the critical threshold below.
  query = <<EOQ
${var.mysql_replication_status_time_aggregator}(${var.mysql_replication_status_timeframe}):avg:mysql.replication.slave_running${module.filter-tags.query_alert} by {server} < 1
EOQ

  thresholds = {
    critical = 1
  }

  # Evaluation timing.
  new_host_delay      = var.new_host_delay
  require_full_window = true

  # Notification behaviour: missing data never alerts for this monitor.
  notify_no_data    = false
  notify_audit      = false
  renotify_interval = 0
  timeout_h         = 0
  include_tags      = true
  locked            = false

  tags = concat(["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform"], var.mysql_replication_status_extra_tags)

  # Changes to `silenced` are ignored so they do not show up as drift.
  lifecycle {
    ignore_changes = [silenced]
  }
}
|
||||
|
||||
@ -38,3 +38,12 @@ output "mysql_threads_anomaly_id" {
|
||||
value = datadog_monitor.mysql_threads_anomaly.*.id
|
||||
}
|
||||
|
||||
# The monitor is declared with `count`, so the splat yields a list of ids
# (empty when the monitor is disabled).
output "mysql_replication_lag_id" {
  value       = datadog_monitor.mysql_replication_lag.*.id
  description = "id for monitor mysql_replication_lag"
}
|
||||
|
||||
# The monitor is declared with `count`, so the splat yields a list of ids
# (empty when the monitor is disabled).
output "mysql_replication_status_id" {
  value       = datadog_monitor.mysql_replication_status.*.id
  description = "id for monitor mysql_replication_status"
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user