Merge branch 'MON-547-MySQL_replication_monitor' into 'master'
MON-547 Add MySQL replication monitors See merge request claranet/pt-monitoring/projects/datadog/terraform/monitors!151
This commit is contained in:
commit
e29e6f9b0b
@ -22,6 +22,8 @@ Creates DataDog monitors with the following checks:
|
||||
- Mysql Innodb buffer pool efficiency
|
||||
- Mysql Innodb buffer pool utilization
|
||||
- Mysql queries changed abnormally
|
||||
- Mysql replication lag (disabled by default)
|
||||
- Mysql replication status changed abnormally (disabled by default)
|
||||
- Mysql server does not respond
|
||||
- Mysql Slow queries
|
||||
- Mysql threads changed abnormally
|
||||
@ -82,6 +84,18 @@ Creates DataDog monitors with the following checks:
|
||||
| mysql\_questions\_threshold\_critical | Maximum critical acceptable number of queries | string | `"1"` | no |
|
||||
| mysql\_questions\_time\_aggregator | Monitor time aggregator for MySQL queries monitor [available values: min, max or avg] | string | `"avg"` | no |
|
||||
| mysql\_questions\_timeframe | Monitor timeframe for MySQL queries monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_4h"` | no |
|
||||
| mysql\_replication\_lag\_enabled | Flag to enable mysql replication lag monitor | string | `"false"` | no |
|
||||
| mysql\_replication\_lag\_extra\_tags | Extra tags for MySQL replication lag monitor | list(string) | `[]` | no |
|
||||
| mysql\_replication\_lag\_message | Custom message for MySQL replication lag monitor | string | `""` | no |
|
||||
| mysql\_replication\_lag\_threshold\_critical | Maximum critical acceptable seconds of replication lag | string | `"200"` | no |
|
||||
| mysql\_replication\_lag\_threshold\_warning | Maximum warning acceptable seconds of replication lag | string | `"100"` | no |
|
||||
| mysql\_replication\_lag\_time\_aggregator | Monitor time aggregator for MySQL replication lag monitor [available values: min, max or avg] | string | `"min"` | no |
|
||||
| mysql\_replication\_lag\_timeframe | Monitor timeframe for MySQL replication lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_15m"` | no |
|
||||
| mysql\_replication\_status\_enabled | Flag to enable mysql replication status monitor | string | `"false"` | no |
|
||||
| mysql\_replication\_status\_extra\_tags | Extra tags for MySQL replication status monitor | list(string) | `[]` | no |
|
||||
| mysql\_replication\_status\_message | Custom message for MySQL replication status monitor | string | `""` | no |
|
||||
| mysql\_replication\_status\_time\_aggregator | Monitor time aggregator for MySQL replication status monitor [available values: min, max or avg] | string | `"min"` | no |
|
||||
| mysql\_replication\_status\_timeframe | Monitor timeframe for MySQL replication status monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`] | string | `"last_5m"` | no |
|
||||
| mysql\_slow\_enabled | Flag to enable MySQL slow queries monitor | string | `"true"` | no |
|
||||
| mysql\_slow\_extra\_tags | Extra tags for MySQL slow queries monitor | list(string) | `[]` | no |
|
||||
| mysql\_slow\_message | Custom message for MySQL slow queries monitor | string | `""` | no |
|
||||
@ -116,6 +130,8 @@ Creates DataDog monitors with the following checks:
|
||||
| mysql\_pool\_efficiency\_id | id for monitor mysql_pool_efficiency |
|
||||
| mysql\_pool\_utilization\_id | id for monitor mysql_pool_utilization |
|
||||
| mysql\_questions\_anomaly\_id | id for monitor mysql_questions_anomaly |
|
||||
| mysql\_replication\_lag\_id | id for monitor mysql_replication_lag |
|
||||
| mysql\_replication\_status\_id | id for monitor mysql_replication_status |
|
||||
| mysql\_slow\_id | id for monitor mysql_slow |
|
||||
| mysql\_threads\_anomaly\_id | id for monitor mysql_threads_anomaly |
|
||||
|
||||
|
||||
@ -460,3 +460,80 @@ variable "mysql_questions_timeframe" {
|
||||
default = "last_4h"
|
||||
}
|
||||
|
||||
#################################
|
||||
### MySQL replication lag ###
|
||||
#################################
|
||||
|
||||
# Switch for the replication lag monitor. The monitor resource creates one
# instance only when this value is exactly the string "true".
variable "mysql_replication_lag_enabled" {
  type        = string
  default     = "false"
  description = "Flag to enable mysql replication lag monitor"
}
|
||||
|
||||
# Additional tags appended to the fixed tag list of the replication lag monitor.
variable "mysql_replication_lag_extra_tags" {
  type        = list(string)
  default     = []
  description = "Extra tags for MySQL replication lag monitor"
}
|
||||
|
||||
# Per-monitor notification message. The monitor passes it to coalesce() ahead
# of var.message.
variable "mysql_replication_lag_message" {
  type        = string
  default     = ""
  description = "Custom message for MySQL replication lag monitor"
}
|
||||
|
||||
# Warning threshold, in seconds of replication lag.
# Typed explicitly as a string (numeric inputs are still accepted and
# converted) so the declaration agrees with the documented input:
# type `string`, default `"100"`.
variable "mysql_replication_lag_threshold_warning" {
  description = "Maximum warning acceptable seconds of replication lag"
  type        = string
  default     = "100"
}
|
||||
|
||||
# Critical threshold, in seconds of replication lag. This value is also
# interpolated into the monitor query as the comparison operand.
# Typed explicitly as a string (numeric inputs are still accepted and
# converted) so the declaration agrees with the documented input:
# type `string`, default `"200"`.
variable "mysql_replication_lag_threshold_critical" {
  description = "Maximum critical acceptable seconds of replication lag"
  type        = string
  default     = "200"
}
|
||||
|
||||
# Time aggregation function used as the leading term of the replication lag
# monitor query.
variable "mysql_replication_lag_time_aggregator" {
  type        = string
  default     = "min"
  description = "Monitor time aggregator for MySQL replication lag monitor [available values: min, max or avg]"
}
|
||||
|
||||
# Evaluation window interpolated into the replication lag monitor query.
variable "mysql_replication_lag_timeframe" {
  type        = string
  default     = "last_15m"
  description = "Monitor timeframe for MySQL replication lag monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
|
||||
|
||||
###################################
|
||||
### MySQL replication status ###
|
||||
###################################
|
||||
|
||||
# Switch for the replication status monitor. The monitor resource creates one
# instance only when this value is exactly the string "true".
variable "mysql_replication_status_enabled" {
  type        = string
  default     = "false"
  description = "Flag to enable mysql replication status monitor"
}
|
||||
|
||||
# Additional tags appended to the fixed tag list of the replication status
# monitor.
variable "mysql_replication_status_extra_tags" {
  type        = list(string)
  default     = []
  description = "Extra tags for MySQL replication status monitor"
}
|
||||
|
||||
# Per-monitor notification message. The monitor passes it to coalesce() ahead
# of var.message.
variable "mysql_replication_status_message" {
  type        = string
  default     = ""
  description = "Custom message for MySQL replication status monitor"
}
|
||||
|
||||
# Time aggregation function used as the leading term of the replication status
# monitor query.
variable "mysql_replication_status_time_aggregator" {
  type        = string
  default     = "min"
  description = "Monitor time aggregator for MySQL replication status monitor [available values: min, max or avg]"
}
|
||||
|
||||
# Evaluation window interpolated into the replication status monitor query.
variable "mysql_replication_status_timeframe" {
  type        = string
  default     = "last_5m"
  description = "Monitor timeframe for MySQL replication status monitor [available values: `last_#m` (1, 5, 10, 15, or 30), `last_#h` (1, 2, or 4), or `last_1d`]"
}
|
||||
|
||||
@ -288,3 +288,62 @@ EOQ
|
||||
}
|
||||
}
|
||||
|
||||
# Replication lag monitor: alerts when mysql.replication.seconds_behind_master,
# grouped by server, exceeds the critical threshold. Created only when
# var.mysql_replication_lag_enabled is the string "true".
resource "datadog_monitor" "mysql_replication_lag" {
  count = var.mysql_replication_lag_enabled == "true" ? 1 : 0

  # --- Identity and notification -------------------------------------------
  type    = "query alert"
  name    = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Mysql replication lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
  message = coalesce(var.mysql_replication_lag_message, var.message)

  # --- Condition -----------------------------------------------------------
  # The critical threshold appears both in the query and in the thresholds
  # map; the warning threshold is only declared in the map.
  thresholds = {
    critical = var.mysql_replication_lag_threshold_critical
    warning  = var.mysql_replication_lag_threshold_warning
  }

  query = <<EOQ
${var.mysql_replication_lag_time_aggregator}(${var.mysql_replication_lag_timeframe}):avg:mysql.replication.seconds_behind_master${module.filter-tags.query_alert} by {server} > ${var.mysql_replication_lag_threshold_critical}
EOQ

  # --- Evaluation options --------------------------------------------------
  require_full_window = false
  evaluation_delay    = var.evaluation_delay
  new_host_delay      = var.new_host_delay
  notify_no_data      = false
  renotify_interval   = 0
  timeout_h           = 0
  include_tags        = true

  # --- Tagging -------------------------------------------------------------
  tags = concat(
    ["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform"],
    var.mysql_replication_lag_extra_tags
  )

  # Differences in `silenced` are ignored when planning.
  lifecycle {
    ignore_changes = [silenced]
  }
}
|
||||
|
||||
# Replication status monitor: alerts when mysql.replication.slave_running,
# grouped by server, drops below 1. Created only when
# var.mysql_replication_status_enabled is the string "true".
resource "datadog_monitor" "mysql_replication_status" {
  count   = var.mysql_replication_status_enabled == "true" ? 1 : 0
  name    = "${var.prefix_slug == "" ? "" : "[${var.prefix_slug}]"}[${var.environment}] Mysql replication status changed abnormally"
  message = coalesce(var.mysql_replication_status_message, var.message)
  type    = "metric alert"

  query = <<EOQ
${var.mysql_replication_status_time_aggregator}(${var.mysql_replication_status_timeframe}):avg:mysql.replication.slave_running${module.filter-tags.query_alert} by {server} < 1
EOQ

  # Must stay in sync with the literal operand of the query above.
  thresholds = {
    critical = 1
  }

  # Honour the module-wide evaluation delay, as the replication lag monitor
  # does; previously this monitor ignored var.evaluation_delay.
  evaluation_delay    = var.evaluation_delay
  new_host_delay      = var.new_host_delay
  notify_no_data      = false
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  include_tags        = true
  locked              = false
  require_full_window = true

  tags = concat(["env:${var.environment}", "type:database", "provider:mysql", "resource:mysql", "team:claranet", "created-by:terraform"], var.mysql_replication_status_extra_tags)

  # Differences in `silenced` are ignored when planning.
  lifecycle {
    ignore_changes = [silenced]
  }
}
|
||||
|
||||
@ -28,6 +28,16 @@ output "mysql_questions_anomaly_id" {
|
||||
value = datadog_monitor.mysql_questions_anomaly.*.id
|
||||
}
|
||||
|
||||
# IDs of the replication lag monitor instances. The resource uses `count`, so
# the splat yields a list (empty when the monitor is disabled).
output "mysql_replication_lag_id" {
  value       = datadog_monitor.mysql_replication_lag.*.id
  description = "id for monitor mysql_replication_lag"
}
|
||||
|
||||
# IDs of the replication status monitor instances. The resource uses `count`,
# so the splat yields a list (empty when the monitor is disabled).
output "mysql_replication_status_id" {
  value       = datadog_monitor.mysql_replication_status.*.id
  description = "id for monitor mysql_replication_status"
}
|
||||
|
||||
output "mysql_slow_id" {
|
||||
description = "id for monitor mysql_slow"
|
||||
value = datadog_monitor.mysql_slow.*.id
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user