MON-224 CloudSQL MySQL Replication Lag monitor
This commit is contained in:
parent
79f8a5d486
commit
1673d8bbce
@ -29,6 +29,7 @@ Useful links
|
||||
* [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql)
|
||||
* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/)
|
||||
* [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits)
|
||||
* [Monitoring Replication Lag](https://cloud.google.com/sql/docs/mysql/high-availability#replication-lag-monitor)
|
||||
|
||||
Inputs
|
||||
------
|
||||
@ -47,3 +48,8 @@ Inputs
|
||||
| network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no |
|
||||
| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no |
|
||||
| project_id | ID of the GCP Project | string | - | yes |
|
||||
| replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no |
|
||||
| replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `<map>` | no |
|
||||
| replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `2700` | no |
|
||||
| replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `2000` | no |
|
||||
| replication_lag_timeframe | Timeframe for the Replication Lag monitor | string | `last_10m` | no |
|
||||
|
||||
@ -71,3 +71,36 @@ variable "network_connections_silenced" {
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
#
|
||||
# Replication Lag
|
||||
#
|
||||
# Optional notification text specific to the Replication Lag monitor.
# Defaults to the empty string.
variable "replication_lag_message" {
  type        = "string"
  default     = ""
  description = "Custom message for the Replication Lag monitor"
}
|
||||
|
||||
# Evaluation window for the Replication Lag monitor, expressed as a
# Datadog timeframe token (the default is "last_10m").
variable "replication_lag_timeframe" {
  type        = "string"
  default     = "last_10m"
  description = "Timeframe for the Replication Lag monitor"
}
|
||||
|
||||
# Warning threshold for the Replication Lag monitor, in seconds behind
# the master.
variable "replication_lag_threshold_warning" {
  type        = "string"
  default     = 2000
  description = "Seconds behind the master (warning threshold)"
}
|
||||
|
||||
# Critical threshold for the Replication Lag monitor, in seconds behind
# the master.
variable "replication_lag_threshold_critical" {
  type        = "string"
  default     = 2700
  description = "Seconds behind the master (critical threshold)"
}
|
||||
|
||||
# Groups to mute for the Replication Lag monitor; defaults to an empty map.
variable "replication_lag_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for GCP Cloud SQL Replication Lag monitor"
}
|
||||
|
||||
@ -52,3 +52,45 @@ EOF
|
||||
"engine:mysql",
|
||||
]
|
||||
}
|
||||
|
||||
#
|
||||
# Replication Lag
|
||||
#
|
||||
# Datadog metric alert on Cloud SQL MySQL replication lag
# (gcp.cloudsql.database.mysql.replication.seconds_behind_master),
# evaluated per database_id.
resource "datadog_monitor" "datadog_monitor_cloud_sql_mysql_replication_lag" {
  name = "[${var.environment}] Cloud SQL MySQL Replication Lag too high"

  # Use the monitor-specific message when set, otherwise the module-wide one.
  message = "${coalesce(var.replication_lag_message, var.message)}"

  type = "metric alert"

  # The evaluation window comes from var.replication_lag_timeframe so the
  # documented input actually controls the monitor (it was hard-coded to
  # last_10m before). The comparison value must match thresholds.critical.
  query = <<EOF
  min(${var.replication_lag_timeframe}):
    avg:gcp.cloudsql.database.mysql.replication.seconds_behind_master{${data.template_file.filter.rendered}}
    by {database_id}
  > ${var.replication_lag_threshold_critical}
EOF

  thresholds {
    critical = "${var.replication_lag_threshold_critical}"
    warning  = "${var.replication_lag_threshold_warning}"
  }

  # include_tags was declared twice in this resource; it is now set once.
  include_tags        = true
  notify_no_data      = true
  require_full_window = false
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  locked              = false
  evaluation_delay    = "${var.delay}"
  new_host_delay      = "${var.delay}"

  # Mute groups for THIS monitor. This previously pointed at
  # var.questions_changing_silenced (copy-paste from another monitor),
  # which left var.replication_lag_silenced unused.
  silenced = "${var.replication_lag_silenced}"

  tags = [
    "team:gcp",
    "provider:gcp",
    "env:${var.environment}",
    "resource:cloud-sql",
    "engine:mysql",
  ]
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user