diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md
index 1d0cecd..d316646 100644
--- a/cloud/gcp/cloud-sql/mysql/README.md
+++ b/cloud/gcp/cloud-sql/mysql/README.md
@@ -29,6 +29,7 @@ Useful links
 * [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql)
 * [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/)
 * [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits)
+* [Monitoring Replication Lag](https://cloud.google.com/sql/docs/mysql/high-availability#replication-lag-monitor)
 
 Inputs
 ------
@@ -47,3 +48,8 @@ Inputs
 | network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no |
 | network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no |
 | project_id | ID of the GCP Project | string | - | yes |
+| replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no |
+| replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no |
+| replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `2700` | no |
+| replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `2000` | no |
+| replication_lag_timeframe | Timeframe for the Replication Lag monitor | string | `last_10m` | no |
diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf
index b1fbc95..65f190e 100644
--- a/cloud/gcp/cloud-sql/mysql/inputs.tf
+++ b/cloud/gcp/cloud-sql/mysql/inputs.tf
@@ -71,3 +71,36 @@ variable "network_connections_silenced" {
   type        = "map"
   default     = {}
 }
+
+#
+# Replication Lag
+#
+variable "replication_lag_message" {
+  description = "Custom message for the Replication Lag monitor"
+  type        = "string"
+  default     = ""
+}
+
+variable "replication_lag_timeframe" {
+  description = "Timeframe for the Replication Lag monitor"
+  type        = "string"
+  default     = "last_10m"
+}
+
+variable "replication_lag_threshold_warning" {
+  description = "Seconds behind the master (warning threshold)"
+  type        = "string"
+  default     = 2000
+}
+
+variable "replication_lag_threshold_critical" {
+  description = "Seconds behind the master (critical threshold)"
+  type        = "string"
+  default     = 2700
+}
+
+variable "replication_lag_silenced" {
+  description = "Groups to mute for GCP Cloud SQL Replication Lag monitor"
+  type        = "map"
+  default     = {}
+}
diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf
index 847de67..8c4a580 100644
--- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf
+++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf
@@ -52,3 +52,45 @@ EOF
     "engine:mysql",
   ]
 }
+
+#
+# Replication Lag
+#
+resource "datadog_monitor" "datadog_monitor_cloud_sql_mysql_replication_lag" {
+  name    = "[${var.environment}] Cloud SQL MySQL Replication Lag too high"
+  message = "${coalesce(var.replication_lag_message, var.message)}"
+
+  type = "metric alert"
+
+  # NOTE(review): heredoc body was lost in extraction and is reconstructed - confirm aggregator, metric name and filter.
+  query = <<EOF
+  avg(${var.replication_lag_timeframe}):
+    avg:gcp.cloudsql.database.mysql.replication.seconds_behind_master{project_id:${var.project_id}}
+    by {database_id}
+  > ${var.replication_lag_threshold_critical}
+EOF
+
+  thresholds {
+    critical = "${var.replication_lag_threshold_critical}"
+    warning  = "${var.replication_lag_threshold_warning}"
+  }
+
+  include_tags        = true
+  notify_no_data      = true
+  require_full_window = false
+  renotify_interval   = 0
+  notify_audit        = false
+  timeout_h           = 0
+  locked              = false
+  evaluation_delay    = "${var.delay}"
+  new_host_delay      = "${var.delay}"
+  silenced            = "${var.replication_lag_silenced}"
+
+  tags = [
+    "team:gcp",
+    "provider:gcp",
+    "env:${var.environment}",
+    "resource:cloud-sql",
+    "engine:mysql",
+  ]
+}