From 9233464195b6b58ca270ab3bdd286dd8f9826a8c Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 15 Jun 2018 16:53:23 +0200 Subject: [PATCH] MON-224 CloudSQL MySQL Monitors for Queries and Questions Anomalies --- cloud/gcp/cloud-sql/mysql/README.md | 23 +++ cloud/gcp/cloud-sql/mysql/inputs.tf | 138 ++++++++++++++++++ .../mysql/monitors-cloudsql-mysql.tf | 104 +++++++++++++ 3 files changed, 265 insertions(+) diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index d316646..12dfdd3 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -30,6 +30,7 @@ Useful links * [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/) * [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits) * [Monitoring Replication Lag](https://cloud.google.com/sql/docs/mysql/high-availability#replication-lag-monitor) +* [Monitoring MySQL Performance Metrics](https://www.datadoghq.com/blog/monitoring-mysql-performance-metrics) Inputs ------ @@ -48,6 +49,28 @@ Inputs | network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no | | network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no | | project_id | ID of the GCP Project | string | - | yes | +| queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no | +| queries_changing_database_ids | List of Cloud SQL database IDs to watch for abnormal changes in the Queries count (one monitor per ID) | list | `` | no | +| queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no | +| queries_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | +| queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no | +| queries_changing_region | Region of the Cloud SQL instances, used to build the project_id:region:database_id identifier of the Queries Changing monitor | string | `` | no | +| queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | +| queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `` | no | +| queries_changing_threshold_critical | Queries Changing critical threshold | string | `1` | no | +| queries_changing_threshold_warning | Queries Changing warning threshold | string | `0.5` | no | +| queries_changing_timeframe | Timeframe for the Queries Changing monitor | string | `last_10m` | no | +| questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no | +| questions_changing_database_ids | List of Cloud SQL database IDs to watch for abnormal changes in the Questions count (one monitor per ID) | list | `` | no | +| questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | +| questions_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | +| questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no | +| questions_changing_region | Region of the Cloud SQL instances, used to build the project_id:region:database_id identifier of the Questions Changing monitor | string | `` | no | +| questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | +| questions_changing_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `` | no | +| questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no | +| questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | +| questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_10m` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | | replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `2700` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index 65f190e..e440edc 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -104,3 +104,141 @@ variable "replication_lag_silenced" { type = "map" default = {} } + +# +# Queries Changing Abnormally +# +variable "queries_changing_database_ids" { + description = "List of Cloud SQL database IDs to watch for abnormal changes in the Queries count (one monitor per ID)" + type = "list" + default = [] +} + +variable "queries_changing_region" { + description = "Region of the Cloud SQL instances, used to build the project_id:region:database_id identifier of the Queries Changing monitor" + type = "string" + default = "" +} + +variable "queries_changing_message" { + description = "Custom message for the Queries Changing monitor" + type = "string" + default = "" +} + +variable "queries_changing_timeframe" { + description = "Timeframe for the Queries Changing monitor" + type = "string" + default = "last_10m" +} + +variable "queries_changing_anomaly_detection_algorithm" { + description = "Anomaly Detection Algorithm used" + type = "string" + default = "robust" +} + +variable 
"queries_changing_deviations" { + description = "Deviations to detect the anomaly" + type = "string" + default = 4 +} + +variable "queries_changing_direction" { + description = "Direction of the anomaly. It can be both, below or above." + type = "string" + default = "both" +} + +variable "queries_changing_seasonality" { + description = "Seasonality of the algorithm" + type = "string" + default = "weekly" +} + +variable "queries_changing_threshold_warning" { + description = "Queries Changing warning threshold" + type = "string" + default = 0.5 +} + +variable "queries_changing_threshold_critical" { + description = "Queries Changing critical threshold" + type = "string" + default = 1 +} + +variable "queries_changing_silenced" { + description = "Groups to mute for GCP Cloud SQL Queries Changing monitor" + type = "map" + default = {} +} + +# +# Questions Changing Abnormally +# +variable "questions_changing_message" { + description = "Custom message for the Questions Changing monitor" + type = "string" + default = "" +} + +variable "questions_changing_timeframe" { + description = "Timeframe for the Questions Changing monitor" + type = "string" + default = "last_10m" +} + +variable "questions_changing_database_ids" { + description = "List of Cloud SQL database IDs to watch for abnormal changes in the Questions count (one monitor per ID)" + type = "list" + default = [] +} + +variable "questions_changing_region" { + description = "Region of the Cloud SQL instances, used to build the project_id:region:database_id identifier of the Questions Changing monitor" + type = "string" + default = "" +} + +variable "questions_changing_anomaly_detection_algorithm" { + description = "Anomaly Detection Algorithm used" + type = "string" + default = "robust" +} + +variable "questions_changing_deviations" { + description = "Deviations to detect the anomaly" + type = "string" + default = 4 +} + +variable "questions_changing_direction" { + description = "Direction of the anomaly. It can be both, below or above." 
+ type = "string" + default = "both" +} + +variable "questions_changing_seasonality" { + description = "Seasonality of the algorithm" + type = "string" + default = "weekly" +} + +variable "questions_changing_threshold_warning" { + description = "Questions Changing warning threshold" + type = "string" + default = 0.5 +} + +variable "questions_changing_threshold_critical" { + description = "Questions Changing critical threshold" + type = "string" + default = 1 +} + +variable "questions_changing_silenced" { + description = "Groups to mute for GCP Cloud SQL Questions Changing monitor" + type = "map" + default = {} +} diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index abe928a..e24b5db 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -94,3 +94,107 @@ EOF "engine:mysql", ] } + +# +# Queries Anomaly +# +resource "datadog_monitor" "queries_changing_anomaly" { + count = "${length(var.queries_changing_database_ids)}" + + name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally on ${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.queries_changing_message, var.message)}" + + type = "metric alert" + + query = < ${var.queries_changing_threshold_critical} +EOF + + thresholds { + warning = "${var.queries_changing_threshold_warning}" + critical = "${var.queries_changing_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = 
"${var.queries_changing_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "engine:mysql", + "database_id:${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]}", + ] +} + +# +# Questions Anomaly +# +resource "datadog_monitor" "questions_changing_anomaly" { + count = "${length(var.questions_changing_database_ids)}" + + name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally on ${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.questions_changing_message, var.message)}" + + type = "metric alert" + + query = < ${var.questions_changing_threshold_critical} +EOF + + thresholds { + warning = "${var.questions_changing_threshold_warning}" + critical = "${var.questions_changing_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = "${var.questions_changing_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "engine:mysql", + "database_id:${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]}", + ] +}