From 70a45ed9f68cd1bb3ed1ea380f1f62583053ad90 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 14 Jun 2018 10:55:03 +0200 Subject: [PATCH 01/46] MON-224 Standard and recommended monitors with their inputs and readme --- cloud/gcp/cloud-sql/instance/README.md | 32 ++++ cloud/gcp/cloud-sql/instance/inputs.tf | 161 ++++++++++++++++++ .../instance/monitors-cloud-sql-instance.tf | 142 +++++++++++++++ 3 files changed, 335 insertions(+) create mode 100644 cloud/gcp/cloud-sql/instance/README.md create mode 100644 cloud/gcp/cloud-sql/instance/inputs.tf create mode 100644 cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md new file mode 100644 index 0000000..805c302 --- /dev/null +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -0,0 +1,32 @@ + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| cpu_message | Custom message for the CPU Utilization monitor | string | `` | no | +| cpu_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | +| cpu_tags | Tags to add to the CPU Utilization monitors | map | `` | no | +| cpu_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no | +| cpu_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.85` | no | +| cpu_timeframe | Timeframe for the CPU Utilization monitor | string | `last_2h` | no | +| database_id | ID of the Cloud SQL Database Instance | string | - | yes | +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| disk_message | Custom message for the Disk Utilization monitor | string | `` | no | +| disk_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | +| disk_tags | Tags to add to the Disk Utilization monitors | map | `` | no | +| disk_threshold_critical | Disk Utilization in fraction (critical 
threshold) | string | `0.9` | no | +| disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | +| disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | +| environment | Architecture environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when a monitor is triggered | string | - | yes | +| network_connections_hard_limit | Max number of network connections | string | - | yes | +| network_connections_message | Custom message for the Netowork Connections monitor | string | `` | no | +| network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | +| network_connections_tags | Tags to add to the Network Connections monitors | map | `` | no | +| network_connections_threshold_critical | Fraction of network connections (warning threshold) | string | `0.9` | no | +| network_connections_threshold_warning | Fraction of network connections (warning threshold) | string | `0.8` | no | +| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no | +| project_id | ID of the GCP Project | string | - | yes | + diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf new file mode 100644 index 0000000..6cb511b --- /dev/null +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -0,0 +1,161 @@ +# +# Datadog global variables +# +variable "environment" { + description = "Architecture environment" + type = "string" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable 
"message" { + description = "Message sent when a monitor is triggered" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +# +# Filter variables +# +variable "project_id" { + type = "string" + description = "ID of the GCP Project" +} + +variable "database_id" { + type = "string" + description = "ID of the Cloud SQL Database Instance" +} + +# +# CPU +# +variable "cpu_message" { + description = "Custom message for the CPU Utilization monitor" + type = "string" + default = "" +} + +variable "cpu_timeframe" { + description = "Timeframe for the CPU Utilization monitor" + type = "string" + default = "last_2h" +} + +variable "cpu_threshold_warning" { + description = "CPU Utilization in fraction (warning threshold)" + type = "string" + default = 0.85 +} + +variable "cpu_threshold_critical" { + description = "CPU Utilization in fraction (critical threshold)" + type = "string" + default = 0.9 +} + +variable "cpu_silenced" { + description = "Groups to mute for GCP Cloud SQL CPU Utilization monitor" + type = "map" + default = {} +} + +variable "cpu_tags" { + description = "Tags to add to the CPU Utilization monitors" + type = "map" + default = {} +} + +# +# DISK +# +variable "disk_message" { + description = "Custom message for the Disk Utilization monitor" + type = "string" + default = "" +} + +variable "disk_timeframe" { + description = "Timeframe for the Disk Utilization monitor" + type = "string" + default = "last_5m" +} + +variable "disk_threshold_warning" { + description = "Disk Utilization in fraction (warning threshold)" + type = "string" + default = 0.8 +} + +variable "disk_threshold_critical" { + description = "Disk Utilization in fraction (critical threshold)" + type = "string" + default = 0.9 +} + +variable "disk_silenced" { + description = "Groups to mute for GCP Cloud SQL Disk Utilization monitor" + type = "map" + default = {} +} + +variable "disk_tags" { + description = "Tags to add to the Disk Utilization 
monitors" + type = "map" + default = {} +} + +# +# Network Connections +# +variable "network_connections_message" { + description = "Custom message for the Netowork Connections monitor" + type = "string" + default = "" +} + +variable "network_connections_timeframe" { + description = "Timeframe for the Network Connections monitor" + type = "string" + default = "last_5m" +} + +variable "network_connections_hard_limit" { + description = "Max number of network connections" + type = "string" +} + +variable "network_connections_threshold_warning" { + description = "Fraction of network connections (warning threshold)" + type = "string" + default = 0.8 +} + +variable "network_connections_threshold_critical" { + description = "Fraction of network connections (warning threshold)" + type = "string" + default = 0.9 +} + +variable "network_connections_silenced" { + description = "Groups to mute for GCP Cloud SQL Network Connections monitor" + type = "map" + default = {} +} + +variable "network_connections_tags" { + description = "Tags to add to the Network Connections monitors" + type = "map" + default = {} +} diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf new file mode 100644 index 0000000..73a51fc --- /dev/null +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -0,0 +1,142 @@ +# +# FILTER +# +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? 
+ format("project_id:%s", var.project_id) : + "${var.filter_tags_custom}"}" + } +} + +# +# CPU Utilization +# +resource "datadog_monitor" "cpu_utilization" { + name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.cpu_message, var.message)}" + + type = "metric alert" + + query = < ${var.cpu_threshold_critical} +EOF + + thresholds { + warning = "${var.cpu_threshold_warning}" + critical = "${var.cpu_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = "${var.cpu_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "${var.cpu_tags}", + ] +} + +# +# Disk Utilization +# +resource "datadog_monitor" "disk_utilization" { + name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.disk_message, var.message)}" + + type = "metric alert" + + query = < ${var.disk_threshold_critical} +EOF + + thresholds { + warning = "${var.disk_threshold_warning}" + critical = "${var.disk_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = "${var.disk_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "${var.disk_tags}", + ] +} + +# +# Network Connections +# +resource 
"datadog_monitor" "network_connections" { + name = "[${var.environment}] Cloud SQL Network Connections (hard limit: ${var.network_connections_hard_limit}) {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.network_connections_message, var.message)}" + + type = "metric alert" + + query = < ${var.network_connections_threshold_critical} +EOF + + thresholds { + warning = "${var.network_connections_threshold_warning}" + critical = "${var.network_connections_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = "${var.network_connections_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "${var.network_connections_tags}", + ] +} From 7e05a55331c10d4a9010b7554b15f9ce719558c3 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 14 Jun 2018 11:25:26 +0200 Subject: [PATCH 02/46] MON-224 Remove database_id from inputs because all the monitors are grouping by database_id --- cloud/gcp/cloud-sql/instance/inputs.tf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index 6cb511b..ddb5739 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -33,11 +33,6 @@ variable "project_id" { description = "ID of the GCP Project" } -variable "database_id" { - type = "string" - description = "ID of the Cloud SQL Database Instance" -} - # # CPU # From 91b07cbb65fe9b9de9546dc9c3f4383b784a4bd5 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 14 Jun 2018 11:33:21 +0200 Subject: [PATCH 03/46] MON-224 Removed additional tags for 
monitors --- cloud/gcp/cloud-sql/instance/inputs.tf | 18 ------------------ .../instance/monitors-cloud-sql-instance.tf | 3 --- 2 files changed, 21 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index ddb5739..9918e2e 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -66,12 +66,6 @@ variable "cpu_silenced" { default = {} } -variable "cpu_tags" { - description = "Tags to add to the CPU Utilization monitors" - type = "map" - default = {} -} - # # DISK # @@ -105,12 +99,6 @@ variable "disk_silenced" { default = {} } -variable "disk_tags" { - description = "Tags to add to the Disk Utilization monitors" - type = "map" - default = {} -} - # # Network Connections # @@ -148,9 +136,3 @@ variable "network_connections_silenced" { type = "map" default = {} } - -variable "network_connections_tags" { - description = "Tags to add to the Network Connections monitors" - type = "map" - default = {} -} diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 73a51fc..722af31 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -50,7 +50,6 @@ EOF "provider:gcp", "env:${var.environment}", "resource:cloud-sql", - "${var.cpu_tags}", ] } @@ -93,7 +92,6 @@ EOF "provider:gcp", "env:${var.environment}", "resource:cloud-sql", - "${var.disk_tags}", ] } @@ -137,6 +135,5 @@ EOF "provider:gcp", "env:${var.environment}", "resource:cloud-sql", - "${var.network_connections_tags}", ] } From 316fde7e755b3e9e3c3284f9103e0f07af3940f1 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 14 Jun 2018 13:24:55 +0200 Subject: [PATCH 04/46] MON-224 Monitors for CPU, Disk, Memory Utilization and Memory Utilization Forecast --- cloud/gcp/cloud-sql/instance/README.md | 22 +++--- cloud/gcp/cloud-sql/instance/inputs.tf | 70 
++++++++++++----- .../instance/monitors-cloud-sql-instance.tf | 77 ++++++++++++++----- 3 files changed, 120 insertions(+), 49 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 805c302..900c15a 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -5,28 +5,30 @@ |------|-------------|:----:|:-----:|:-----:| | cpu_message | Custom message for the CPU Utilization monitor | string | `` | no | | cpu_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | -| cpu_tags | Tags to add to the CPU Utilization monitors | map | `` | no | | cpu_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no | | cpu_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.85` | no | | cpu_timeframe | Timeframe for the CPU Utilization monitor | string | `last_2h` | no | -| database_id | ID of the Cloud SQL Database Instance | string | - | yes | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | disk_message | Custom message for the Disk Utilization monitor | string | `` | no | | disk_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | -| disk_tags | Tags to add to the Disk Utilization monitors | map | `` | no | | disk_threshold_critical | Disk Utilization in fraction (critical threshold) | string | `0.9` | no | | disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | | disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| memory_forecast_history | History for the Memory Utilization Forecast 
monitor | string | `12h` | no | +| memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | +| memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | +| memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | +| memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | +| memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | +| memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | +| memory_message | Custom message for the Memory Utilization monitor | string | `` | no | +| memory_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | +| memory_threshold_critical | Memory Utilization in fraction (critical threshold) | string | `0.9` | no | +| memory_threshold_warning | Memory Utilization in fraction (warning threshold) | string | `0.8` | no | +| memory_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | -| network_connections_hard_limit | Max number of network connections | string | - | yes | -| network_connections_message | Custom message for the Netowork Connections monitor | string | `` | no | -| network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | -| network_connections_tags | Tags to add to the Network Connections monitors | map | `` | no | -| network_connections_threshold_critical | Fraction of network connections (warning threshold) | string | `0.9` | no | -| network_connections_threshold_warning | Fraction of network connections (warning threshold) | string | `0.8` | no | -| network_connections_timeframe | Timeframe for the 
Network Connections monitor | string | `last_5m` | no | | project_id | ID of the GCP Project | string | - | yes | diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index 9918e2e..4109158 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -100,39 +100,69 @@ variable "disk_silenced" { } # -# Network Connections +# Memory Utilization # -variable "network_connections_message" { - description = "Custom message for the Netowork Connections monitor" - type = "string" +variable "memory_message" { + description = "Custom message for the Memory Utilization monitor" default = "" } -variable "network_connections_timeframe" { - description = "Timeframe for the Network Connections monitor" - type = "string" +variable "memory_timeframe" { + description = "Timeframe for the Memory Utilization monitor" default = "last_5m" } -variable "network_connections_hard_limit" { - description = "Max number of network connections" - type = "string" -} - -variable "network_connections_threshold_warning" { - description = "Fraction of network connections (warning threshold)" - type = "string" +variable "memory_threshold_warning" { + description = "Memory Utilization in fraction (warning threshold)" default = 0.8 } -variable "network_connections_threshold_critical" { - description = "Fraction of network connections (warning threshold)" - type = "string" +variable "memory_threshold_critical" { + description = "Memory Utilization in fraction (critical threshold)" default = 0.9 } -variable "network_connections_silenced" { - description = "Groups to mute for GCP Cloud SQL Network Connections monitor" +variable "memory_silenced" { + description = "Groups to mute for GCP Cloud SQL Memory Utilization monitor" + type = "map" + default = {} +} + +# +# Memory Utilization Forecast +# +variable "memory_forecast_message" { + description = "Custom message for the Memory Utilization Forecast monitor" + default = "" +} + 
+variable "memory_forecast_timeframe" { + description = "Timeframe for the Memory Utilization Forecast monitor" + default = "next_3d" +} + +variable "memory_forecast_interval" { + description = "Interval for the Memory Utilization Forecast monitor" + default = "30m" +} + +variable "memory_forecast_history" { + description = "History for the Memory Utilization Forecast monitor" + default = "12h" +} + +variable "memory_forecast_threshold_warning" { + description = "Memory Utilization Forecast in fraction (warning threshold)" + default = 0.8 +} + +variable "memory_forecast_threshold_critical" { + description = "Memory Utilization Forecast in fraction (critical threshold)" + default = 0.9 +} + +variable "memory_forecast_silenced" { + description = "Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor" type = "map" default = {} } diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 722af31..69df62d 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -21,11 +21,9 @@ resource "datadog_monitor" "cpu_utilization" { type = "metric alert" query = < ${var.cpu_threshold_critical} + avg(${var.cpu_timeframe}): avg:gcp.cloudsql.database.cpu.utilization{${data.template_file.filter.rendered}} + by {database_id} + > ${var.cpu_threshold_critical} EOF thresholds { @@ -63,9 +61,8 @@ resource "datadog_monitor" "disk_utilization" { type = "metric alert" query = < ${var.disk_threshold_critical} EOF @@ -96,26 +93,24 @@ EOF } # -# Network Connections +# Memory Utilization # -resource "datadog_monitor" "network_connections" { - name = "[${var.environment}] Cloud SQL Network Connections (hard limit: ${var.network_connections_hard_limit}) {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = 
"${coalesce(var.network_connections_message, var.message)}" +resource "datadog_monitor" "memory_utilization" { + name = "[${var.environment}] Cloud SQL Memory Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.memory_message, var.message)}" type = "metric alert" query = < ${var.network_connections_threshold_critical} + > ${var.memory_threshold_critical} EOF thresholds { - warning = "${var.network_connections_threshold_warning}" - critical = "${var.network_connections_threshold_critical}" + warning = "${var.memory_threshold_warning}" + critical = "${var.memory_threshold_critical}" } include_tags = true @@ -128,7 +123,51 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.network_connections_silenced}" + silenced = "${var.memory_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + ] +} + +resource "datadog_monitor" "memory_utilization_forecast" { + name = "[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.memory_forecast_message, var.message)}" + + type = "query alert" + + query = < ${var.memory_forecast_threshold_critical} +EOF + + thresholds { + warning = "${var.memory_forecast_threshold_warning}" + critical = "${var.memory_forecast_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = "${var.memory_forecast_silenced}" tags = [ "team:gcp", From d76ebfc1c45347da562528a93d87731a8789e321 Mon Sep 17 
00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 14 Jun 2018 16:58:22 +0200 Subject: [PATCH 05/46] MON-224 Instance Memory Utilization alerts, including forecasts. --- cloud/gcp/cloud-sql/instance/README.md | 42 ++++++++++++++++++- .../instance/monitors-cloud-sql-instance.tf | 5 ++- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 900c15a..b9d5ce5 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -1,5 +1,37 @@ +GCP CloudSQL Instance Monitors +============================== -## Inputs +How to use this module +---------------------- + +``` +module "datadog-monitors-gcp-cloudsql" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/clouds-sql/instance?ref={revision}" + + project_id = "${var.gcp_project_id}" + environment = "${var.environment}" + message = "${module.datadog-message-alerting.alerting-message}" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks : + +* CloudSQL Instance CPU Utilization +* CloudSQL Instance Disk Utilization +* CloudSQL Instance Memory Utilization +* CloudSQL Instance Memory Utilization Forecast + +Useful links +------------ + +* [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql) +* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/) + +Inputs +------ | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| @@ -18,11 +50,18 @@ | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | +| memory_forecast_history | History for the Memory Utilization 
Forecast monitor | string | `12h` | no | +| memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | | memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | | memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | +| memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | +| memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | | memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | | memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | +| memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | | memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | +| memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | +| memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | | memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | | memory_message | Custom message for the Memory Utilization monitor | string | `` | no | | memory_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | @@ -31,4 +70,3 @@ | memory_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | project_id | ID of the GCP Project | string | - | yes | - diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 
69df62d..65deccc 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -6,7 +6,7 @@ data "template_file" "filter" { vars { filter = "${var.filter_tags_use_defaults == "true" ? - format("project_id:%s", var.project_id) : + format("project_id:%s", var.project_id) : "${var.filter_tags_custom}"}" } } @@ -133,6 +133,9 @@ EOF ] } +# +# Memory Utilization Forecast +# resource "datadog_monitor" "memory_utilization_forecast" { name = "[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.memory_forecast_message, var.message)}" From 9557437195ec4efa7b503c9e27e9e03113bba565 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 14 Jun 2018 18:17:04 +0200 Subject: [PATCH 06/46] MON-224 CloudSQL MySQL Network Connections monitor --- cloud/gcp/cloud-sql/mysql/README.md | 49 +++++++++++++ cloud/gcp/cloud-sql/mysql/inputs.tf | 73 +++++++++++++++++++ .../mysql/monitors-cloudsql-mysql.tf | 54 ++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 cloud/gcp/cloud-sql/mysql/README.md create mode 100644 cloud/gcp/cloud-sql/mysql/inputs.tf create mode 100644 cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md new file mode 100644 index 0000000..1d0cecd --- /dev/null +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -0,0 +1,49 @@ +GCP CloudSQL MySQL Monitors +============================== + +How to use this module +---------------------- + +``` +module "datadog-monitors-gcp-cloudsql-mysql" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/clouds-sql/mysql?ref={revision}" + + project_id = "${var.gcp_project_id}" + environment = "${var.environment}" + message = 
"${module.datadog-message-alerting.alerting-message}" +} + +``` + +Purpose +------- +Creates DataDog monitors with the following checks : + +* CloudSQL MySQL Network Connections +* CloudSQL MySQL Replication Lag +* CloudSQL MySQL Failover Lag + +Useful links +------------ + +* [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql) +* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/) +* [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits) + +Inputs +------ + +| Name | Description | Type | Default | Required | +|------|-------------|:----:|:-----:|:-----:| +| delay | Delay in seconds for the metric evaluation | string | `900` | no | +| environment | Architecture environment | string | - | yes | +| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | +| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| message | Message sent when a monitor is triggered | string | - | yes | +| network_connections_hard_limit | Max number of connections for the CloudSQL Instance. 
Default value is the max value on https://cloud.google.com/sql/docs/quotas#fixed-limits for MySQL | string | `4000` | no | +| network_connections_message | Custom message for the Network Connections monitor | string | `` | no | +| network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | +| network_connections_threshold_critical | Number of network connections (critical threshold) | string | `3600` | no | +| network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no | +| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no | +| project_id | ID of the GCP Project | string | - | yes | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf new file mode 100644 index 0000000..b1fbc95 --- /dev/null +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -0,0 +1,73 @@ +# +# Datadog global variables +# +variable "environment" { + description = "Architecture environment" + type = "string" +} + +variable "filter_tags_use_defaults" { + description = "Use default filter tags convention" + default = "true" +} + +variable "filter_tags_custom" { + description = "Tags used for custom filtering when filter_tags_use_defaults is false" + default = "*" +} + +variable "message" { + description = "Message sent when a monitor is triggered" +} + +variable "delay" { + description = "Delay in seconds for the metric evaluation" + default = 900 +} + +# +# Filter variables +# +variable "project_id" { + type = "string" + description = "ID of the GCP Project" +} + +# +# Network Connections +# +variable "network_connections_message" { + description = "Custom message for the Network Connections monitor" + type = "string" + default = "" +} + +variable "network_connections_timeframe" { + description = "Timeframe for the Network Connections monitor" + type = "string" + default = "last_5m" +} + +variable 
"network_connections_hard_limit" { + description = "Max number of connections for the CloudSQL Instance. Default value is the max value on https://cloud.google.com/sql/docs/quotas#fixed-limits for MySQL" + type = "string" + default = 4000 +} + +variable "network_connections_threshold_warning" { + description = "Number of network connections (warning threshold)" + type = "string" + default = 3200 +} + +variable "network_connections_threshold_critical" { + description = "Number of network connections (critical threshold)" + type = "string" + default = 3600 +} + +variable "network_connections_silenced" { + description = "Groups to mute for GCP Cloud SQL Network Connections monitor" + type = "map" + default = {} +} diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf new file mode 100644 index 0000000..847de67 --- /dev/null +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -0,0 +1,54 @@ +# +# FILTERS +# +data "template_file" "filter" { + template = "$${filter}" + + vars { + filter = "${var.filter_tags_use_defaults == "true" ? 
+ format("project_id:%s",var.project_id) : + "${var.filter_tags_custom}"}" + } +} + +# +# MySQL Network Connections +# +resource "datadog_monitor" "network_connections" { + name = "[${var.environment}] Cloud SQL MySQL Network Connections (hard limit: ${var.network_connections_hard_limit}) {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.network_connections_message, var.message)}" + + type = "metric alert" + + query = < ${var.network_connections_threshold_critical} +EOF + + thresholds { + warning = "${var.network_connections_threshold_warning}" + critical = "${var.network_connections_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = "${var.network_connections_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "engine:mysql", + ] +} From 79f8a5d486edaf8e5344978375b16326e6f6fa27 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 15 Jun 2018 10:17:19 +0200 Subject: [PATCH 07/46] MON-224 CloudSQL Instance Failover Unavailable monitor --- cloud/gcp/cloud-sql/instance/README.md | 12 +++--- cloud/gcp/cloud-sql/instance/inputs.tf | 28 +++++++++++++ .../instance/monitors-cloud-sql-instance.tf | 40 +++++++++++++++++++ 3 files changed, 73 insertions(+), 7 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index b9d5ce5..ab0b700 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -23,6 +23,7 @@ Creates DataDog monitors with the following checks : * CloudSQL Instance Disk Utilization * CloudSQL Instance Memory Utilization * CloudSQL Instance 
Memory Utilization Forecast +* CloudSQL Instance Failover Unavailable Useful links ------------ @@ -47,21 +48,18 @@ Inputs | disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | | disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | +| failover_unavailable_message | Custom message for the Failover Unavailable monitor | string | `` | no | +| failover_unavailable_silenced | Groups to mute for GCP Cloud SQL Failover Unavailable monitor | map | `` | no | +| failover_unavailable_threshold_critical | Failover Unavailable critical threshold | string | `0` | no | +| failover_unavailable_timeframe | Timeframe for the Failover Unavailable monitor | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | -| memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | -| memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | | memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | | memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | -| memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | -| memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | | memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | | memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string 
| `0.9` | no | -| memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | | memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | -| memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | -| memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | | memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | | memory_message | Custom message for the Memory Utilization monitor | string | `` | no | | memory_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index 4109158..e060c47 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -166,3 +166,31 @@ variable "memory_forecast_silenced" { type = "map" default = {} } + +# +# Failover Unavailable +# + +variable "failover_unavailable_message" { + description = "Custom message for the Failover Unavailable monitor" + type = "string" + default = "" +} + +variable "failover_unavailable_timeframe" { + description = "Timeframe for the Failover Unavailable monitor" + type = "string" + default = "last_5m" +} + +variable "failover_unavailable_threshold_critical" { + description = "Failover Unavailable critical threshold" + type = "string" + default = 0 +} + +variable "failover_unavailable_silenced" { + description = "Groups to mute for GCP Cloud SQL Failover Unavailable monitor" + type = "map" + default = {} +} diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 65deccc..77aab22 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ 
b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -179,3 +179,43 @@ EOF "resource:cloud-sql", ] } + +# +# Failover Unavailable +# +resource "datadog_monitor" "failover_unavailable" { + name = "[${var.environment}] Cloud SQL MySQL Failover Unavailable {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.failover_unavailable_message, var.message)}" + + type = "metric alert" + + query = < Date: Fri, 15 Jun 2018 11:25:10 +0200 Subject: [PATCH 08/46] MON-224 CloudSQL MySQL Replication Lag monitor --- cloud/gcp/cloud-sql/mysql/README.md | 6 +++ cloud/gcp/cloud-sql/mysql/inputs.tf | 33 +++++++++++++++ .../mysql/monitors-cloudsql-mysql.tf | 42 +++++++++++++++++++ 3 files changed, 81 insertions(+) diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index 1d0cecd..d316646 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -29,6 +29,7 @@ Useful links * [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql) * [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/) * [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits) +* [Monitoring Replication Lag](https://cloud.google.com/sql/docs/mysql/high-availability#replication-lag-monitor) Inputs ------ @@ -47,3 +48,8 @@ Inputs | network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no | | network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no | | project_id | ID of the GCP Project | string | - | yes | +| replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | +| replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag 
monitor | map | `` | no | +| replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `2700` | no | +| replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `2000` | no | +| replication_lag_timeframe | Timeframe for the Replication Lag monitor | string | `last_10m` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index b1fbc95..65f190e 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -71,3 +71,36 @@ variable "network_connections_silenced" { type = "map" default = {} } + +# +# Replication Lag +# +variable "replication_lag_message" { + description = "Custom message for the Replication Lag monitor" + type = "string" + default = "" +} + +variable "replication_lag_timeframe" { + description = "Timeframe for the Replication Lag monitor" + type = "string" + default = "last_10m" +} + +variable "replication_lag_threshold_warning" { + description = "Seconds behind the master (warning threshold)" + type = "string" + default = 2000 +} + +variable "replication_lag_threshold_critical" { + description = "Seconds behind the master (critical threshold)" + type = "string" + default = 2700 +} + +variable "replication_lag_silenced" { + description = "Groups to mute for GCP Cloud SQL Replication Lag monitor" + type = "map" + default = {} +} diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 847de67..8c4a580 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -52,3 +52,45 @@ EOF "engine:mysql", ] } + +# +# Replication Lag +# +resource "datadog_monitor" "datadog_monitor_cloud_sql_mysql_replication_lag" { + name = "[${var.environment}] Cloud SQL MySQL Replication Lag too high" + message = "${coalesce(var.replication_lag_message, var.message)}" + + type = "metric alert" + + 
query = < ${var.replication_lag_threshold_critical} +EOF + + thresholds { + critical = "${var.replication_lag_threshold_critical}" + warning = "${var.replication_lag_threshold_warning}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = "${var.questions_changing_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "engine:mysql", + ] +} From 989fe8794e82fc1188e4800318789d429da7a8b9 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 15 Jun 2018 11:29:43 +0200 Subject: [PATCH 09/46] MON-224 Fix name of Failover alert which is valid for all and not only MySQL --- cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 77aab22..5f08268 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -184,7 +184,7 @@ EOF # Failover Unavailable # resource "datadog_monitor" "failover_unavailable" { - name = "[${var.environment}] Cloud SQL MySQL Failover Unavailable {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL Failover Unavailable {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.failover_unavailable_message, var.message)}" type = "metric alert" From d0e2f9106e4aa27006a81f61fa4c346d249aaa16 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 
15 Jun 2018 12:35:55 +0200 Subject: [PATCH 10/46] MON-224 Query alert changed to Metric alert --- cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 5f08268..5ffa534 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -140,7 +140,7 @@ resource "datadog_monitor" "memory_utilization_forecast" { name = "[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.memory_forecast_message, var.message)}" - type = "query alert" + type = "metric alert" query = < Date: Fri, 15 Jun 2018 13:35:49 +0200 Subject: [PATCH 11/46] MON-224 Fixed name and variables in Replication Lag monitor --- cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 8c4a580..abe928a 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -56,7 +56,7 @@ EOF # # Replication Lag # -resource "datadog_monitor" "datadog_monitor_cloud_sql_mysql_replication_lag" { +resource "datadog_monitor" "replication_lag" { name = "[${var.environment}] Cloud SQL MySQL Replication Lag too high" message = "${coalesce(var.replication_lag_message, var.message)}" @@ -84,7 +84,7 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.questions_changing_silenced}" + silenced = "${var.replication_lag_silenced}" tags = [ "team:gcp", From 
9233464195b6b58ca270ab3bdd286dd8f9826a8c Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 15 Jun 2018 16:53:23 +0200 Subject: [PATCH 12/46] MON-224 CloudSQL MySQL Monitors for Queries and Questions Anomalies --- cloud/gcp/cloud-sql/mysql/README.md | 23 +++ cloud/gcp/cloud-sql/mysql/inputs.tf | 138 ++++++++++++++++++ .../mysql/monitors-cloudsql-mysql.tf | 104 +++++++++++++ 3 files changed, 265 insertions(+) diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index d316646..12dfdd3 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -30,6 +30,7 @@ Useful links * [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/) * [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits) * [Monitoring Replication Lag](https://cloud.google.com/sql/docs/mysql/high-availability#replication-lag-monitor) +* [Monitoring MySQL Performance Metrics](https://www.datadoghq.com/blog/monitoring-mysql-performance-metrics) Inputs ------ @@ -48,6 +49,28 @@ Inputs | network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no | | network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no | | project_id | ID of the GCP Project | string | - | yes | +| queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no | +| queries_changing_database_ids | Queries Changing Abnormally | list | `` | no | +| queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no | +| queries_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | +| queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no | +| queries_changing_region | | string | `` | no | +| queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | +| queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `` | no | +| queries_changing_threshold_critical | Queries Changing critical threshold | string | `1` | no | +| queries_changing_threshold_warning | Queries Changing warning threshold | string | `0.5` | no | +| queries_changing_timeframe | Timeframe for the Queries Changing mon monitor | string | `last_10m` | no | +| questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no | +| questions_changing_database_ids | | list | `` | no | +| questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | +| questions_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | +| questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no | +| questions_changing_region | | string | `` | no | +| questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | +| questions_changing_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | +| questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no | +| questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | +| questions_changing_timeframe | Timeframe for the Questions Changing mon monitor | string | `last_10m` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | | replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `2700` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index 65f190e..e440edc 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -104,3 +104,141 @@ variable "replication_lag_silenced" { type = "map" default = {} } + +# +# Queries Changing Abnormally +# +variable "queries_changing_database_ids" { + description = "" + type = "list" + default = [] +} + +variable "queries_changing_region" { + description = "" + type = "string" + default = "" +} + +variable "queries_changing_message" { + description = "Custom message for the Queries Changing monitor" + type = "string" + default = "" +} + +variable "queries_changing_timeframe" { + description = "Timeframe for the Queries Changing mon monitor" + type = "string" + default = "last_10m" +} + +variable "queries_changing_anomaly_detection_algorithm" { + description = "Anomaly Detection Algorithm used" + type = "string" + default = "robust" +} + +variable 
"queries_changing_deviations" { + description = "Deviations to detect the anomaly" + type = "string" + default = 4 +} + +variable "queries_changing_direction" { + description = "Direction of the anomaly. It can be both, below or above." + type = "string" + default = "both" +} + +variable "queries_changing_seasonality" { + description = "Seasonality of the algorithm" + type = "string" + default = "weekly" +} + +variable "queries_changing_threshold_warning" { + description = "Queries Changing warning threshold" + type = "string" + default = 0.5 +} + +variable "queries_changing_threshold_critical" { + description = "Queries Changing critical threshold" + type = "string" + default = 1 +} + +variable "queries_changing_silenced" { + description = "Groups to mute for GCP Cloud SQL Queries Changing monitor" + type = "map" + default = {} +} + +# +# Questions Changing +# +variable "questions_changing_message" { + description = "Custom message for the Questions Changing monitor" + type = "string" + default = "" +} + +variable "questions_changing_timeframe" { + description = "Timeframe for the Questions Changing mon monitor" + type = "string" + default = "last_10m" +} + +variable "questions_changing_database_ids" { + description = "" + type = "list" + default = [] +} + +variable "questions_changing_region" { + description = "" + type = "string" + default = "" +} + +variable "questions_changing_anomaly_detection_algorithm" { + description = "Anomaly Detection Algorithm used" + type = "string" + default = "robust" +} + +variable "questions_changing_deviations" { + description = "Deviations to detect the anomaly" + type = "string" + default = 4 +} + +variable "questions_changing_direction" { + description = "Direction of the anomaly. It can be both, below or above." 
+ type = "string" + default = "both" +} + +variable "questions_changing_seasonality" { + description = "Seasonality of the algorithm" + type = "string" + default = "weekly" +} + +variable "questions_changing_threshold_warning" { + description = "Questions Changing warning threshold" + type = "string" + default = 0.5 +} + +variable "questions_changing_threshold_critical" { + description = "Questions Changing critical threshold" + type = "string" + default = 1 +} + +variable "questions_changing_silenced" { + description = "Groups to mute for GCP Cloud SQL Network Connections monitor" + type = "map" + default = {} +} diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index abe928a..e24b5db 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -94,3 +94,107 @@ EOF "engine:mysql", ] } + +# +# Queries Anomaly +# +resource "datadog_monitor" "queries_changing_anomaly" { + count = "${length(var.queries_changing_database_ids)}" + + name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally on ${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.queries_changing_message, var.message)}" + + type = "metric alert" + + query = < ${var.queries_changing_threshold_critical} +EOF + + thresholds { + warning = "${var.queries_changing_threshold_warning}" + critical = "${var.queries_changing_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = 
"${var.queries_changing_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "engine:mysql", + "database_id:${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]}}", + ] +} + +# +# Questions Anomaly +# +resource "datadog_monitor" "questions_changing_anomaly" { + count = "${length(var.questions_changing_database_ids)}" + + name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally on ${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.questions_changing_message, var.message)}" + + type = "metric alert" + + query = < ${var.questions_changing_threshold_critical} +EOF + + thresholds { + warning = "${var.questions_changing_threshold_warning}" + critical = "${var.questions_changing_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = "${var.questions_changing_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "engine:mysql", + "database_id:${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]}", + ] +} From de71446a1b5a89b678089ca9d67b4cc9d8de3e8c Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 2 Jul 2018 10:21:14 +0200 Subject: [PATCH 13/46] MON-224 Fixed filter for Queries and Question Changing monitors. Removed non used variables and added description of the ones without any. 
--- cloud/gcp/cloud-sql/mysql/README.md | 9 ++++----- cloud/gcp/cloud-sql/mysql/inputs.tf | 18 +++--------------- .../cloud-sql/mysql/monitors-cloudsql-mysql.tf | 16 ++++++---------- 3 files changed, 13 insertions(+), 30 deletions(-) diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index 12dfdd3..f798ee7 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -50,29 +50,28 @@ Inputs | network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no | | project_id | ID of the GCP Project | string | - | yes | | queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no | -| queries_changing_database_ids | Queries Changing Abnormally | list | `` | no | +| queries_changing_database_ids | List of database ids for the Queries Changing monitor | list | `` | no | | queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | queries_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | | queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no | -| queries_changing_region | | string | `` | no | | queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | | queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `` | no | | queries_changing_threshold_critical | Queries Changing critical threshold | string | `1` | no | | queries_changing_threshold_warning | Queries Changing warning threshold | string | `0.5` | no | | queries_changing_timeframe | Timeframe for the Queries Changing mon monitor | string | `last_10m` | no | | questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no | -| questions_changing_database_ids | | list | `` | no | +| questions_changing_database_ids | List of database ids for the Questions Changing monitor | list | `` | no | | questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | questions_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | | questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no | -| questions_changing_region | | string | `` | no | | questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | | questions_changing_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | | questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no | | questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | -| questions_changing_timeframe | Timeframe for the Questions Changing mon monitor | string | `last_10m` | no | +| questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_10m` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | | replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `2700` | no | | replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `2000` | no | | replication_lag_timeframe | Timeframe for the Replication Lag monitor | string | `last_10m` | no | + diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index e440edc..833d159 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -109,17 +109,11 @@ variable "replication_lag_silenced" { # Queries Changing Abnormally # variable "queries_changing_database_ids" { - description = "" + description = "List of database ids for the Queries Changing monitor" type = "list" default = [] } -variable "queries_changing_region" { - description = "" - type = "string" - default = "" -} - variable "queries_changing_message" { description = "Custom message for the Queries Changing monitor" type = "string" @@ 
-184,23 +178,17 @@ variable "questions_changing_message" { } variable "questions_changing_timeframe" { - description = "Timeframe for the Questions Changing mon monitor" + description = "Timeframe for the Questions Changing monitor" type = "string" default = "last_10m" } variable "questions_changing_database_ids" { - description = "" + description = "List of database ids for the Questions Changing monitor" type = "list" default = [] } -variable "questions_changing_region" { - description = "" - type = "string" - default = "" -} - variable "questions_changing_anomaly_detection_algorithm" { description = "Anomaly Detection Algorithm used" type = "string" diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index e24b5db..a81b8af 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -32,7 +32,6 @@ EOF critical = "${var.network_connections_threshold_critical}" } - include_tags = true notify_no_data = true require_full_window = false renotify_interval = 0 @@ -74,7 +73,6 @@ EOF warning = "${var.replication_lag_threshold_warning}" } - include_tags = true notify_no_data = true require_full_window = false renotify_interval = 0 @@ -101,7 +99,7 @@ EOF resource "datadog_monitor" "queries_changing_anomaly" { count = "${length(var.queries_changing_database_ids)}" - name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally on ${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally on ${var.project_id}:${var.queries_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} 
{{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.queries_changing_message, var.message)}" type = "metric alert" @@ -110,7 +108,7 @@ resource "datadog_monitor" "queries_changing_anomaly" { avg(${var.queries_changing_timeframe}): anomalies( default( - avg:gcp.cloudsql.database.mysql.queries{project_id:${var.project_id},database_id:${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]}}, + avg:gcp.cloudsql.database.mysql.queries{project_id:${var.project_id},database_id:${var.project_id}:${var.queries_changing_database_ids[count.index]}}, 0), '${var.queries_changing_anomaly_detection_algorithm}', ${var.queries_changing_deviations}, @@ -125,7 +123,6 @@ EOF critical = "${var.queries_changing_threshold_critical}" } - include_tags = true notify_no_data = true require_full_window = false renotify_interval = 0 @@ -143,7 +140,7 @@ EOF "env:${var.environment}", "resource:cloud-sql", "engine:mysql", - "database_id:${var.project_id}:${var.queries_changing_region}:${var.queries_changing_database_ids[count.index]}}", + "database_id:${var.project_id}:${var.queries_changing_database_ids[count.index]}}", ] } @@ -153,7 +150,7 @@ EOF resource "datadog_monitor" "questions_changing_anomaly" { count = "${length(var.questions_changing_database_ids)}" - name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally on ${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally on ${var.project_id}:${var.questions_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = 
"${coalesce(var.questions_changing_message, var.message)}" type = "metric alert" @@ -162,7 +159,7 @@ resource "datadog_monitor" "questions_changing_anomaly" { avg(${var.questions_changing_timeframe}): anomalies( default( - avg:gcp.cloudsql.database.mysql.questions{project_id:${var.project_id},database_id:${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]}}, + avg:gcp.cloudsql.database.mysql.questions{project_id:${var.project_id},database_id:${var.project_id}:${var.questions_changing_database_ids[count.index]}}, 0), '${var.questions_changing_anomaly_detection_algorithm}', ${var.questions_changing_deviations}, @@ -177,7 +174,6 @@ EOF critical = "${var.questions_changing_threshold_critical}" } - include_tags = true notify_no_data = true require_full_window = false renotify_interval = 0 @@ -195,6 +191,6 @@ EOF "env:${var.environment}", "resource:cloud-sql", "engine:mysql", - "database_id:${var.project_id}:${var.questions_changing_region}:${var.questions_changing_database_ids[count.index]}", + "database_id:${var.project_id}:${var.questions_changing_database_ids[count.index]}", ] } From e5c84c1a33f40964108b241792f87136fe105384 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Wed, 4 Jul 2018 12:31:38 +0200 Subject: [PATCH 14/46] MON-224 adapt MySQL timeframe and thresholds --- cloud/gcp/cloud-sql/mysql/README.md | 6 +++--- cloud/gcp/cloud-sql/mysql/inputs.tf | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index f798ee7..3e54165 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -47,7 +47,7 @@ Inputs | network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | | network_connections_threshold_critical | Number of network connections (critical threshold) | string | `3600` | no | | network_connections_threshold_warning | 
Number of network connections (warning threshold) | string | `3200` | no | -| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no | +| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_15m` | no | | project_id | ID of the GCP Project | string | - | yes | | queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no | | queries_changing_database_ids | List of database ids for the Queries Changing monitor | list | `` | no | @@ -71,7 +71,7 @@ Inputs | questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_10m` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | -| replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `2700` | no | -| replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `2000` | no | +| replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `900` | no | +| replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `300` | no | | replication_lag_timeframe | Timeframe for the Replication Lag monitor | string | `last_10m` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index 833d159..55d274b 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -45,7 +45,7 @@ variable "network_connections_message" { variable "network_connections_timeframe" { description = "Timeframe for the Network Connections monitor" type = "string" - default = "last_5m" + default = "last_15m" } variable "network_connections_hard_limit" { @@ -90,13 +90,13 @@ variable "replication_lag_timeframe" { variable 
"replication_lag_threshold_warning" { description = "Seconds behind the master (warning threshold)" type = "string" - default = 2000 + default = 300 } variable "replication_lag_threshold_critical" { description = "Seconds behind the master (critical threshold)" type = "string" - default = 2700 + default = 900 } variable "replication_lag_silenced" { From dd30d5336ea880af6878851d971387e7ce508aab Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Wed, 4 Jul 2018 12:32:34 +0200 Subject: [PATCH 15/46] MON-224 Avoid notifying if no data for MySQL alerts because we can have no queries, questions or connections and it could be completely right. --- cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index a81b8af..dac2edd 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -32,7 +32,7 @@ EOF critical = "${var.network_connections_threshold_critical}" } - notify_no_data = true + notify_no_data = false require_full_window = false renotify_interval = 0 notify_audit = false @@ -123,7 +123,7 @@ EOF critical = "${var.queries_changing_threshold_critical}" } - notify_no_data = true + notify_no_data = false require_full_window = false renotify_interval = 0 notify_audit = false @@ -174,7 +174,7 @@ EOF critical = "${var.questions_changing_threshold_critical}" } - notify_no_data = true + notify_no_data = false require_full_window = false renotify_interval = 0 notify_audit = false From 8a67a2bde7ac72753251e75cb689366e5a1998a6 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Wed, 4 Jul 2018 12:33:41 +0200 Subject: [PATCH 16/46] MON-224 Fixed format and removed duplicate attributes for instance monitors --- .../gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf | 8 ++------ 1 file changed, 2 insertions(+), 6 
deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 5ffa534..33f2a40 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -21,7 +21,8 @@ resource "datadog_monitor" "cpu_utilization" { type = "metric alert" query = < ${var.cpu_threshold_critical} EOF @@ -31,7 +32,6 @@ EOF critical = "${var.cpu_threshold_critical}" } - include_tags = true notify_no_data = true require_full_window = false renotify_interval = 0 @@ -72,7 +72,6 @@ EOF critical = "${var.disk_threshold_critical}" } - include_tags = true notify_no_data = true require_full_window = false renotify_interval = 0 @@ -113,7 +112,6 @@ EOF critical = "${var.memory_threshold_critical}" } - include_tags = true notify_no_data = true require_full_window = false renotify_interval = 0 @@ -160,7 +158,6 @@ EOF critical = "${var.memory_forecast_threshold_critical}" } - include_tags = true notify_no_data = true require_full_window = false renotify_interval = 0 @@ -200,7 +197,6 @@ EOF critical = "${var.failover_unavailable_threshold_critical}" } - include_tags = true notify_no_data = true require_full_window = false renotify_interval = 0 From 42ffc11cee5f199aba3080408dbf807fc3925f44 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Wed, 4 Jul 2018 12:34:17 +0200 Subject: [PATCH 17/46] MON-224 adapt instance timeframe and thresholds --- cloud/gcp/cloud-sql/instance/README.md | 5 +++-- cloud/gcp/cloud-sql/instance/inputs.tf | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index ab0b700..0c2766b 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -39,8 +39,8 @@ Inputs | cpu_message | Custom message for the CPU Utilization monitor | string | `` | no | | cpu_silenced | Groups to mute for 
GCP Cloud SQL CPU Utilization monitor | map | `` | no | | cpu_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no | -| cpu_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.85` | no | -| cpu_timeframe | Timeframe for the CPU Utilization monitor | string | `last_2h` | no | +| cpu_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.8` | no | +| cpu_timeframe | Timeframe for the CPU Utilization monitor | string | `last_30m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | disk_message | Custom message for the Disk Utilization monitor | string | `` | no | | disk_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | @@ -68,3 +68,4 @@ Inputs | memory_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | project_id | ID of the GCP Project | string | - | yes | + diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index e060c47..f7d82cd 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -45,13 +45,13 @@ variable "cpu_message" { variable "cpu_timeframe" { description = "Timeframe for the CPU Utilization monitor" type = "string" - default = "last_2h" + default = "last_30m" } variable "cpu_threshold_warning" { description = "CPU Utilization in fraction (warning threshold)" type = "string" - default = 0.85 + default = 0.8 } variable "cpu_threshold_critical" { From 589960802342fd253e9f5060da08d828a105a261 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 5 Jul 2018 12:49:30 +0200 Subject: [PATCH 18/46] MON-224 Readme and monitor type for memory forecast fixed --- cloud/gcp/cloud-sql/instance/README.md | 4 ++-- cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf | 2 +- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 0c2766b..b1638e2 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -5,8 +5,8 @@ How to use this module ---------------------- ``` -module "datadog-monitors-gcp-cloudsql" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/clouds-sql/instance?ref={revision}" +module "datadog-monitors-gcp-cloudsql-instance" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/cloud-sql/instance?ref={revision}" project_id = "${var.gcp_project_id}" environment = "${var.environment}" diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 33f2a40..3ebc71d 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -138,7 +138,7 @@ resource "datadog_monitor" "memory_utilization_forecast" { name = "[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.memory_forecast_message, var.message)}" - type = "metric alert" + type = "query alert" query = < Date: Thu, 5 Jul 2018 12:52:03 +0200 Subject: [PATCH 19/46] MON-224 Readme and replication lag timeframe variable fixed --- cloud/gcp/cloud-sql/mysql/README.md | 5 +++-- cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index 3e54165..eaf55f6 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -6,7 +6,7 @@ How to use this module ``` module 
"datadog-monitors-gcp-cloudsql-mysql" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/clouds-sql/mysql?ref={revision}" + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/cloud-sql/mysql?ref={revision}" project_id = "${var.gcp_project_id}" environment = "${var.environment}" @@ -21,7 +21,8 @@ Creates DataDog monitors with the following checks : * CloudSQL MySQL Network Connections * CloudSQL MySQL Replication Lag -* CloudSQL MySQL Failover Lag +* CloudSQL MySQL Queries Changing Anomaly (not created by default) +* CloudSQL MySQL Questions Changing Anomaly (not created by default) Useful links ------------ diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index dac2edd..833664e 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -62,7 +62,7 @@ resource "datadog_monitor" "replication_lag" { type = "metric alert" query = < ${var.replication_lag_threshold_critical} From 0ca137cbd20be4513d9fd1a878a4b2193c7222d7 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 5 Jul 2018 15:34:15 +0200 Subject: [PATCH 20/46] MON-224 Extra tags and many fixes from testing --- cloud/gcp/cloud-sql/instance/README.md | 49 ++++++------ cloud/gcp/cloud-sql/instance/inputs.tf | 74 +++++++++++++------ .../instance/monitors-cloud-sql-instance.tf | 59 ++++++++------- cloud/gcp/cloud-sql/mysql/README.md | 6 +- cloud/gcp/cloud-sql/mysql/inputs.tf | 26 ++++++- .../mysql/monitors-cloudsql-mysql.tf | 12 ++- 6 files changed, 149 insertions(+), 77 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index b1638e2..9584e07 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -36,36 +36,41 @@ Inputs | Name | Description | Type | Default | Required | 
|------|-------------|:----:|:-----:|:-----:| -| cpu_message | Custom message for the CPU Utilization monitor | string | `` | no | -| cpu_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | -| cpu_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no | -| cpu_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.8` | no | -| cpu_timeframe | Timeframe for the CPU Utilization monitor | string | `last_30m` | no | +| cpu_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | +| cpu_utilization_message | Custom message for the CPU Utilization monitor | string | `` | no | +| cpu_utilization_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | +| cpu_utilization_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no | +| cpu_utilization_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.8` | no | +| cpu_utilization_timeframe | Timeframe for the CPU Utilization monitor | string | `last_30m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | -| disk_message | Custom message for the Disk Utilization monitor | string | `` | no | -| disk_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | -| disk_threshold_critical | Disk Utilization in fraction (critical threshold) | string | `0.9` | no | -| disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | -| disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | +| disk_utilization_extra_tags | Extra tags for GCP Cloud SQL Disk Utilization monitor | list | `` | no | +| disk_utilization_message | Custom message for the Disk Utilization monitor | string | `` | no | +| disk_utilization_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map
| `` | no | +| disk_utilization_threshold_critical | Disk Utilization in fraction (critical threshold) | string | `0.9` | no | +| disk_utilization_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | +| disk_utilization_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | +| failover_unavailable_extra_tags | Extra tags for GCP Cloud SQL Failover Unavailable monitor | list | `` | no | | failover_unavailable_message | Custom message for the Failover Unavailable monitor | string | `` | no | | failover_unavailable_silenced | Groups to mute for GCP Cloud SQL Failover Unavailable monitor | map | `` | no | | failover_unavailable_threshold_critical | Failover Unavailable critical threshold | string | `0` | no | | failover_unavailable_timeframe | Timeframe for the Failover Unavailable monitor | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | -| memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | -| memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | -| memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | -| memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | -| memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | -| memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | -| memory_message | Custom message for 
the Memory Utilization monitor | string | `` | no | -| memory_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | -| memory_threshold_critical | Memory Utilization in fraction (critical threshold) | string | `0.9` | no | -| memory_threshold_warning | Memory Utilization in fraction (warning threshold) | string | `0.8` | no | -| memory_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | +| memory_utilization_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization monitor | list | `` | no | +| memory_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization Forecast monitor | list | `` | no | +| memory_utilization_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | +| memory_utilization_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | +| memory_utilization_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | +| memory_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | +| memory_utilization_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | +| memory_utilization_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | +| memory_utilization_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | +| memory_utilization_message | Custom message for the Memory Utilization monitor | string | `` | no | +| memory_utilization_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | +| memory_utilization_threshold_critical | Memory Utilization in fraction (critical threshold) | string | `0.9` | no | +| memory_utilization_threshold_warning | Memory Utilization in fraction (warning 
threshold) | string | `0.8` | no | +| memory_utilization_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | project_id | ID of the GCP Project | string | - | yes | diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index f7d82cd..3d7370d 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -36,137 +36,161 @@ variable "project_id" { # # CPU # -variable "cpu_message" { +variable "cpu_utilization_message" { description = "Custom message for the CPU Utilization monitor" type = "string" default = "" } -variable "cpu_timeframe" { +variable "cpu_utilization_timeframe" { description = "Timeframe for the CPU Utilization monitor" type = "string" default = "last_30m" } -variable "cpu_threshold_warning" { +variable "cpu_utilization_threshold_warning" { description = "CPU Utilization in fraction (warning threshold)" type = "string" default = 0.8 } -variable "cpu_threshold_critical" { +variable "cpu_utilization_threshold_critical" { description = "CPU Utilization in fraction (critical threshold)" type = "string" default = 0.9 } -variable "cpu_silenced" { +variable "cpu_utilization_silenced" { description = "Groups to mute for GCP Cloud SQL CPU Utilization monitor" type = "map" default = {} } +variable "cpu_utilization_extra_tags" { + description = "Extra tags for GCP Cloud SQL CPU Utilization monitor" + type = "list" + default = [] +} + # # DISK # -variable "disk_message" { +variable "disk_utilization_message" { description = "Custom message for the Disk Utilization monitor" type = "string" default = "" } -variable "disk_timeframe" { +variable "disk_utilization_timeframe" { description = "Timeframe for the Disk Utilization monitor" type = "string" default = "last_5m" } -variable "disk_threshold_warning" { +variable "disk_utilization_threshold_warning" { description = "Disk 
Utilization in fraction (warning threshold)" type = "string" default = 0.8 } -variable "disk_threshold_critical" { +variable "disk_utilization_threshold_critical" { description = "Disk Utilization in fraction (critical threshold)" type = "string" default = 0.9 } -variable "disk_silenced" { +variable "disk_utilization_silenced" { description = "Groups to mute for GCP Cloud SQL Disk Utilization monitor" type = "map" default = {} } +variable "disk_utilization_extra_tags" { + description = "Extra tags for GCP Cloud SQL Disk Utilization monitor" + type = "list" + default = [] +} + # # Memory Utilization # -variable "memory_message" { +variable "memory_utilization_message" { description = "Custom message for the Memory Utilization monitor" default = "" } -variable "memory_timeframe" { +variable "memory_utilization_timeframe" { description = "Timeframe for the Memory Utilization monitor" default = "last_5m" } -variable "memory_threshold_warning" { +variable "memory_utilization_threshold_warning" { description = "Memory Utilization in fraction (warning threshold)" default = 0.8 } -variable "memory_threshold_critical" { +variable "memory_utilization_threshold_critical" { description = "Memory Utilization in fraction (critical threshold)" default = 0.9 } -variable "memory_silenced" { +variable "memory_utilization_silenced" { description = "Groups to mute for GCP Cloud SQL Memory Utilization monitor" type = "map" default = {} } +variable "memory_utilization_extra_tags" { + description = "Extra tags for GCP Cloud SQL Memory Utilization monitor" + type = "list" + default = [] +} + # # Memory Utilization Forecast # -variable "memory_forecast_message" { +variable "memory_utilization_forecast_message" { description = "Custom message for the Memory Utilization Forecast monitor" default = "" } -variable "memory_forecast_timeframe" { +variable "memory_utilization_forecast_timeframe" { description = "Timeframe for the Memory Utilization Forecast monitor" default = "next_3d" } -variable
"memory_forecast_interval" { +variable "memory_utilization_forecast_interval" { description = "Interval for the Memory Utilization Forecast monitor" default = "30m" } -variable "memory_forecast_history" { +variable "memory_utilization_forecast_history" { description = "History for the Memory Utilization Forecast monitor" default = "12h" } -variable "memory_forecast_threshold_warning" { +variable "memory_utilization_forecast_threshold_warning" { description = "Memory Utilization Forecast in fraction (warning threshold)" default = 0.8 } -variable "memory_forecast_threshold_critical" { +variable "memory_utilization_forecast_threshold_critical" { description = "Memory Utilization Forecast in fraction (critical threshold)" default = 0.9 } -variable "memory_forecast_silenced" { +variable "memory_utilization_forecast_silenced" { description = "Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor" type = "map" default = {} } +variable "memory_utilization_forecast_extra_tags" { + description = "Extra tags for GCP Cloud SQL Memory Utilization Forecast monitor" + type = "list" + default = [] +} + # # Failover Unavailable # @@ -194,3 +218,9 @@ variable "failover_unavailable_silenced" { type = "map" default = {} } + +variable "failover_unavailable_extra_tags" { + description = "Extra tags for GCP Cloud SQL Failover Unavailable monitor" + type = "list" + default = [] +} diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 3ebc71d..ccf6ce9 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -16,20 +16,20 @@ data "template_file" "filter" { # resource "datadog_monitor" "cpu_utilization" { name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" 
- message = "${coalesce(var.cpu_message, var.message)}" + message = "${coalesce(var.cpu_utilization_message, var.message)}" type = "metric alert" query = < ${var.cpu_threshold_critical} + > ${var.cpu_utilization_threshold_critical} EOF thresholds { - warning = "${var.cpu_threshold_warning}" - critical = "${var.cpu_threshold_critical}" + warning = "${var.cpu_utilization_threshold_warning}" + critical = "${var.cpu_utilization_threshold_critical}" } notify_no_data = true @@ -41,13 +41,14 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.cpu_silenced}" + silenced = "${var.cpu_utilization_silenced}" tags = [ "team:gcp", "provider:gcp", - "env:${var.environment}", "resource:cloud-sql", + "env:${var.environment}", + "${var.cpu_utilization_extra_tags}", ] } @@ -56,20 +57,20 @@ EOF # resource "datadog_monitor" "disk_utilization" { name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.disk_message, var.message)}" + message = "${coalesce(var.disk_utilization_message, var.message)}" type = "metric alert" query = < ${var.disk_threshold_critical} + > ${var.disk_utilization_threshold_critical} EOF thresholds { - warning = "${var.disk_threshold_warning}" - critical = "${var.disk_threshold_critical}" + warning = "${var.disk_utilization_threshold_warning}" + critical = "${var.disk_utilization_threshold_critical}" } notify_no_data = true @@ -81,13 +82,14 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.disk_silenced}" + silenced = "${var.disk_utilization_silenced}" tags = [ "team:gcp", "provider:gcp", "env:${var.environment}", "resource:cloud-sql", + "${var.disk_utilization_extra_tags}", ] } @@ -96,20 +98,20 @@ EOF # resource "datadog_monitor" "memory_utilization" { name = 
"[${var.environment}] Cloud SQL Memory Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.memory_message, var.message)}" + message = "${coalesce(var.memory_utilization_message, var.message)}" type = "metric alert" query = < ${var.memory_threshold_critical} + > ${var.memory_utilization_threshold_critical} EOF thresholds { - warning = "${var.memory_threshold_warning}" - critical = "${var.memory_threshold_critical}" + warning = "${var.memory_utilization_threshold_warning}" + critical = "${var.memory_utilization_threshold_critical}" } notify_no_data = true @@ -121,13 +123,14 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.memory_silenced}" + silenced = "${var.memory_utilization_silenced}" tags = [ "team:gcp", "provider:gcp", "env:${var.environment}", "resource:cloud-sql", + "${var.memory_utilization_extra_tags}", ] } @@ -136,26 +139,26 @@ EOF # resource "datadog_monitor" "memory_utilization_forecast" { name = "[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.memory_forecast_message, var.message)}" + message = "${coalesce(var.memory_utilization_forecast_message, var.message)}" type = "query alert" query = < ${var.memory_forecast_threshold_critical} + > ${var.memory_utilization_forecast_threshold_critical} EOF thresholds { - warning = "${var.memory_forecast_threshold_warning}" - critical = "${var.memory_forecast_threshold_critical}" + warning = "${var.memory_utilization_forecast_threshold_warning}" + critical = "${var.memory_utilization_forecast_threshold_critical}" } notify_no_data = true @@ -167,13 +170,14 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = 
"${var.delay}" - silenced = "${var.memory_forecast_silenced}" + silenced = "${var.memory_utilization_forecast_silenced}" tags = [ "team:gcp", "provider:gcp", "env:${var.environment}", "resource:cloud-sql", + "${var.memory_utilization_forecast_extra_tags}", ] } @@ -213,5 +217,6 @@ EOF "provider:gcp", "env:${var.environment}", "resource:cloud-sql", + "${var.failover_unavailable_extra_tags}", ] } diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index eaf55f6..5fcf0b5 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -43,6 +43,7 @@ Inputs | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a monitor is triggered | string | - | yes | +| network_connections_extra_tags | Extra tags for GCP Cloud SQL Network Connections monitor | list | `` | no | | network_connections_hard_limit | Max number of connections for the CloudSQL Instance. Default value is the max value on https://cloud.google.com/sql/docs/quotas#fixed-limits for MySQL | string | `4000` | no | | network_connections_message | Custom message for the Network Connections monitor | string | `` | no | | network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | @@ -54,6 +55,7 @@ Inputs | queries_changing_database_ids | List of database ids for the Queries Changing monitor | list | `` | no | | queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | queries_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | +| queries_changing_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `` | no | | queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no | | queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | | queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `` | no | @@ -64,12 +66,14 @@ Inputs | questions_changing_database_ids | List of database ids for the Questions Changing monitor | list | `` | no | | questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | questions_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | +| questions_changing_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `` | no | | questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no | | questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | -| questions_changing_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | +| questions_changing_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `` | no | | questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no | | questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | | questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_10m` | no | +| replication_lag_extra_tags | Extra tags for GCP Cloud SQL Replication Lag monitor | list | `` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | | replication_lag_threshold_critical | Seconds behind the master (critical threshold) |
string | `900` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index 55d274b..e7bda19 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -72,6 +72,12 @@ variable "network_connections_silenced" { default = {} } +variable "network_connections_extra_tags" { + description = "Extra tags for GCP Cloud SQL Network Connections monitor" + type = "list" + default = [] +} + # # Replication Lag # @@ -105,6 +111,12 @@ variable "replication_lag_silenced" { default = {} } +variable "replication_lag_extra_tags" { + description = "Extra tags for GCP Cloud SQL SQL Replication monitor" + type = "list" + default = [] +} + # # Queries Changing Abnormally # @@ -168,6 +180,12 @@ variable "queries_changing_silenced" { default = {} } +variable "queries_changing_extra_tags" { + description = "Extra tags for GCP Cloud SQL Queries Changing monitor" + type = "list" + default = [] +} + # # Questions Changing # @@ -226,7 +244,13 @@ variable "questions_changing_threshold_critical" { } variable "questions_changing_silenced" { - description = "Groups to mute for GCP Cloud SQL Network Connections monitor" + description = "Groups to mute for GCP Cloud SQL Questions Changing monitor" type = "map" default = {} } + +variable "questions_changing_extra_tags" { + description = "Extra tags for GCP Cloud SQL Questions Changing monitor" + type = "list" + default = [] +} diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 833664e..5f7327f 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -49,6 +49,7 @@ EOF "env:${var.environment}", "resource:cloud-sql", "engine:mysql", + "${var.network_connections_extra_tags}", ] } @@ -90,6 +91,7 @@ EOF "env:${var.environment}", "resource:cloud-sql", "engine:mysql", + "${var.replication_lag_extra_tags}", ] } @@ -99,10 +101,10 @@ EOF 
resource "datadog_monitor" "queries_changing_anomaly" { count = "${length(var.queries_changing_database_ids)}" - name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally on ${var.project_id}:${var.queries_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] [${var.queries_changing_database_ids[count.index]}] Cloud SQL MySQL Queries Count changed abnormally {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.queries_changing_message, var.message)}" - type = "metric alert" + type = "query alert" query = < Date: Mon, 23 Jul 2018 14:41:18 +0200 Subject: [PATCH 21/46] MON-224 Outputs for all monitors of instances --- cloud/gcp/cloud-sql/instance/outputs.tf | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 cloud/gcp/cloud-sql/instance/outputs.tf diff --git a/cloud/gcp/cloud-sql/instance/outputs.tf b/cloud/gcp/cloud-sql/instance/outputs.tf new file mode 100644 index 0000000..55d966b --- /dev/null +++ b/cloud/gcp/cloud-sql/instance/outputs.tf @@ -0,0 +1,19 @@ +output "cpu_utilization_id" { + value = "${datadog_monitor.cpu_utilization.id}" +} + +output "disk_utilization_id" { + value = "${datadog_monitor.disk_utilization.id}" +} + +output "memory_utilization_id" { + value = "${datadog_monitor.memory_utilization.id}" +} + +output "memory_utilization_forecast_id" { + value = "${datadog_monitor.memory_utilization_forecast.id}" +} + +output "failover_unavailable_id" { + value = "${datadog_monitor.failover_unavailable.id}" +} From d0fe38e16dac026b4394856d59a7901d31e57d7f Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 23 Jul 2018 14:45:07 +0200 Subject: [PATCH 22/46] MON-224 Outputs for all monitors of mysql --- 
cloud/gcp/cloud-sql/mysql/outputs.tf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 cloud/gcp/cloud-sql/mysql/outputs.tf diff --git a/cloud/gcp/cloud-sql/mysql/outputs.tf b/cloud/gcp/cloud-sql/mysql/outputs.tf new file mode 100644 index 0000000..2e26385 --- /dev/null +++ b/cloud/gcp/cloud-sql/mysql/outputs.tf @@ -0,0 +1,15 @@ +output "network_connections_id" { + value = "${datadog_monitor.network_connections.id}" +} + +output "replication_lag_id" { + value = "${datadog_monitor.replication_lag.id}" +} + +output "queries_changing_anomaly_id" { + value = ["${datadog_monitor.queries_changing_anomaly.*.id}"] +} + +output "questions_changing_anomaly_id" { + value = ["${datadog_monitor.questions_changing_anomaly.*.id}"] +} From 2e5ac912c1a6df17f83d701088cc50796132ac69 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 26 Jul 2018 17:04:05 +0200 Subject: [PATCH 23/46] MON-224 Automated readme and outputs --- README.md | 4 ++ cloud/gcp/cloud-sql/instance/README.md | 46 +++++++++-------- .../instance/monitors-cloud-sql-instance.tf | 4 +- cloud/gcp/cloud-sql/instance/outputs.tf | 15 ++++-- cloud/gcp/cloud-sql/mysql/README.md | 49 ++++++++++--------- cloud/gcp/cloud-sql/mysql/outputs.tf | 12 +++-- 6 files changed, 75 insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 9937fb7..892649a 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,10 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [sql-database](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/sql-database/) - [storage](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/storage/) - [stream-analytics](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/stream-analytics/) + - [gcp](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/) + - 
[cloud-sql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/) + - [instance](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/instance/) + - [mysql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/mysql/) - [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/) - [alerting-message](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/alerting-message/) - [filter-tags](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/filter-tags/) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 9584e07..4350e35 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -1,38 +1,28 @@ -GCP CloudSQL Instance Monitors -============================== +# CLOUD GCP CLOUD-SQL INSTANCE DataDog monitors -How to use this module ----------------------- +## How to use this module ``` -module "datadog-monitors-gcp-cloudsql-instance" { +module "datadog-monitors-cloud-gcp-cloud-sql-instance" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/cloud-sql/instance?ref={revision}" - project_id = "${var.gcp_project_id}" environment = "${var.environment}" message = "${module.datadog-message-alerting.alerting-message}" } ``` -Purpose -------- -Creates DataDog monitors with the following checks : +## Purpose -* CloudSQL Instance CPU Utilization -* CloudSQL Instance Disk Utilization -* CloudSQL Instance Memory Utilization -* CloudSQL Instance Memory Utilization Forecast -* CloudSQL Instance Failover Unavailable +Creates DataDog monitors with the following checks: -Useful links ------------- +- Cloud SQL CPU Utilization +- Cloud SQL Disk Utilization +- Cloud SQL Memory Utilization +- Cloud SQL Memory Utilization Forecast +- Cloud SQL Failover Unavailable -* [GCP Metrics for 
CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql) -* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/) - -Inputs ------- +## Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| @@ -74,3 +64,17 @@ Inputs | message | Message sent when a monitor is triggered | string | - | yes | | project_id | ID of the GCP Project | string | - | yes | +## Outputs + +| Name | Description | +|------|-------------| +| cpu_utilization_id | id for monitor cpu_utilization | +| disk_utilization_id | id for monitor disk_utilization | +| failover_unavailable_id | id for monitor failover_unavailable | +| memory_utilization_forecast_id | id for monitor memory_utilization_forecast | +| memory_utilization_id | id for monitor memory_utilization | + +## Related documentation + +* [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql) +* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/) diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index ccf6ce9..54a5f91 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -15,7 +15,7 @@ data "template_file" "filter" { # CPU Utilization # resource "datadog_monitor" "cpu_utilization" { - name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL CPU Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_utilization_message, var.message)}" type = "metric 
alert" @@ -56,7 +56,7 @@ EOF # Disk Utilization # resource "datadog_monitor" "disk_utilization" { - name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL Disk Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.disk_utilization_message, var.message)}" type = "metric alert" diff --git a/cloud/gcp/cloud-sql/instance/outputs.tf b/cloud/gcp/cloud-sql/instance/outputs.tf index 55d966b..3fbeafd 100644 --- a/cloud/gcp/cloud-sql/instance/outputs.tf +++ b/cloud/gcp/cloud-sql/instance/outputs.tf @@ -1,19 +1,24 @@ output "cpu_utilization_id" { - value = "${datadog_monitor.cpu_utilization.id}" + description = "id for monitor cpu_utilization" + value = "${datadog_monitor.cpu_utilization.id}" } output "disk_utilization_id" { - value = "${datadog_monitor.disk_utilization.id}" + description = "id for monitor disk_utilization" + value = "${datadog_monitor.disk_utilization.id}" } output "memory_utilization_id" { - value = "${datadog_monitor.memory_utilization.id}" + description = "id for monitor memory_utilization" + value = "${datadog_monitor.memory_utilization.id}" } output "memory_utilization_forecast_id" { - value = "${datadog_monitor.memory_utilization_forecast.id}" + description = "id for monitor memory_utilization_forecast" + value = "${datadog_monitor.memory_utilization_forecast.id}" } output "failover_unavailable_id" { - value = "${datadog_monitor.failover_unavailable.id}" + description = "id for monitor failover_unavailable" + value = "${datadog_monitor.failover_unavailable.id}" } diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index 5fcf0b5..1b67001 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ 
b/cloud/gcp/cloud-sql/mysql/README.md @@ -1,40 +1,27 @@ -GCP CloudSQL MySQL Monitors -============================== +# CLOUD GCP CLOUD-SQL MYSQL DataDog monitors -How to use this module ----------------------- +## How to use this module ``` -module "datadog-monitors-gcp-cloudsql-mysql" { +module "datadog-monitors-cloud-gcp-cloud-sql-mysql" { source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/cloud-sql/mysql?ref={revision}" - project_id = "${var.gcp_project_id}" environment = "${var.environment}" message = "${module.datadog-message-alerting.alerting-message}" } ``` -Purpose -------- -Creates DataDog monitors with the following checks : +## Purpose -* CloudSQL MySQL Network Connections -* CloudSQL MySQL Replication Lag -* CloudSQL MySQL Queries Changing Anomaly (not created by default) -* CloudSQL MySQL Questions Changing Anomaly (not created by default) +Creates DataDog monitors with the following checks: -Useful links ------------- +- Cloud SQL MySQL Network Connections (hard limit: ${var.network_connections_hard_limit}) +- Cloud SQL MySQL Replication Lag too high +- Cloud SQL MySQL Queries Count changed abnormally +- Cloud SQL MySQL Questions Count changed abnormally -* [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql) -* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/) -* [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits) -* [Monitoring Replication Lag](https://cloud.google.com/sql/docs/mysql/high-availability#replication-lag-monitor) -* [Monitoring MySQL Performance Metrics](https://www.datadoghq.com/blog/monitoring-mysql-performance-metrics) - -Inputs ------- +## Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| @@ -80,3 +67,19 @@ Inputs | replication_lag_threshold_warning | Seconds behind the master (warning threshold) | 
string | `300` | no | | replication_lag_timeframe | Timeframe for the Replication Lag monitor | string | `last_10m` | no | +## Outputs + +| Name | Description | +|------|-------------| +| network_connections_id | id for monitor network_connections | +| queries_changing_anomaly_id | id for monitor queries_changing_anomaly | +| questions_changing_anomaly_id | id for monitor questions_changing_anomaly | +| replication_lag_id | id for monitor replication_lag | + +## Related documentation + +* [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql) +* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/) +* [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits) +* [Monitoring Replication Lag](https://cloud.google.com/sql/docs/mysql/high-availability#replication-lag-monitor) +* [Monitoring MySQL Performance Metrics](https://www.datadoghq.com/blog/monitoring-mysql-performance-metrics) diff --git a/cloud/gcp/cloud-sql/mysql/outputs.tf b/cloud/gcp/cloud-sql/mysql/outputs.tf index 2e26385..e093106 100644 --- a/cloud/gcp/cloud-sql/mysql/outputs.tf +++ b/cloud/gcp/cloud-sql/mysql/outputs.tf @@ -1,15 +1,19 @@ output "network_connections_id" { - value = "${datadog_monitor.network_connections.id}" + description = "id for monitor network_connections" + value = "${datadog_monitor.network_connections.id}" } output "replication_lag_id" { - value = "${datadog_monitor.replication_lag.id}" + description = "id for monitor replication_lag" + value = "${datadog_monitor.replication_lag.id}" } output "queries_changing_anomaly_id" { - value = ["${datadog_monitor.queries_changing_anomaly.*.id}"] + description = "id for monitor queries_changing_anomaly" + value = "${datadog_monitor.queries_changing_anomaly.id}" } output "questions_changing_anomaly_id" { - value = ["${datadog_monitor.questions_changing_anomaly.*.id}"] + description = "id for monitor 
questions_changing_anomaly" + value = "${datadog_monitor.questions_changing_anomaly.id}" } From 54a996e8b769e16a185db02a4f4523e75e9e8fa1 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 27 Jul 2018 18:15:43 +0200 Subject: [PATCH 24/46] MON-224 Monitors imported from the Actualys ones using the exported json --- cloud/gcp/cloud-sql/instance/inputs.tf | 79 ++++++--- .../instance/monitors-cloud-sql-instance.tf | 167 ++++++++++++------ cloud/gcp/cloud-sql/mysql/inputs.tf | 24 ++- .../mysql/monitors-cloudsql-mysql.tf | 97 +++++----- 4 files changed, 243 insertions(+), 124 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index 3d7370d..845f679 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -45,19 +45,19 @@ variable "cpu_utilization_message" { variable "cpu_utilization_timeframe" { description = "Timeframe for the CPU Utilization monitor" type = "string" - default = "last_30m" + default = "last_15m" } variable "cpu_utilization_threshold_warning" { - description = "CPU Utilization in fraction (warning threshold)" + description = "CPU Utilization in percentage (warning threshold)" type = "string" - default = 0.8 + default = 80 } variable "cpu_utilization_threshold_critical" { - description = "CPU Utilization in fraction (critical threshold)" + description = "CPU Utilization in percentage (critical threshold)" type = "string" - default = 0.9 + default = 90 } variable "cpu_utilization_silenced" { @@ -73,7 +73,7 @@ variable "cpu_utilization_extra_tags" { } # -# DISK +# DISK Utilization # variable "disk_utilization_message" { description = "Custom message for the Disk Utilization monitor" @@ -88,15 +88,15 @@ variable "disk_utilization_timeframe" { } variable "disk_utilization_threshold_warning" { - description = "Disk Utilization in fraction (warning threshold)" + description = "Disk Utilization in percentage (warning threshold)" type = "string" - default 
= 0.8 + default = 80 } variable "disk_utilization_threshold_critical" { - description = "Disk Utilization in fraction (critical threshold)" + description = "Disk Utilization in percentage (critical threshold)" type = "string" - default = 0.9 + default = 90 } variable "disk_utilization_silenced" { @@ -111,6 +111,45 @@ variable "disk_utilization_extra_tags" { default = [] } +# +# DISK Utilization Forecast +# +variable "disk_utilization_forecast_message" { + description = "Custom message for the Disk Utilization monitor" + type = "string" + default = "" +} + +variable "disk_utilization_forecast_timeframe" { + description = "Timeframe for the Disk Utilization monitor" + type = "string" + default = "next_1w" +} + +variable "disk_utilization_forecast_threshold_critical" { + description = "Disk Utilization in percentage (critical threshold)" + type = "string" + default = 80 +} + +variable "disk_utilization_forecast_threshold_critical_recovery" { + description = "Disk Utilization in percentage (recovery threshold)" + type = "string" + default = 72 +} + +variable "disk_utilization_forecast_silenced" { + description = "Groups to mute for GCP Cloud SQL Disk Utilization monitor" + type = "map" + default = {} +} + +variable "disk_utilization_forecast_extra_tags" { + description = "Extra tags for GCP Cloud SQL CPU Utilization monitor" + type = "list" + default = [] +} + # # Memory Utilization # @@ -125,13 +164,13 @@ variable "memory_utilization_timeframe" { } variable "memory_utilization_threshold_warning" { - description = "Memory Utilization in fraction (warning threshold)" - default = 0.8 + description = "Memory Utilization in percentage (warning threshold)" + default = 80 } variable "memory_utilization_threshold_critical" { - description = "Memory Utilization in fraction (critical threshold)" - default = 0.9 + description = "Memory Utilization in percentage (critical threshold)" + default = 90 } variable "memory_utilization_silenced" { @@ -169,14 +208,14 @@ variable 
"memory_utilization_forecast_history" { default = "12h" } -variable "memory_utilization_forecast_threshold_warning" { - description = "Memory Utilization Forecast in fraction (warning threshold)" - default = 0.8 +variable "memory_utilization_forecast_threshold_critical" { + description = "Memory Utilization Forecast in percentage (warning threshold)" + default = 90 } -variable "memory_utilization_forecast_threshold_critical" { - description = "Memory Utilization Forecast in fraction (critical threshold)" - default = 0.9 +variable "memory_utilization_forecast_threshold_critical_recovery" { + description = "Memory Utilization Forecast in percentage (recovery threshold)" + default = 81 } variable "memory_utilization_forecast_silenced" { diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 54a5f91..e326c01 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -15,15 +15,15 @@ data "template_file" "filter" { # CPU Utilization # resource "datadog_monitor" "cpu_utilization" { - name = "[${var.environment}] Cloud SQL CPU Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_utilization_message, var.message)}" type = "metric alert" query = < ${var.cpu_utilization_threshold_critical} EOF @@ -32,16 +32,19 @@ EOF critical = "${var.cpu_utilization_threshold_critical}" } - notify_no_data = true - require_full_window = false - renotify_interval = 0 notify_audit = false + locked = false timeout_h = 0 include_tags = true - locked = false - evaluation_delay = 
"${var.delay}" - new_host_delay = "${var.delay}" - silenced = "${var.cpu_utilization_silenced}" + no_data_timeframe = 30 + require_full_window = false + notify_no_data = true + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.cpu_utilization_silenced}" tags = [ "team:gcp", @@ -56,7 +59,7 @@ EOF # Disk Utilization # resource "datadog_monitor" "disk_utilization" { - name = "[${var.environment}] Cloud SQL Disk Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.disk_utilization_message, var.message)}" type = "metric alert" @@ -64,7 +67,7 @@ resource "datadog_monitor" "disk_utilization" { query = < ${var.disk_utilization_threshold_critical} EOF @@ -73,16 +76,19 @@ EOF critical = "${var.disk_utilization_threshold_critical}" } - notify_no_data = true - require_full_window = false - renotify_interval = 0 notify_audit = false + locked = false timeout_h = 0 include_tags = true - locked = false - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" - silenced = "${var.disk_utilization_silenced}" + no_data_timeframe = 20 + require_full_window = false + notify_no_data = true + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.disk_utilization_silenced}" tags = [ "team:gcp", @@ -93,6 +99,55 @@ EOF ] } +# +# Disk Utilization Forecast +# +resource "datadog_monitor" "disk_utilization_forecast" { + name = "[${var.environment}] Cloud SQL Disk utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" + message = "${coalesce(var.disk_utilization_forecast_message, 
var.message)}" + + type = "metric alert" + + query = <= ${var.disk_utilization_forecast_threshold_critical} +EOF + + thresholds { + critical = "${var.disk_utilization_forecast_threshold_critical}" + critical_recovery = "${var.disk_utilization_forecast_threshold_critical_recovery}" + } + + notify_audit = false + locked = false + timeout_h = 0 + include_tags = true + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.disk_utilization_forecast_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + "${var.disk_utilization_forecast_extra_tags}", + ] +} + # # Memory Utilization # @@ -105,8 +160,8 @@ resource "datadog_monitor" "memory_utilization" { query = < ${var.memory_utilization_threshold_critical} + by {database_id} * 100 + > ${var.memory_utilization_threshold_critical} EOF thresholds { @@ -114,16 +169,19 @@ EOF critical = "${var.memory_utilization_threshold_critical}" } - notify_no_data = true - require_full_window = false - renotify_interval = 0 notify_audit = false + locked = false timeout_h = 0 include_tags = true - locked = false - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" - silenced = "${var.memory_utilization_silenced}" + no_data_timeframe = 20 + require_full_window = false + notify_no_data = true + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.memory_utilization_silenced}" tags = [ "team:gcp", @@ -138,7 +196,7 @@ EOF # Memory Utilization Forecast # resource "datadog_monitor" "memory_utilization_forecast" { - name = "[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL Memory Utilization could reach 
{{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" message = "${coalesce(var.memory_utilization_forecast_message, var.message)}" type = "query alert" @@ -146,31 +204,33 @@ resource "datadog_monitor" "memory_utilization_forecast" { query = < ${var.memory_utilization_forecast_threshold_critical} + >= ${var.memory_utilization_forecast_threshold_critical} EOF thresholds { - warning = "${var.memory_utilization_forecast_threshold_warning}" - critical = "${var.memory_utilization_forecast_threshold_critical}" + critical = "${var.memory_utilization_forecast_threshold_critical}" + critical_recovery = "${var.memory_utilization_forecast_threshold_critical_recovery}" } - notify_no_data = true - require_full_window = false - renotify_interval = 0 notify_audit = false + locked = false timeout_h = 0 include_tags = true - locked = false - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" - silenced = "${var.memory_utilization_forecast_silenced}" + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.memory_utilization_forecast_silenced}" tags = [ "team:gcp", @@ -185,32 +245,35 @@ EOF # Failover Unavailable # resource "datadog_monitor" "failover_unavailable" { - name = "[${var.environment}] Cloud SQL Failover Unavailable {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL Failover Unavailable" message = "${coalesce(var.failover_unavailable_message, var.message)}" type = "metric alert" query = < ${var.replication_lag_threshold_critical} + min(${var.replication_lag_timeframe}): + avg:gcp.cloudsql.database.mysql.replication.seconds_behind_master{${data.template_file.filter.rendered}} + by {database_id} + > ${var.replication_lag_threshold_critical} EOF thresholds { @@ -74,16 +74,19 @@ EOF warning = 
"${var.replication_lag_threshold_warning}" } - notify_no_data = true - require_full_window = false - renotify_interval = 0 notify_audit = false + locked = false timeout_h = 0 include_tags = true - locked = false - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" - silenced = "${var.replication_lag_silenced}" + no_data_timeframe = 25 + require_full_window = false + notify_no_data = true + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.replication_lag_silenced}" tags = [ "team:gcp", @@ -99,9 +102,7 @@ EOF # Queries Anomaly # resource "datadog_monitor" "queries_changing_anomaly" { - count = "${length(var.queries_changing_database_ids)}" - - name = "[${var.environment}] [${var.queries_changing_database_ids[count.index]}] Cloud SQL MySQL Queries Count changed abnormally {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally" message = "${coalesce(var.queries_changing_message, var.message)}" type = "query alert" @@ -109,32 +110,36 @@ resource "datadog_monitor" "queries_changing_anomaly" { query = < ${var.queries_changing_threshold_critical} EOF thresholds { - warning = "${var.queries_changing_threshold_warning}" - critical = "${var.queries_changing_threshold_critical}" + warning = "${var.queries_changing_threshold_warning}" + critical = "${var.queries_changing_threshold_critical}" + critical_recovery = "${var.queries_changing_threshold_critical_recovery}" } - notify_no_data = false - require_full_window = false - renotify_interval = 0 notify_audit = false + locked = false timeout_h = 0 include_tags = true - locked = false - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" - silenced = "${var.queries_changing_silenced}" + require_full_window = false + notify_no_data = false + renotify_interval 
= 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.queries_changing_silenced}" tags = [ "team:gcp", @@ -142,7 +147,6 @@ EOF "env:${var.environment}", "resource:cloud-sql", "engine:mysql", - "database_id:${var.project_id}:${var.queries_changing_database_ids[count.index]}}", "${var.queries_changing_extra_tags}", ] } @@ -151,42 +155,44 @@ EOF # Questions Anomaly # resource "datadog_monitor" "questions_changing_anomaly" { - count = "${length(var.questions_changing_database_ids)}" - - name = "[${var.environment}] [${var.questions_changing_database_ids[count.index]}] Cloud SQL MySQL Questions Count changed abnormally {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally" message = "${coalesce(var.questions_changing_message, var.message)}" type = "query alert" query = < ${var.questions_changing_threshold_critical} + > ${var.questions_changing_threshold_critical} EOF thresholds { - warning = "${var.questions_changing_threshold_warning}" - critical = "${var.questions_changing_threshold_critical}" + warning = "${var.questions_changing_threshold_warning}" + critical = "${var.questions_changing_threshold_critical}" + critical_recovery = "${var.questions_changing_threshold_critical_recovery}" } - notify_no_data = false - require_full_window = false - renotify_interval = 0 notify_audit = false + locked = false timeout_h = 0 include_tags = true - locked = false - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" - silenced = "${var.questions_changing_silenced}" + require_full_window = false + notify_no_data = false + renotify_interval = 0 + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.questions_changing_silenced}" tags = [ "team:gcp", @@ -194,7 +200,6 @@ EOF "env:${var.environment}", 
"resource:cloud-sql", "engine:mysql", - "database_id:${var.project_id}:${var.questions_changing_database_ids[count.index]}", "${var.questions_changing_extra_tags}", ] } From 486f1c7514c3c8d1af3bc6af8c45ccda96d7d4f5 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Fri, 27 Jul 2018 18:16:48 +0200 Subject: [PATCH 25/46] MON-224 Auto readme and outputs --- README.md | 1 + cloud/gcp/cloud-sql/instance/README.md | 32 +++++++++++++++---------- cloud/gcp/cloud-sql/instance/outputs.tf | 5 ++++ cloud/gcp/cloud-sql/mysql/README.md | 16 +++++++------ 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 892649a..0762021 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/) - [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/) - [apigateway](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/apigateway/) + - [ecs](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/ecs/) - [elasticsearch](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticsearch/) - [elb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elb/) - [kinesis-firehose](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/kinesis-firehose/) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 4350e35..60e7dbb 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -16,10 +16,11 @@ module "datadog-monitors-cloud-gcp-cloud-sql-instance" { Creates DataDog monitors with the following checks: -- Cloud SQL CPU Utilization -- Cloud SQL Disk Utilization +- Cloud SQL CPU utilization +- Cloud SQL Disk utilization +- Cloud SQL Disk utilization could reach - Cloud SQL 
Memory Utilization -- Cloud SQL Memory Utilization Forecast +- Cloud SQL Memory Utilization could reach - Cloud SQL Failover Unavailable ## Inputs @@ -29,15 +30,21 @@ Creates DataDog monitors with the following checks: | cpu_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | | cpu_utilization_message | Custom message for the CPU Utilization monitor | string | `` | no | | cpu_utilization_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | -| cpu_utilization_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no | -| cpu_utilization_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.8` | no | -| cpu_utilization_timeframe | Timeframe for the CPU Utilization monitor | string | `last_30m` | no | +| cpu_utilization_threshold_critical | CPU Utilization in percentage (critical threshold) | string | `90` | no | +| cpu_utilization_threshold_warning | CPU Utilization in percentage (warning threshold) | string | `80` | no | +| cpu_utilization_timeframe | Timeframe for the CPU Utilization monitor | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | disk_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | +| disk_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | +| disk_utilization_forecast_message | Custom message for the Disk Utilization monitor | string | `` | no | +| disk_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | +| disk_utilization_forecast_threshold_critical | Disk Utilization in percentage (critical threshold) | string | `80` | no | +| disk_utilization_forecast_threshold_critical_recovery | Disk Utilization in percentage (recovery threshold) | string | `72` | no | +| disk_utilization_forecast_timeframe | 
Timeframe for the Disk Utilization monitor | string | `next_1w` | no | | disk_utilization_message | Custom message for the Disk Utilization monitor | string | `` | no | | disk_utilization_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | -| disk_utilization_threshold_critical | Disk Utilization in fraction (critical threshold) | string | `0.9` | no | -| disk_utilization_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | +| disk_utilization_threshold_critical | Disk Utilization in percentage (critical threshold) | string | `90` | no | +| disk_utilization_threshold_warning | Disk Utilization in percentage (warning threshold) | string | `80` | no | | disk_utilization_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | | failover_unavailable_extra_tags | Extra tags for GCP Cloud SQL Failover Unavailable monitor | list | `` | no | @@ -53,13 +60,13 @@ Creates DataDog monitors with the following checks: | memory_utilization_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | | memory_utilization_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | | memory_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | -| memory_utilization_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | -| memory_utilization_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | +| memory_utilization_forecast_threshold_critical | Memory Utilization Forecast in percentage (warning threshold) | string | `90` | no | +| memory_utilization_forecast_threshold_critical_recovery | Memory Utilization Forecast in percentage (recovery threshold) | string | `81` | 
no | | memory_utilization_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | | memory_utilization_message | Custom message for the Memory Utilization monitor | string | `` | no | | memory_utilization_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | -| memory_utilization_threshold_critical | Memory Utilization in fraction (critical threshold) | string | `0.9` | no | -| memory_utilization_threshold_warning | Memory Utilization in fraction (warning threshold) | string | `0.8` | no | +| memory_utilization_threshold_critical | Memory Utilization in percentage (critical threshold) | string | `90` | no | +| memory_utilization_threshold_warning | Memory Utilization in percentage (warning threshold) | string | `80` | no | | memory_utilization_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | project_id | ID of the GCP Project | string | - | yes | @@ -69,6 +76,7 @@ Creates DataDog monitors with the following checks: | Name | Description | |------|-------------| | cpu_utilization_id | id for monitor cpu_utilization | +| disk_utilization_forecast_id | id for monitor disk_utilization_forecast | | disk_utilization_id | id for monitor disk_utilization | | failover_unavailable_id | id for monitor failover_unavailable | | memory_utilization_forecast_id | id for monitor memory_utilization_forecast | diff --git a/cloud/gcp/cloud-sql/instance/outputs.tf b/cloud/gcp/cloud-sql/instance/outputs.tf index 3fbeafd..8fb4292 100644 --- a/cloud/gcp/cloud-sql/instance/outputs.tf +++ b/cloud/gcp/cloud-sql/instance/outputs.tf @@ -8,6 +8,11 @@ output "disk_utilization_id" { value = "${datadog_monitor.disk_utilization.id}" } +output "disk_utilization_forecast_id" { + description = "id for monitor disk_utilization_forecast" + value = "${datadog_monitor.disk_utilization_forecast.id}" +} + output 
"memory_utilization_id" { description = "id for monitor memory_utilization" value = "${datadog_monitor.memory_utilization.id}" diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index 1b67001..dad3956 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -17,7 +17,7 @@ module "datadog-monitors-cloud-gcp-cloud-sql-mysql" { Creates DataDog monitors with the following checks: - Cloud SQL MySQL Network Connections (hard limit: ${var.network_connections_hard_limit}) -- Cloud SQL MySQL Replication Lag too high +- Cloud SQL MySQL Replication Lag - Cloud SQL MySQL Queries Count changed abnormally - Cloud SQL MySQL Questions Count changed abnormally @@ -38,7 +38,7 @@ Creates DataDog monitors with the following checks: | network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no | | network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_15m` | no | | project_id | ID of the GCP Project | string | - | yes | -| queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no | +| queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | | queries_changing_database_ids | List of database ids for the Queries Changing monitor | list | `` | no | | queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | queries_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | @@ -47,9 +47,10 @@ Creates DataDog monitors with the following checks: | queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | | queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `` | no | | queries_changing_threshold_critical | Queries Changing critical threshold | string | `1` | no | +| queries_changing_threshold_critical_recovery | Queries Changing critical recovery threshold | string | `0.99` | no | | queries_changing_threshold_warning | Queries Changing warning threshold | string | `0.5` | no | -| queries_changing_timeframe | Timeframe for the Queries Changing mon monitor | string | `last_10m` | no | -| questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `robust` | no | +| queries_changing_timeframe | Timeframe for the Queries Changing mon monitor | string | `last_1h` | no | +| questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | | questions_changing_database_ids | List of database ids for the Questions Changing monitor | list | `` | no | | questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | questions_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | @@ -58,13 +59,14 @@ Creates DataDog monitors with the following checks: | questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | | questions_changing_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `` | no | | questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no | +| questions_changing_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no | | questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | -| questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_10m` | no | +| questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no | | replication_lag_extra_tags | Extra tags for GCP Cloud SQL SQL Replication monitor | list | `` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | -| replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `900` | no | -| replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `300` | no | +| replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `180` | no | +| replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `90` | no | | replication_lag_timeframe | Timeframe for the Replication Lag monitor | string | `last_10m` | no | ## Outputs From 3bcf6064b7078dc679adedb0dfdd3e0cd87bec5d Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 30 Jul 2018 12:09:23 +0200 Subject: [PATCH 26/46] MON-224 Removing non existing monitors from README --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 
0762021..892649a 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,6 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [aws](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/) - [alb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/alb/) - [apigateway](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/apigateway/) - - [ecs](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/ecs/) - [elasticsearch](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elasticsearch/) - [elb](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/elb/) - [kinesis-firehose](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/aws/kinesis-firehose/) From 2d89b8b9affc281c914e263d29831e9bd204e12f Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 30 Jul 2018 12:17:47 +0200 Subject: [PATCH 27/46] MON-224 Added created_by:terraform tag to all monitors --- cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf | 6 ++++++ cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index e326c01..3135966 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -51,6 +51,7 @@ EOF "provider:gcp", "resource:cloud-sql", "env:${var.environment}", + "created_by:terraform", "${var.cpu_utilization_extra_tags}", ] } @@ -94,6 +95,7 @@ EOF "team:gcp", "provider:gcp", "env:${var.environment}", + "created_by:terraform", "resource:cloud-sql", "${var.disk_utilization_extra_tags}", ] @@ -143,6 +145,7 @@ EOF "team:gcp", "provider:gcp", "env:${var.environment}", + "created_by:terraform", "resource:cloud-sql", 
"${var.disk_utilization_forecast_extra_tags}", ] @@ -187,6 +190,7 @@ EOF "team:gcp", "provider:gcp", "env:${var.environment}", + "created_by:terraform", "resource:cloud-sql", "${var.memory_utilization_extra_tags}", ] @@ -236,6 +240,7 @@ EOF "team:gcp", "provider:gcp", "env:${var.environment}", + "created_by:terraform", "resource:cloud-sql", "${var.memory_utilization_forecast_extra_tags}", ] @@ -279,6 +284,7 @@ EOF "team:gcp", "provider:gcp", "env:${var.environment}", + "created_by:terraform", "resource:cloud-sql", "${var.failover_unavailable_extra_tags}", ] diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 9250d29..0e988e2 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -47,6 +47,7 @@ EOF "team:gcp", "provider:gcp", "env:${var.environment}", + "created_by:terraform", "resource:cloud-sql", "engine:mysql", "${var.network_connections_extra_tags}", @@ -92,6 +93,7 @@ EOF "team:gcp", "provider:gcp", "env:${var.environment}", + "created_by:terraform", "resource:cloud-sql", "engine:mysql", "${var.replication_lag_extra_tags}", @@ -145,6 +147,7 @@ EOF "team:gcp", "provider:gcp", "env:${var.environment}", + "created_by:terraform", "resource:cloud-sql", "engine:mysql", "${var.queries_changing_extra_tags}", @@ -198,6 +201,7 @@ EOF "team:gcp", "provider:gcp", "env:${var.environment}", + "created_by:terraform", "resource:cloud-sql", "engine:mysql", "${var.questions_changing_extra_tags}", From deb5fe67bce8502940a9ed078e55375f31633a1d Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 30 Jul 2018 12:33:31 +0200 Subject: [PATCH 28/46] MON-224 Added the variable to control monitor creation --- cloud/gcp/cloud-sql/instance/README.md | 6 ++++ cloud/gcp/cloud-sql/instance/inputs.tf | 35 +++++++++++++++++++ .../instance/monitors-cloud-sql-instance.tf | 12 +++++++ cloud/gcp/cloud-sql/mysql/README.md | 4 +++ 
cloud/gcp/cloud-sql/mysql/inputs.tf | 24 +++++++++++++ .../mysql/monitors-cloudsql-mysql.tf | 8 +++++ 6 files changed, 89 insertions(+) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 60e7dbb..8e5ae36 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -27,6 +27,7 @@ Creates DataDog monitors with the following checks: | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| +| cpu_utilization_enabled | Whether or not to create the monitor | string | `true` | no | | cpu_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | | cpu_utilization_message | Custom message for the CPU Utilization monitor | string | `` | no | | cpu_utilization_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | @@ -34,7 +35,9 @@ Creates DataDog monitors with the following checks: | cpu_utilization_threshold_warning | CPU Utilization in percentage (warning threshold) | string | `80` | no | | cpu_utilization_timeframe | Timeframe for the CPU Utilization monitor | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | +| disk_utilization_enabled | Whether or not to create the monitor | string | `true` | no | | disk_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | +| disk_utilization_forecast_enabled | Whether or not to create the monitor | string | `true` | no | | disk_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | | disk_utilization_forecast_message | Custom message for the Disk Utilization monitor | string | `` | no | | disk_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | @@ -47,6 +50,7 @@ Creates DataDog monitors with the following checks: | 
disk_utilization_threshold_warning | Disk Utilization in percentage (warning threshold) | string | `80` | no | | disk_utilization_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | +| failover_unavailable_enabled | Whether or not to create the monitor | string | `true` | no | | failover_unavailable_extra_tags | Extra tags for GCP Cloud SQL Failover Unavailable monitor | list | `` | no | | failover_unavailable_message | Custom message for the Failover Unavailable monitor | string | `` | no | | failover_unavailable_silenced | Groups to mute for GCP Cloud SQL Failover Unavailable monitor | map | `` | no | @@ -54,7 +58,9 @@ Creates DataDog monitors with the following checks: | failover_unavailable_timeframe | Timeframe for the Failover Unavailable monitor | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| memory_utilization_enabled | Whether or not to create the monitor | string | `true` | no | | memory_utilization_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization monitor | list | `` | no | +| memory_utilization_forecast_enabled | Whether or not to create the monitor | string | `true` | no | | memory_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization Forecast monitor | list | `` | no | | memory_utilization_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | | memory_utilization_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index 845f679..5513b83 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -36,6 +36,12 @@ variable 
"project_id" { # # CPU # +variable "cpu_utilization_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} + variable "cpu_utilization_message" { description = "Custom message for the CPU Utilization monitor" type = "string" @@ -75,6 +81,12 @@ variable "cpu_utilization_extra_tags" { # # DISK Utilization # +variable "disk_utilization_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} + variable "disk_utilization_message" { description = "Custom message for the Disk Utilization monitor" type = "string" @@ -114,6 +126,12 @@ variable "disk_utilization_extra_tags" { # # DISK Utilization Forecast # +variable "disk_utilization_forecast_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} + variable "disk_utilization_forecast_message" { description = "Custom message for the Disk Utilization monitor" type = "string" @@ -153,6 +171,12 @@ variable "disk_utilization_forecast_extra_tags" { # # Memory Utilization # +variable "memory_utilization_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} + variable "memory_utilization_message" { description = "Custom message for the Memory Utilization monitor" default = "" @@ -188,6 +212,12 @@ variable "memory_utilization_extra_tags" { # # Memory Utilization Forecast # +variable "memory_utilization_forecast_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} + variable "memory_utilization_forecast_message" { description = "Custom message for the Memory Utilization Forecast monitor" default = "" @@ -233,6 +263,11 @@ variable "memory_utilization_forecast_extra_tags" { # # Failover Unavailable # +variable "failover_unavailable_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} variable "failover_unavailable_message" { 
description = "Custom message for the Failover Unavailable monitor" diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 3135966..0efc1fa 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -15,6 +15,8 @@ data "template_file" "filter" { # CPU Utilization # resource "datadog_monitor" "cpu_utilization" { + count = "${var.cpu_utilization_enabled} == true ? 1 : 0 " + name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_utilization_message, var.message)}" @@ -60,6 +62,8 @@ EOF # Disk Utilization # resource "datadog_monitor" "disk_utilization" { + count = "${var.disk_utilization_enabled} == true ? 1 : 0 " + name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.disk_utilization_message, var.message)}" @@ -105,6 +109,8 @@ EOF # Disk Utilization Forecast # resource "datadog_monitor" "disk_utilization_forecast" { + count = "${var.disk_utilization_forecast_enabled} == true ? 1 : 0 " + name = "[${var.environment}] Cloud SQL Disk utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" message = "${coalesce(var.disk_utilization_forecast_message, var.message)}" @@ -155,6 +161,8 @@ EOF # Memory Utilization # resource "datadog_monitor" "memory_utilization" { + count = "${var.memory_utilization_enabled} == true ? 
1 : 0 " + name = "[${var.environment}] Cloud SQL Memory Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.memory_utilization_message, var.message)}" @@ -200,6 +208,8 @@ EOF # Memory Utilization Forecast # resource "datadog_monitor" "memory_utilization_forecast" { + count = "${var.memory_utilization_forecast_enabled} == true ? 1 : 0 " + name = "[${var.environment}] Cloud SQL Memory Utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" message = "${coalesce(var.memory_utilization_forecast_message, var.message)}" @@ -250,6 +260,8 @@ EOF # Failover Unavailable # resource "datadog_monitor" "failover_unavailable" { + count = "${var.failover_unavailable_enabled} == true ? 1 : 0 " + name = "[${var.environment}] Cloud SQL Failover Unavailable" message = "${coalesce(var.failover_unavailable_message, var.message)}" diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index dad3956..e10dde1 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -30,6 +30,7 @@ Creates DataDog monitors with the following checks: | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a monitor is triggered | string | - | yes | +| network_connections_enabled | Whether or not to create the monitor | string | `true` | no | | network_connections_extra_tags | Extra tags for GCP Cloud SQL Network Connections monitor | list | `` | no | | network_connections_hard_limit | Max number of connections for the CloudSQL Instance. 
Default value is the max value on https://cloud.google.com/sql/docs/quotas#fixed-limits for MySQL | string | `4000` | no | | network_connections_message | Custom message for the Network Connections monitor | string | `` | no | @@ -42,6 +43,7 @@ Creates DataDog monitors with the following checks: | queries_changing_database_ids | List of database ids for the Queries Changing monitor | list | `` | no | | queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | queries_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | +| queries_changing_enabled | Whether or not to create the monitor | string | `true` | no | | queries_changing_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `` | no | | queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no | | queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | @@ -54,6 +56,7 @@ Creates DataDog monitors with the following checks: | questions_changing_database_ids | List of database ids for the Questions Changing monitor | list | `` | no | | questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | questions_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | +| questions_changing_enabled | Whether or not to create the monitor | string | `true` | no | | questions_changing_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `` | no | | questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no | | questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | @@ -62,6 +65,7 @@ Creates DataDog monitors with the following checks: | questions_changing_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no | | questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | | questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no | +| replication_lag_enabled | Whether or not to create the monitor | string | `true` | no | | replication_lag_extra_tags | Extra tags for GCP Cloud SQL SQL Replication monitor | list | `` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index a11bd26..e0f56c4 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -36,6 +36,12 @@ variable "project_id" { # # Network Connections # +variable "network_connections_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} + variable "network_connections_message" { description = "Custom message for the Network Connections monitor" type = "string" @@ -81,6 +87,12 @@ variable "network_connections_extra_tags" { # # Replication Lag # +variable "replication_lag_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} + variable 
"replication_lag_message" { description = "Custom message for the Replication Lag monitor" type = "string" @@ -120,6 +132,12 @@ variable "replication_lag_extra_tags" { # # Queries Changing Abnormally # +variable "queries_changing_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} + variable "queries_changing_database_ids" { description = "List of database ids for the Queries Changing monitor" type = "list" @@ -195,6 +213,12 @@ variable "queries_changing_extra_tags" { # # Questions Changing # +variable "questions_changing_enabled" { + description = "Whether or not to create the monitor" + type = "string" + default = "true" +} + variable "questions_changing_message" { description = "Custom message for the Questions Changing monitor" type = "string" diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 0e988e2..7944ebc 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -15,6 +15,8 @@ data "template_file" "filter" { # MySQL Network Connections # resource "datadog_monitor" "network_connections" { + count = "${var.network_connections_enabled} == true ? 1 : 0 " + name = "[${var.environment}] Cloud SQL MySQL Network Connections (hard limit: ${var.network_connections_hard_limit}) {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.network_connections_message, var.message)}" @@ -58,6 +60,8 @@ EOF # Replication Lag # resource "datadog_monitor" "replication_lag" { + count = "${var.replication_lag_enabled} == true ? 
1 : 0 " + name = "[${var.environment}] Cloud SQL MySQL Replication Lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" message = "${coalesce(var.replication_lag_message, var.message)}" @@ -104,6 +108,8 @@ EOF # Queries Anomaly # resource "datadog_monitor" "queries_changing_anomaly" { + count = "${var.queries_changing_enabled} == true ? 1 : 0 " + name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally" message = "${coalesce(var.queries_changing_message, var.message)}" @@ -158,6 +164,8 @@ EOF # Questions Anomaly # resource "datadog_monitor" "questions_changing_anomaly" { + count = "${var.questions_changing_enabled} == true ? 1 : 0 " + name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally" message = "${coalesce(var.questions_changing_message, var.message)}" From 50906d0940260896dc455e2ee939740dd77f1057 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 30 Jul 2018 12:33:31 +0200 Subject: [PATCH 29/46] MON-224 Added the variable to control monitor creation --- cloud/gcp/cloud-sql/mysql/README.md | 2 -- cloud/gcp/cloud-sql/mysql/inputs.tf | 11 ----------- 2 files changed, 13 deletions(-) diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index e10dde1..9551eaf 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -40,7 +40,6 @@ Creates DataDog monitors with the following checks: | network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_15m` | no | | project_id | ID of the GCP Project | string | - | yes | | queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | -| queries_changing_database_ids | List of database ids for the Queries Changing monitor | list | `` | no | | queries_changing_deviations | Deviations to detect the anomaly | string | `4` | 
no | | queries_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | | queries_changing_enabled | Whether or not to create the monitor | string | `true` | no | @@ -53,7 +52,6 @@ Creates DataDog monitors with the following checks: | queries_changing_threshold_warning | Queries Changing warning threshold | string | `0.5` | no | | queries_changing_timeframe | Timeframe for the Queries Changing mon monitor | string | `last_1h` | no | | questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | -| questions_changing_database_ids | List of database ids for the Questions Changing monitor | list | `` | no | | questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | questions_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | | questions_changing_enabled | Whether or not to create the monitor | string | `true` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index e0f56c4..babdf71 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -138,11 +138,6 @@ variable "queries_changing_enabled" { default = "true" } -variable "queries_changing_database_ids" { - description = "List of database ids for the Queries Changing monitor" - type = "list" - default = [] -} variable "queries_changing_message" { description = "Custom message for the Queries Changing monitor" @@ -231,12 +226,6 @@ variable "questions_changing_timeframe" { default = "last_1h" } -variable "questions_changing_database_ids" { - description = "List of database ids for the Questions Changing monitor" - type = "list" - default = [] -} - variable "questions_changing_anomaly_detection_algorithm" { description = "Anomaly Detection Algorithm used" type = "string" From 3535f294a5f47a72cb400181ad85a7c82683ca4a Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona 
Date: Mon, 30 Jul 2018 12:46:30 +0200 Subject: [PATCH 30/46] MON-224 Generalize anomaly monitors and standardize variable names. --- cloud/gcp/cloud-sql/mysql/README.md | 50 +++++++----- cloud/gcp/cloud-sql/mysql/inputs.tf | 81 +++++++++++++------ .../mysql/monitors-cloudsql-mysql.tf | 66 +++++++-------- 3 files changed, 120 insertions(+), 77 deletions(-) diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index 9551eaf..4c21190 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -39,30 +39,36 @@ Creates DataDog monitors with the following checks: | network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no | | network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_15m` | no | | project_id | ID of the GCP Project | string | - | yes | +| queries_changing_anomaly_alert_window | Alert window. | string | `last_30m` | no | +| queries_changing_anomaly_count_default_zero | Count default zero. | string | `false` | no | | queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | -| queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no | -| queries_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | -| queries_changing_enabled | Whether or not to create the monitor | string | `true` | no | -| queries_changing_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `` | no | -| queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no | -| queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | -| queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `` | no | -| queries_changing_threshold_critical | Queries Changing critical threshold | string | `1` | no | -| queries_changing_threshold_critical_recovery | Queries Changing critical recovery threshold | string | `0.99` | no | -| queries_changing_threshold_warning | Queries Changing warning threshold | string | `0.5` | no | -| queries_changing_timeframe | Timeframe for the Queries Changing mon monitor | string | `last_1h` | no | +| queries_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no | +| queries_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | +| queries_changing_anomaly_enabled | Whether or not to create the monitor | string | `true` | no | +| queries_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `` | no | +| queries_changing_anomaly_interval | Interval. 
| string | `20` | no | +| queries_changing_anomaly_message | Custom message for the Queries Changing monitor | string | `` | no | +| queries_changing_anomaly_seasonality | Seasonality of the algorithm | string | `weekly` | no | +| queries_changing_anomaly_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `` | no | +| queries_changing_anomaly_threshold_critical | Queries Changing critical threshold | string | `1` | no | +| queries_changing_anomaly_threshold_critical_recovery | Queries Changing critical recovery threshold | string | `0.99` | no | +| queries_changing_anomaly_threshold_warning | Queries Changing warning threshold | string | `0.5` | no | +| queries_changing_anomaly_timeframe | Timeframe for the Queries Changing monitor | string | `last_1h` | no | +| questions_changing_anomaly_alert_window | Alert window. | string | `last_30m` | no | +| questions_changing_anomaly_count_default_zero | Count default zero. | string | `false` | no | | questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | -| questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | -| questions_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | -| questions_changing_enabled | Whether or not to create the monitor | string | `true` | no | -| questions_changing_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `` | no | -| questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no | -| questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | -| questions_changing_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `` | no | -| questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no | -| questions_changing_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no | -| questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | -| questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no | +| questions_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no | +| questions_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | +| questions_changing_anomaly_enabled | Whether or not to create the monitor | string | `true` | no | +| questions_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `` | no | +| questions_changing_anomaly_interval | Interval. 
| string | `20` | no | +| questions_changing_anomaly_message | Custom message for the Questions Changing monitor | string | `` | no | +| questions_changing_anomaly_seasonality | Seasonality of the algorithm | string | `weekly` | no | +| questions_changing_anomaly_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `` | no | +| questions_changing_anomaly_threshold_critical | Questions Changing critical threshold | string | `1` | no | +| questions_changing_anomaly_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no | +| questions_changing_anomaly_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | +| questions_changing_anomaly_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no | | replication_lag_enabled | Whether or not to create the monitor | string | `true` | no | | replication_lag_extra_tags | Extra tags for GCP Cloud SQL SQL Replication monitor | list | `` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index babdf71..2d7bdf4 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -132,20 +132,19 @@ variable "replication_lag_extra_tags" { # # Queries Changing Abnormally # -variable "queries_changing_enabled" { +variable "queries_changing_anomaly_enabled" { description = "Whether or not to create the monitor" type = "string" default = "true" } - -variable "queries_changing_message" { +variable "queries_changing_anomaly_message" { description = "Custom message for the Queries Changing monitor" type = "string" default = "" } -variable "queries_changing_timeframe" { +variable "queries_changing_anomaly_timeframe" { description = "Timeframe for the Queries Changing mon monitor" type = "string" default = "last_1h" @@ -157,49 +156,67 @@ variable 
"queries_changing_anomaly_detection_algorithm" { default = "agile" } -variable "queries_changing_deviations" { +variable "queries_changing_anomaly_deviations" { description = "Deviations to detect the anomaly" type = "string" default = 4 } -variable "queries_changing_direction" { +variable "queries_changing_anomaly_direction" { description = "Direction of the anomaly. It can be both, below or above." type = "string" default = "both" } -variable "queries_changing_seasonality" { +variable "queries_changing_anomaly_alert_window" { + description = "Alert window." + type = "string" + default = "last_30m" +} + +variable "queries_changing_anomaly_interval" { + description = "Interval." + type = "string" + default = 20 +} + +variable "queries_changing_anomaly_count_default_zero" { + description = "Count default zero." + type = "string" + default = "false" +} + +variable "queries_changing_anomaly_seasonality" { description = "Seasonality of the algorithm" type = "string" default = "weekly" } -variable "queries_changing_threshold_warning" { +variable "queries_changing_anomaly_threshold_warning" { description = "Queries Changing warning threshold" type = "string" default = 0.5 } -variable "queries_changing_threshold_critical" { +variable "queries_changing_anomaly_threshold_critical" { description = "Queries Changing critical threshold" type = "string" default = 1 } -variable "queries_changing_threshold_critical_recovery" { +variable "queries_changing_anomaly_threshold_critical_recovery" { description = "Queries Changing critical recovery threshold" type = "string" default = 0.99 } -variable "queries_changing_silenced" { +variable "queries_changing_anomaly_silenced" { description = "Groups to mute for GCP Cloud SQL Queries Changing monitor" type = "map" default = {} } -variable "queries_changing_extra_tags" { +variable "queries_changing_anomaly_extra_tags" { description = "Extra tags for GCP Cloud SQL Queries Changing monitor" type = "list" default = [] @@ -208,19 +225,19 @@ 
variable "queries_changing_extra_tags" { # # Questions Changing # -variable "questions_changing_enabled" { +variable "questions_changing_anomaly_enabled" { description = "Whether or not to create the monitor" type = "string" default = "true" } -variable "questions_changing_message" { +variable "questions_changing_anomaly_message" { description = "Custom message for the Questions Changing monitor" type = "string" default = "" } -variable "questions_changing_timeframe" { +variable "questions_changing_anomaly_timeframe" { description = "Timeframe for the Questions Changing monitor" type = "string" default = "last_1h" @@ -232,49 +249,67 @@ variable "questions_changing_anomaly_detection_algorithm" { default = "agile" } -variable "questions_changing_deviations" { +variable "questions_changing_anomaly_alert_window" { + description = "Alert window." + type = "string" + default = "last_30m" +} + +variable "questions_changing_anomaly_interval" { + description = "Interval." + type = "string" + default = 20 +} + +variable "questions_changing_anomaly_count_default_zero" { + description = "Count default zero." + type = "string" + default = "false" +} + +variable "questions_changing_anomaly_deviations" { description = "Deviations to detect the anomaly" type = "string" default = 4 } -variable "questions_changing_direction" { +variable "questions_changing_anomaly_direction" { description = "Direction of the anomaly. It can be both, below or above." 
type = "string" default = "both" } -variable "questions_changing_seasonality" { +variable "questions_changing_anomaly_seasonality" { description = "Seasonality of the algorithm" type = "string" default = "weekly" } -variable "questions_changing_threshold_warning" { +variable "questions_changing_anomaly_threshold_warning" { description = "Questions Changing warning threshold" type = "string" default = 0.5 } -variable "questions_changing_threshold_critical" { +variable "questions_changing_anomaly_threshold_critical" { description = "Questions Changing critical threshold" type = "string" default = 1 } -variable "questions_changing_threshold_critical_recovery" { +variable "questions_changing_anomaly_threshold_critical_recovery" { description = "Questions Changing critical recovery threshold" type = "string" default = 0.99 } -variable "questions_changing_silenced" { +variable "questions_changing_anomaly_silenced" { description = "Groups to mute for GCP Cloud SQL Questions Changing monitor" type = "map" default = {} } -variable "questions_changing_extra_tags" { +variable "questions_changing_anomaly_extra_tags" { description = "Extra tags for GCP Cloud SQL Questions Changing monitor" type = "list" default = [] diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 7944ebc..2455a90 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -41,9 +41,11 @@ EOF timeout_h = 0 include_tags = true locked = false - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" - silenced = "${var.network_connections_silenced}" + + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + + silenced = "${var.network_connections_silenced}" tags = [ "team:gcp", @@ -108,32 +110,32 @@ EOF # Queries Anomaly # resource "datadog_monitor" "queries_changing_anomaly" { - count = "${var.queries_changing_enabled} == true ? 
1 : 0 " + count = "${var.queries_changing_anomaly_enabled} == true ? 1 : 0 " name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally" - message = "${coalesce(var.queries_changing_message, var.message)}" + message = "${coalesce(var.queries_changing_anomaly_message, var.message)}" type = "query alert" query = < ${var.queries_changing_threshold_critical} + > ${var.queries_changing_anomaly_threshold_critical} EOF thresholds { - warning = "${var.queries_changing_threshold_warning}" - critical = "${var.queries_changing_threshold_critical}" - critical_recovery = "${var.queries_changing_threshold_critical_recovery}" + warning = "${var.queries_changing_anomaly_threshold_warning}" + critical = "${var.queries_changing_anomaly_threshold_critical}" + critical_recovery = "${var.queries_changing_anomaly_threshold_critical_recovery}" } notify_audit = false @@ -147,7 +149,7 @@ EOF evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.queries_changing_silenced}" + silenced = "${var.queries_changing_anomaly_silenced}" tags = [ "team:gcp", @@ -156,7 +158,7 @@ EOF "created_by:terraform", "resource:cloud-sql", "engine:mysql", - "${var.queries_changing_extra_tags}", + "${var.queries_changing_anomaly_extra_tags}", ] } @@ -164,10 +166,10 @@ EOF # Questions Anomaly # resource "datadog_monitor" "questions_changing_anomaly" { - count = "${var.questions_changing_enabled} == true ? 1 : 0 " + count = "${var.questions_changing_anomaly_enabled} == true ? 
1 : 0 " name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally" - message = "${coalesce(var.questions_changing_message, var.message)}" + message = "${coalesce(var.questions_changing_anomaly_message, var.message)}" type = "query alert" @@ -176,20 +178,20 @@ resource "datadog_monitor" "questions_changing_anomaly" { anomalies( avg:gcp.cloudsql.database.mysql.questions{${data.template_file.filter.rendered}} by {database_id}, '${var.questions_changing_anomaly_detection_algorithm}', - ${var.questions_changing_deviations}, - direction='${var.questions_changing_direction}', - alert_window='last_30m', - interval=20, - count_default_zero='false', - seasonality='${var.questions_changing_seasonality}' + ${var.questions_changing_anomaly_deviations}, + direction='${var.questions_changing_anomaly_direction}', + alert_window='${var.questions_changing_anomaly_alert_window}', + interval=${var.questions_changing_anomaly_interval}, + count_default_zero='${var.questions_changing_anomaly_count_default_zero}', + seasonality='${var.questions_changing_anomaly_seasonality}' ) - > ${var.questions_changing_threshold_critical} + > ${var.questions_changing_anomaly_threshold_critical} EOF thresholds { - warning = "${var.questions_changing_threshold_warning}" - critical = "${var.questions_changing_threshold_critical}" - critical_recovery = "${var.questions_changing_threshold_critical_recovery}" + warning = "${var.questions_changing_anomaly_threshold_warning}" + critical = "${var.questions_changing_anomaly_threshold_critical}" + critical_recovery = "${var.questions_changing_anomaly_threshold_critical_recovery}" } notify_audit = false @@ -203,7 +205,7 @@ EOF evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.questions_changing_silenced}" + silenced = "${var.questions_changing_anomaly_silenced}" tags = [ "team:gcp", @@ -212,6 +214,6 @@ EOF "created_by:terraform", "resource:cloud-sql", "engine:mysql", - 
"${var.questions_changing_extra_tags}", + "${var.questions_changing_anomaly_extra_tags}", ] } From e09b68134d9c1a3b8c9289658306e68daba08755 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 30 Jul 2018 13:02:23 +0200 Subject: [PATCH 31/46] MON-224 Fix count value to allow not creating the monitors --- .../instance/monitors-cloud-sql-instance.tf | 12 ++++++------ cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 0efc1fa..66da161 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -15,7 +15,7 @@ data "template_file" "filter" { # CPU Utilization # resource "datadog_monitor" "cpu_utilization" { - count = "${var.cpu_utilization_enabled} == true ? 1 : 0 " + count = "${var.cpu_utilization_enabled == "true" ? 1 : 0 }" name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_utilization_message, var.message)}" @@ -62,7 +62,7 @@ EOF # Disk Utilization # resource "datadog_monitor" "disk_utilization" { - count = "${var.disk_utilization_enabled} == true ? 1 : 0 " + count = "${var.disk_utilization_enabled == "true" ? 1 : 0 }" name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.disk_utilization_message, var.message)}" @@ -109,7 +109,7 @@ EOF # Disk Utilization Forecast # resource "datadog_monitor" "disk_utilization_forecast" { - count = "${var.disk_utilization_forecast_enabled} == true ? 
1 : 0 " + count = "${var.disk_utilization_forecast_enabled == "true" ? 1 : 0 }" name = "[${var.environment}] Cloud SQL Disk utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" message = "${coalesce(var.disk_utilization_forecast_message, var.message)}" @@ -161,7 +161,7 @@ EOF # Memory Utilization # resource "datadog_monitor" "memory_utilization" { - count = "${var.memory_utilization_enabled} == true ? 1 : 0 " + count = "${var.memory_utilization_enabled == "true" ? 1 : 0 }" name = "[${var.environment}] Cloud SQL Memory Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.memory_utilization_message, var.message)}" @@ -208,7 +208,7 @@ EOF # Memory Utilization Forecast # resource "datadog_monitor" "memory_utilization_forecast" { - count = "${var.memory_utilization_forecast_enabled} == true ? 1 : 0 " + count = "${var.memory_utilization_forecast_enabled == "true" ? 1 : 0 }" name = "[${var.environment}] Cloud SQL Memory Utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" message = "${coalesce(var.memory_utilization_forecast_message, var.message)}" @@ -260,7 +260,7 @@ EOF # Failover Unavailable # resource "datadog_monitor" "failover_unavailable" { - count = "${var.failover_unavailable_enabled} == true ? 1 : 0 " + count = "${var.failover_unavailable_enabled == "true" ? 
1 : 0 }" name = "[${var.environment}] Cloud SQL Failover Unavailable" message = "${coalesce(var.failover_unavailable_message, var.message)}" diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 2455a90..777440a 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -15,7 +15,7 @@ data "template_file" "filter" { # MySQL Network Connections # resource "datadog_monitor" "network_connections" { - count = "${var.network_connections_enabled} == true ? 1 : 0 " + count = "${var.network_connections_enabled == "true" ? 1 : 0 }" name = "[${var.environment}] Cloud SQL MySQL Network Connections (hard limit: ${var.network_connections_hard_limit}) {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.network_connections_message, var.message)}" @@ -62,7 +62,7 @@ EOF # Replication Lag # resource "datadog_monitor" "replication_lag" { - count = "${var.replication_lag_enabled} == true ? 1 : 0 " + count = "${var.replication_lag_enabled == "true" ? 1 : 0 }" name = "[${var.environment}] Cloud SQL MySQL Replication Lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" message = "${coalesce(var.replication_lag_message, var.message)}" @@ -110,7 +110,7 @@ EOF # Queries Anomaly # resource "datadog_monitor" "queries_changing_anomaly" { - count = "${var.queries_changing_anomaly_enabled} == true ? 1 : 0 " + count = "${var.queries_changing_anomaly_enabled == "true" ? 
1 : 0 }" name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally" message = "${coalesce(var.queries_changing_anomaly_message, var.message)}" @@ -166,7 +166,7 @@ EOF # Questions Anomaly # resource "datadog_monitor" "questions_changing_anomaly" { - count = "${var.questions_changing_anomaly_enabled} == true ? 1 : 0 " + count = "${var.questions_changing_anomaly_enabled == "true" ? 1 : 0 }" name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally" message = "${coalesce(var.questions_changing_anomaly_message, var.message)}" From 5293e24faf9a76fd5e685c03de2d4f0a073b371c Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 30 Jul 2018 13:06:06 +0200 Subject: [PATCH 32/46] MON-224 Fix timeframe on questions changing anomaly monitor --- cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 777440a..e7e694f 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -174,7 +174,7 @@ resource "datadog_monitor" "questions_changing_anomaly" { type = "query alert" query = < Date: Mon, 30 Jul 2018 13:29:58 +0200 Subject: [PATCH 33/46] MON-224 forecast monitors generalized --- cloud/gcp/cloud-sql/instance/README.md | 24 ++++-- cloud/gcp/cloud-sql/instance/inputs.tf | 76 +++++++++++++++++-- .../instance/monitors-cloud-sql-instance.tf | 18 ++--- 3 files changed, 95 insertions(+), 23 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 8e5ae36..318b3c8 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -37,13 +37,19 @@ Creates DataDog monitors with the following checks: | delay | Delay in seconds for the metric evaluation | string | `900` | no | | disk_utilization_enabled | Whether 
or not to create the monitor | string | `true` | no | | disk_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | +| disk_utilization_forecast_algorithm | Algorithm for the Disk Utilization Forecast monitor | string | `linear` | no | +| disk_utilization_forecast_deviations | Deviations for the Disk Utilization Forecast monitor | string | `1` | no | | disk_utilization_forecast_enabled | Whether or not to create the monitor | string | `true` | no | -| disk_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | -| disk_utilization_forecast_message | Custom message for the Disk Utilization monitor | string | `` | no | -| disk_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | -| disk_utilization_forecast_threshold_critical | Disk Utilization in percentage (critical threshold) | string | `80` | no | -| disk_utilization_forecast_threshold_critical_recovery | Disk Utilization in percentage (recovery threshold) | string | `72` | no | -| disk_utilization_forecast_timeframe | Timeframe for the Disk Utilization monitor | string | `next_1w` | no | +| disk_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL Disk Utilization Forecast monitor | list | `` | no | +| disk_utilization_forecast_interval | Interval for the Disk Utilization Forecast monitor | string | `60m` | no | +| disk_utilization_forecast_linear_history | History for the Disk Utilization Forecast monitor | string | `3d` | no | +| disk_utilization_forecast_linear_model | Model for the Disk Utilization Forecast monitor | string | `default` | no | +| disk_utilization_forecast_message | Custom message for the Disk Utilization Forecast monitor | string | `` | no | +| disk_utilization_forecast_seasonal_seasonality | Seasonality for the Disk Utilization Forecast monitor | string | `weekly` | no | +| disk_utilization_forecast_silenced | Groups to mute for GCP 
Cloud SQL Disk Utilization Forecast monitor | map | `` | no | +| disk_utilization_forecast_threshold_critical | Disk Utilization Forecast in percentage (critical threshold) | string | `80` | no | +| disk_utilization_forecast_threshold_critical_recovery | Disk Utilization Forecast in percentage (recovery threshold) | string | `72` | no | +| disk_utilization_forecast_timeframe | Timeframe for the Disk Utilization Forecast monitor | string | `next_1w` | no | | disk_utilization_message | Custom message for the Disk Utilization monitor | string | `` | no | | disk_utilization_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | | disk_utilization_threshold_critical | Disk Utilization in percentage (critical threshold) | string | `90` | no | @@ -60,11 +66,15 @@ Creates DataDog monitors with the following checks: | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | memory_utilization_enabled | Whether or not to create the monitor | string | `true` | no | | memory_utilization_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization monitor | list | `` | no | +| memory_utilization_forecast_algorithm | Algorithm for the Memory Utilization Forecast monitor | string | `linear` | no | +| memory_utilization_forecast_deviations | Deviations for the Memory Utilization Forecast monitor | string | `1` | no | | memory_utilization_forecast_enabled | Whether or not to create the monitor | string | `true` | no | | memory_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization Forecast monitor | list | `` | no | -| memory_utilization_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | | memory_utilization_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | +| memory_utilization_forecast_linear_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | +| 
memory_utilization_forecast_linear_model | Model for the Memory Utilization Forecast monitor | string | `default` | no | | memory_utilization_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | +| memory_utilization_forecast_seasonal_seasonality | Seasonality for the Memory Utilization Forecast monitor | string | `weekly` | no | | memory_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | | memory_utilization_forecast_threshold_critical | Memory Utilization Forecast in percentage (warning threshold) | string | `90` | no | | memory_utilization_forecast_threshold_critical_recovery | Memory Utilization Forecast in percentage (recovery threshold) | string | `81` | no | diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index 5513b83..ca4761c 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -133,37 +133,73 @@ variable "disk_utilization_forecast_enabled" { } variable "disk_utilization_forecast_message" { - description = "Custom message for the Disk Utilization monitor" + description = "Custom message for the Disk Utilization Forecast monitor" type = "string" default = "" } variable "disk_utilization_forecast_timeframe" { - description = "Timeframe for the Disk Utilization monitor" + description = "Timeframe for the Disk Utilization Forecast monitor" type = "string" default = "next_1w" } +variable "disk_utilization_forecast_algorithm" { + description = "Algorithm for the Disk Utilization Forecast monitor" + type = "string" + default = "linear" +} + +variable "disk_utilization_forecast_deviations" { + description = "Deviations for the Disk Utilization Forecast monitor" + type = "string" + default = 1 +} + +variable "disk_utilization_forecast_interval" { + description = "Interval for the Disk Utilization Forecast monitor" + type = "string" + default = "60m" +} + 
+variable "disk_utilization_forecast_linear_history" { + description = "History for the Disk Utilization Forecast monitor" + type = "string" + default = "3d" +} + +variable "disk_utilization_forecast_linear_model" { + description = "Model for the Disk Utilization Forecast monitor" + type = "string" + default = "default" +} + +variable "disk_utilization_forecast_seasonal_seasonality" { + description = "Seasonality for the Disk Utilization Forecast monitor" + type = "string" + default = "weekly" +} + variable "disk_utilization_forecast_threshold_critical" { - description = "Disk Utilization in percentage (critical threshold)" + description = "Disk Utilization Forecast in percentage (critical threshold)" type = "string" default = 80 } variable "disk_utilization_forecast_threshold_critical_recovery" { - description = "Disk Utilization in percentage (recovery threshold)" + description = "Disk Utilization Forecast in percentage (recovery threshold)" type = "string" default = 72 } variable "disk_utilization_forecast_silenced" { - description = "Groups to mute for GCP Cloud SQL Disk Utilization monitor" + description = "Groups to mute for GCP Cloud SQL Disk Utilization Forecast monitor" type = "map" default = {} } variable "disk_utilization_forecast_extra_tags" { - description = "Extra tags for GCP Cloud SQL CPU Utilization monitor" + description = "Extra tags for GCP Cloud SQL Disk Utilization Forecast monitor" type = "list" default = [] } @@ -228,16 +264,42 @@ variable "memory_utilization_forecast_timeframe" { default = "next_3d" } +variable "memory_utilization_forecast_algorithm" { + description = "Algorithm for the Memory Utilization Forecast monitor" + type = "string" + default = "linear" +} + +variable "memory_utilization_forecast_deviations" { + description = "Deviations for the Memory Utilization Forecast monitor" + type = "string" + default = 1 +} + variable "memory_utilization_forecast_interval" { description = "Interval for the Memory Utilization Forecast 
monitor" + type = "string" default = "30m" } -variable "memory_utilization_forecast_history" { +variable "memory_utilization_forecast_linear_history" { description = "History for the Memory Utilization Forecast monitor" + type = "string" default = "12h" } +variable "memory_utilization_forecast_linear_model" { + description = "Model for the Memory Utilization Forecast monitor" + type = "string" + default = "default" +} + +variable "memory_utilization_forecast_seasonal_seasonality" { + description = "Seasonality for the Memory Utilization Forecast monitor" + type = "string" + default = "weekly" +} + variable "memory_utilization_forecast_threshold_critical" { description = "Memory Utilization Forecast in percentage (warning threshold)" default = 90 diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 66da161..577abcb 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -120,11 +120,11 @@ resource "datadog_monitor" "disk_utilization_forecast" { max(${var.disk_utilization_forecast_timeframe}): forecast( avg:gcp.cloudsql.database.disk.utilization{${data.template_file.filter.rendered}} by {database_id} * 100, - 'linear', - 1, - interval='60m', - history='3d', - model='default' + '${var.disk_utilization_forecast_algorithm}', + ${var.disk_utilization_forecast_deviations}, + interval='${var.disk_utilization_forecast_interval}', + ${var.disk_utilization_forecast_algorithm == "linear" ? format("history='%s',model='%s'", var.disk_utilization_forecast_linear_history, var.disk_utilization_forecast_linear_model): ""} + ${var.disk_utilization_forecast_algorithm == "seasonal" ? 
format("seasonality='%s'", var.disk_utilization_forecast_seasonal_seasonality): ""} ) >= ${var.disk_utilization_forecast_threshold_critical} EOF @@ -219,11 +219,11 @@ resource "datadog_monitor" "memory_utilization_forecast" { max(${var.memory_utilization_forecast_timeframe}): forecast( avg:gcp.cloudsql.database.memory.utilization{${data.template_file.filter.rendered}} by {database_id} * 100, - 'linear', - 1, + '${var.memory_utilization_forecast_algorithm}', + ${var.memory_utilization_forecast_deviations}, interval='${var.memory_utilization_forecast_interval}', - history='${var.memory_utilization_forecast_history}', - model='default' + ${var.memory_utilization_forecast_algorithm == "linear" ? format("history='%s',model='%s'", var.memory_utilization_forecast_linear_history, var.memory_utilization_forecast_linear_model): ""} + ${var.memory_utilization_forecast_algorithm == "seasonal" ? format("seasonality='%s'", var.memory_utilization_forecast_seasonal_seasonality): ""} ) >= ${var.memory_utilization_forecast_threshold_critical} EOF From 817a01a738230b73079f02adc888f7e904c3fb9d Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 30 Jul 2018 14:14:59 +0200 Subject: [PATCH 34/46] MON-224 Fix names on monitors --- cloud/gcp/cloud-sql/instance/README.md | 6 +++--- cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 318b3c8..d5829c1 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -16,9 +16,9 @@ module "datadog-monitors-cloud-gcp-cloud-sql-instance" { Creates DataDog monitors with the following checks: -- Cloud SQL CPU utilization -- Cloud SQL Disk utilization -- Cloud SQL Disk utilization could reach +- Cloud SQL CPU Utilization +- Cloud SQL Disk Utilization +- Cloud SQL Disk Utilization could reach - Cloud SQL Memory Utilization - Cloud SQL 
Memory Utilization could reach - Cloud SQL Failover Unavailable diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 577abcb..5260f35 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -17,7 +17,7 @@ data "template_file" "filter" { resource "datadog_monitor" "cpu_utilization" { count = "${var.cpu_utilization_enabled == "true" ? 1 : 0 }" - name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL CPU Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_utilization_message, var.message)}" type = "metric alert" @@ -64,7 +64,7 @@ EOF resource "datadog_monitor" "disk_utilization" { count = "${var.disk_utilization_enabled == "true" ? 1 : 0 }" - name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] Cloud SQL Disk Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.disk_utilization_message, var.message)}" type = "metric alert" @@ -111,7 +111,7 @@ EOF resource "datadog_monitor" "disk_utilization_forecast" { count = "${var.disk_utilization_forecast_enabled == "true" ? 
1 : 0 }" - name = "[${var.environment}] Cloud SQL Disk utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" + name = "[${var.environment}] Cloud SQL Disk Utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" message = "${coalesce(var.disk_utilization_forecast_message, var.message)}" type = "metric alert" From a6e49ad0948b778ff53b2ba016ff7e25b8f2c6eb Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 30 Jul 2018 16:25:54 +0200 Subject: [PATCH 35/46] MON-224 Fixing expression on queries changing monitor --- cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf | 2 +- cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 5260f35..d43ec11 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -72,7 +72,7 @@ resource "datadog_monitor" "disk_utilization" { query = < ${var.disk_utilization_threshold_critical} EOF diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index e7e694f..d31ba08 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -120,7 +120,7 @@ resource "datadog_monitor" "queries_changing_anomaly" { query = < Date: Tue, 31 Jul 2018 10:26:58 +0200 Subject: [PATCH 36/46] MON-224 Network Connections monitor removed. 
It is on the MySQL integration --- cloud/gcp/cloud-sql/mysql/README.md | 10 ---- cloud/gcp/cloud-sql/mysql/inputs.tf | 51 ------------------- .../mysql/monitors-cloudsql-mysql.tf | 47 ----------------- cloud/gcp/cloud-sql/mysql/outputs.tf | 5 -- 4 files changed, 113 deletions(-) diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index 4c21190..1e3b7ce 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -16,7 +16,6 @@ module "datadog-monitors-cloud-gcp-cloud-sql-mysql" { Creates DataDog monitors with the following checks: -- Cloud SQL MySQL Network Connections (hard limit: ${var.network_connections_hard_limit}) - Cloud SQL MySQL Replication Lag - Cloud SQL MySQL Queries Count changed abnormally - Cloud SQL MySQL Questions Count changed abnormally @@ -30,14 +29,6 @@ Creates DataDog monitors with the following checks: | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a monitor is triggered | string | - | yes | -| network_connections_enabled | Whether or not to create the monitor | string | `true` | no | -| network_connections_extra_tags | Extra tags for GCP Cloud SQL Network Connections monitor | list | `` | no | -| network_connections_hard_limit | Max number of connections for the CloudSQL Instance. 
Default value is the max value on https://cloud.google.com/sql/docs/quotas#fixed-limits for MySQL | string | `4000` | no | -| network_connections_message | Custom message for the Network Connections monitor | string | `` | no | -| network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | -| network_connections_threshold_critical | Number of network connections (critical threshold) | string | `3600` | no | -| network_connections_threshold_warning | Number of network connections (warning threshold) | string | `3200` | no | -| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_15m` | no | | project_id | ID of the GCP Project | string | - | yes | | queries_changing_anomaly_alert_window | Alert window. | string | `last_30m` | no | | queries_changing_anomaly_count_default_zero | Count default zero. | string | `false` | no | @@ -81,7 +72,6 @@ Creates DataDog monitors with the following checks: | Name | Description | |------|-------------| -| network_connections_id | id for monitor network_connections | | queries_changing_anomaly_id | id for monitor queries_changing_anomaly | | questions_changing_anomaly_id | id for monitor questions_changing_anomaly | | replication_lag_id | id for monitor replication_lag | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index 2d7bdf4..997f66f 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -33,57 +33,6 @@ variable "project_id" { description = "ID of the GCP Project" } -# -# Network Connections -# -variable "network_connections_enabled" { - description = "Whether or not to create the monitor" - type = "string" - default = "true" -} - -variable "network_connections_message" { - description = "Custom message for the Network Connections monitor" - type = "string" - default = "" -} - -variable "network_connections_timeframe" { - description = "Timeframe for the 
Network Connections monitor" - type = "string" - default = "last_15m" -} - -variable "network_connections_hard_limit" { - description = "Max number of connections for the CloudSQL Instance. Default value is the max value on https://cloud.google.com/sql/docs/quotas#fixed-limits for MySQL" - type = "string" - default = 4000 -} - -variable "network_connections_threshold_warning" { - description = "Number of network connections (warning threshold)" - type = "string" - default = 3200 -} - -variable "network_connections_threshold_critical" { - description = "Number of network connections (critical threshold)" - type = "string" - default = 3600 -} - -variable "network_connections_silenced" { - description = "Groups to mute for GCP Cloud SQL Network Connections monitor" - type = "map" - default = {} -} - -variable "network_connections_extra_tags" { - description = "Extra tags for GCP Cloud SQL Network Connections monitor" - type = "list" - default = [] -} - # # Replication Lag # diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index d31ba08..1d90cad 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -11,53 +11,6 @@ data "template_file" "filter" { } } -# -# MySQL Network Connections -# -resource "datadog_monitor" "network_connections" { - count = "${var.network_connections_enabled == "true" ? 
1 : 0 }" - - name = "[${var.environment}] Cloud SQL MySQL Network Connections (hard limit: ${var.network_connections_hard_limit}) {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.network_connections_message, var.message)}" - - type = "metric alert" - - query = < ${var.network_connections_threshold_critical} -EOF - - thresholds { - warning = "${var.network_connections_threshold_warning}" - critical = "${var.network_connections_threshold_critical}" - } - - notify_no_data = false - require_full_window = false - renotify_interval = 0 - notify_audit = false - timeout_h = 0 - include_tags = true - locked = false - - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" - - silenced = "${var.network_connections_silenced}" - - tags = [ - "team:gcp", - "provider:gcp", - "env:${var.environment}", - "created_by:terraform", - "resource:cloud-sql", - "engine:mysql", - "${var.network_connections_extra_tags}", - ] -} - # # Replication Lag # diff --git a/cloud/gcp/cloud-sql/mysql/outputs.tf b/cloud/gcp/cloud-sql/mysql/outputs.tf index e093106..ec11b98 100644 --- a/cloud/gcp/cloud-sql/mysql/outputs.tf +++ b/cloud/gcp/cloud-sql/mysql/outputs.tf @@ -1,8 +1,3 @@ -output "network_connections_id" { - description = "id for monitor network_connections" - value = "${datadog_monitor.network_connections.id}" -} - output "replication_lag_id" { description = "id for monitor replication_lag" value = "${datadog_monitor.replication_lag.id}" From 82d3ee2f910929a4c84443fd8c21bb6bf8ed4fe4 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Tue, 31 Jul 2018 17:43:02 +0200 Subject: [PATCH 37/46] MON-224 Revert the control of monitors creation. 
It must be managed in a specific issue --- cloud/gcp/cloud-sql/instance/README.md | 6 ---- cloud/gcp/cloud-sql/instance/inputs.tf | 30 ------------------- .../instance/monitors-cloud-sql-instance.tf | 12 -------- cloud/gcp/cloud-sql/mysql/README.md | 3 -- cloud/gcp/cloud-sql/mysql/inputs.tf | 15 ---------- .../mysql/monitors-cloudsql-mysql.tf | 6 ---- 6 files changed, 72 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index d5829c1..7d3b0b7 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -27,7 +27,6 @@ Creates DataDog monitors with the following checks: | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| cpu_utilization_enabled | Whether or not to create the monitor | string | `true` | no | | cpu_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | | cpu_utilization_message | Custom message for the CPU Utilization monitor | string | `` | no | | cpu_utilization_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | @@ -35,11 +34,9 @@ Creates DataDog monitors with the following checks: | cpu_utilization_threshold_warning | CPU Utilization in percentage (warning threshold) | string | `80` | no | | cpu_utilization_timeframe | Timeframe for the CPU Utilization monitor | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | -| disk_utilization_enabled | Whether or not to create the monitor | string | `true` | no | | disk_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | | disk_utilization_forecast_algorithm | Algorithm for the Disk Utilization Forecast monitor | string | `linear` | no | | disk_utilization_forecast_deviations | Deviations for the Disk Utilization Forecast monitor | string | `1` | no | -| disk_utilization_forecast_enabled | Whether 
or not to create the monitor | string | `true` | no | | disk_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL Disk Utilization Forecast monitor | list | `` | no | | disk_utilization_forecast_interval | Interval for the Disk Utilization Forecast monitor | string | `60m` | no | | disk_utilization_forecast_linear_history | History for the Disk Utilization Forecast monitor | string | `3d` | no | @@ -56,7 +53,6 @@ Creates DataDog monitors with the following checks: | disk_utilization_threshold_warning | Disk Utilization in percentage (warning threshold) | string | `80` | no | | disk_utilization_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | -| failover_unavailable_enabled | Whether or not to create the monitor | string | `true` | no | | failover_unavailable_extra_tags | Extra tags for GCP Cloud SQL Failover Unavailable monitor | list | `` | no | | failover_unavailable_message | Custom message for the Failover Unavailable monitor | string | `` | no | | failover_unavailable_silenced | Groups to mute for GCP Cloud SQL Failover Unavailable monitor | map | `` | no | @@ -64,11 +60,9 @@ Creates DataDog monitors with the following checks: | failover_unavailable_timeframe | Timeframe for the Failover Unavailable monitor | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | -| memory_utilization_enabled | Whether or not to create the monitor | string | `true` | no | | memory_utilization_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization monitor | list | `` | no | | memory_utilization_forecast_algorithm | Algorithm for the Memory Utilization Forecast monitor | string | `linear` | no | | memory_utilization_forecast_deviations | Deviations for the Memory Utilization Forecast monitor | 
string | `1` | no | -| memory_utilization_forecast_enabled | Whether or not to create the monitor | string | `true` | no | | memory_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization Forecast monitor | list | `` | no | | memory_utilization_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | | memory_utilization_forecast_linear_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index ca4761c..c83fcc9 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -36,11 +36,6 @@ variable "project_id" { # # CPU # -variable "cpu_utilization_enabled" { - description = "Whether or not to create the monitor" - type = "string" - default = "true" -} variable "cpu_utilization_message" { description = "Custom message for the CPU Utilization monitor" @@ -81,11 +76,6 @@ variable "cpu_utilization_extra_tags" { # # DISK Utilization # -variable "disk_utilization_enabled" { - description = "Whether or not to create the monitor" - type = "string" - default = "true" -} variable "disk_utilization_message" { description = "Custom message for the Disk Utilization monitor" @@ -126,11 +116,6 @@ variable "disk_utilization_extra_tags" { # # DISK Utilization Forecast # -variable "disk_utilization_forecast_enabled" { - description = "Whether or not to create the monitor" - type = "string" - default = "true" -} variable "disk_utilization_forecast_message" { description = "Custom message for the Disk Utilization Forecast monitor" @@ -207,11 +192,6 @@ variable "disk_utilization_forecast_extra_tags" { # # Memory Utilization # -variable "memory_utilization_enabled" { - description = "Whether or not to create the monitor" - type = "string" - default = "true" -} variable "memory_utilization_message" { description = "Custom message for the Memory Utilization 
monitor" @@ -248,11 +228,6 @@ variable "memory_utilization_extra_tags" { # # Memory Utilization Forecast # -variable "memory_utilization_forecast_enabled" { - description = "Whether or not to create the monitor" - type = "string" - default = "true" -} variable "memory_utilization_forecast_message" { description = "Custom message for the Memory Utilization Forecast monitor" @@ -325,11 +300,6 @@ variable "memory_utilization_forecast_extra_tags" { # # Failover Unavailable # -variable "failover_unavailable_enabled" { - description = "Whether or not to create the monitor" - type = "string" - default = "true" -} variable "failover_unavailable_message" { description = "Custom message for the Failover Unavailable monitor" diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index d43ec11..a961f89 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -15,8 +15,6 @@ data "template_file" "filter" { # CPU Utilization # resource "datadog_monitor" "cpu_utilization" { - count = "${var.cpu_utilization_enabled == "true" ? 1 : 0 }" - name = "[${var.environment}] Cloud SQL CPU Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.cpu_utilization_message, var.message)}" @@ -62,8 +60,6 @@ EOF # Disk Utilization # resource "datadog_monitor" "disk_utilization" { - count = "${var.disk_utilization_enabled == "true" ? 
1 : 0 }" - name = "[${var.environment}] Cloud SQL Disk Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.disk_utilization_message, var.message)}" @@ -109,8 +105,6 @@ EOF # Disk Utilization Forecast # resource "datadog_monitor" "disk_utilization_forecast" { - count = "${var.disk_utilization_forecast_enabled == "true" ? 1 : 0 }" - name = "[${var.environment}] Cloud SQL Disk Utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" message = "${coalesce(var.disk_utilization_forecast_message, var.message)}" @@ -161,8 +155,6 @@ EOF # Memory Utilization # resource "datadog_monitor" "memory_utilization" { - count = "${var.memory_utilization_enabled == "true" ? 1 : 0 }" - name = "[${var.environment}] Cloud SQL Memory Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.memory_utilization_message, var.message)}" @@ -208,8 +200,6 @@ EOF # Memory Utilization Forecast # resource "datadog_monitor" "memory_utilization_forecast" { - count = "${var.memory_utilization_forecast_enabled == "true" ? 1 : 0 }" - name = "[${var.environment}] Cloud SQL Memory Utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future" message = "${coalesce(var.memory_utilization_forecast_message, var.message)}" @@ -260,8 +250,6 @@ EOF # Failover Unavailable # resource "datadog_monitor" "failover_unavailable" { - count = "${var.failover_unavailable_enabled == "true" ? 
1 : 0 }" - name = "[${var.environment}] Cloud SQL Failover Unavailable" message = "${coalesce(var.failover_unavailable_message, var.message)}" diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index 1e3b7ce..7066482 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -35,7 +35,6 @@ Creates DataDog monitors with the following checks: | queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | | queries_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no | | queries_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | -| queries_changing_anomaly_enabled | Whether or not to create the monitor | string | `true` | no | | queries_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `` | no | | queries_changing_anomaly_interval | Interval. | string | `20` | no | | queries_changing_anomaly_message | Custom message for the Queries Changing monitor | string | `` | no | @@ -50,7 +49,6 @@ Creates DataDog monitors with the following checks: | questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | | questions_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no | | questions_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | -| questions_changing_anomaly_enabled | Whether or not to create the monitor | string | `true` | no | | questions_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `` | no | | questions_changing_anomaly_interval | Interval. 
| string | `20` | no | | questions_changing_anomaly_message | Custom message for the Questions Changing monitor | string | `` | no | @@ -60,7 +58,6 @@ Creates DataDog monitors with the following checks: | questions_changing_anomaly_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no | | questions_changing_anomaly_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | | questions_changing_anomaly_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no | -| replication_lag_enabled | Whether or not to create the monitor | string | `true` | no | | replication_lag_extra_tags | Extra tags for GCP Cloud SQL SQL Replication monitor | list | `` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index 997f66f..6872b23 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -36,11 +36,6 @@ variable "project_id" { # # Replication Lag # -variable "replication_lag_enabled" { - description = "Whether or not to create the monitor" - type = "string" - default = "true" -} variable "replication_lag_message" { description = "Custom message for the Replication Lag monitor" @@ -81,11 +76,6 @@ variable "replication_lag_extra_tags" { # # Queries Changing Abnormally # -variable "queries_changing_anomaly_enabled" { - description = "Whether or not to create the monitor" - type = "string" - default = "true" -} variable "queries_changing_anomaly_message" { description = "Custom message for the Queries Changing monitor" @@ -174,11 +164,6 @@ variable "queries_changing_anomaly_extra_tags" { # # Questions Changing # -variable "questions_changing_anomaly_enabled" { - description = "Whether or not to create the monitor" - 
type = "string" - default = "true" -} variable "questions_changing_anomaly_message" { description = "Custom message for the Questions Changing monitor" diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 1d90cad..2c360cf 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -15,8 +15,6 @@ data "template_file" "filter" { # Replication Lag # resource "datadog_monitor" "replication_lag" { - count = "${var.replication_lag_enabled == "true" ? 1 : 0 }" - name = "[${var.environment}] Cloud SQL MySQL Replication Lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}" message = "${coalesce(var.replication_lag_message, var.message)}" @@ -63,8 +61,6 @@ EOF # Queries Anomaly # resource "datadog_monitor" "queries_changing_anomaly" { - count = "${var.queries_changing_anomaly_enabled == "true" ? 1 : 0 }" - name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally" message = "${coalesce(var.queries_changing_anomaly_message, var.message)}" @@ -119,8 +115,6 @@ EOF # Questions Anomaly # resource "datadog_monitor" "questions_changing_anomaly" { - count = "${var.questions_changing_anomaly_enabled == "true" ? 
1 : 0 }" - name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally" message = "${coalesce(var.questions_changing_anomaly_message, var.message)}" From 4c4c24a34f42df11295ee3cd7a13b566741cfcd7 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 2 Aug 2018 11:31:55 +0200 Subject: [PATCH 38/46] MON-224 time aggregator for all monitors --- cloud/gcp/cloud-sql/instance/README.md | 6 ++++ cloud/gcp/cloud-sql/instance/inputs.tf | 34 +++++++++++++++++++ .../instance/monitors-cloud-sql-instance.tf | 12 +++---- cloud/gcp/cloud-sql/mysql/README.md | 3 ++ cloud/gcp/cloud-sql/mysql/inputs.tf | 18 ++++++++++ .../mysql/monitors-cloudsql-mysql.tf | 6 ++-- 6 files changed, 70 insertions(+), 9 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 7d3b0b7..2a3aa54 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -32,6 +32,7 @@ Creates DataDog monitors with the following checks: | cpu_utilization_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | | cpu_utilization_threshold_critical | CPU Utilization in percentage (critical threshold) | string | `90` | no | | cpu_utilization_threshold_warning | CPU Utilization in percentage (warning threshold) | string | `80` | no | +| cpu_utilization_time_aggregator | Time aggregator for the CPU Utilization monitor | string | `avg` | no | | cpu_utilization_timeframe | Timeframe for the CPU Utilization monitor | string | `last_15m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | disk_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | @@ -46,17 +47,20 @@ Creates DataDog monitors with the following checks: | disk_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Disk Utilization Forecast monitor | map | `` | no | | disk_utilization_forecast_threshold_critical | Disk Utilization 
Forecast in percentage (critical threshold) | string | `80` | no | | disk_utilization_forecast_threshold_critical_recovery | Disk Utilization Forecast in percentage (recovery threshold) | string | `72` | no | +| disk_utilization_forecast_time_aggregator | Time aggregator for the Disk Utilization Forecast monitor | string | `max` | no | | disk_utilization_forecast_timeframe | Timeframe for the Disk Utilization Forecast monitor | string | `next_1w` | no | | disk_utilization_message | Custom message for the Disk Utilization monitor | string | `` | no | | disk_utilization_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | | disk_utilization_threshold_critical | Disk Utilization in percentage (critical threshold) | string | `90` | no | | disk_utilization_threshold_warning | Disk Utilization in percentage (warning threshold) | string | `80` | no | +| disk_utilization_time_aggregator | Time aggregator for the Disk Utilization monitor | string | `avg` | no | | disk_utilization_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | | failover_unavailable_extra_tags | Extra tags for GCP Cloud SQL Failover Unavailable monitor | list | `` | no | | failover_unavailable_message | Custom message for the Failover Unavailable monitor | string | `` | no | | failover_unavailable_silenced | Groups to mute for GCP Cloud SQL Failover Unavailable monitor | map | `` | no | | failover_unavailable_threshold_critical | Failover Unavailable critical threshold | string | `0` | no | +| failover_unavailable_time_aggregator | Time aggregator for the Failover Unavailable monitor | string | `max` | no | | failover_unavailable_timeframe | Timeframe for the Failover Unavailable monitor | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter
tags convention | string | `true` | no | @@ -72,11 +76,13 @@ Creates DataDog monitors with the following checks: | memory_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | | memory_utilization_forecast_threshold_critical | Memory Utilization Forecast in percentage (warning threshold) | string | `90` | no | | memory_utilization_forecast_threshold_critical_recovery | Memory Utilization Forecast in percentage (recovery threshold) | string | `81` | no | +| memory_utilization_forecast_time_aggregator | Time aggregator for the Memory Utilization Forecast monitor | string | `max` | no | | memory_utilization_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | | memory_utilization_message | Custom message for the Memory Utilization monitor | string | `` | no | | memory_utilization_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | | memory_utilization_threshold_critical | Memory Utilization in percentage (critical threshold) | string | `90` | no | | memory_utilization_threshold_warning | Memory Utilization in percentage (warning threshold) | string | `80` | no | +| memory_utilization_time_aggregator | Time aggregator for the Memory Utilization monitor | string | `avg` | no | | memory_utilization_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | project_id | ID of the GCP Project | string | - | yes | diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index c83fcc9..f9e5149 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -43,6 +43,12 @@ variable "cpu_utilization_message" { default = "" } +variable "cpu_utilization_time_aggregator" { + description = "Time aggregator for the CPU Utilization monitor" + type = "string" + 
default = "avg" +} + variable "cpu_utilization_timeframe" { description = "Timeframe for the CPU Utilization monitor" type = "string" @@ -83,6 +89,12 @@ variable "disk_utilization_message" { default = "" } +variable "disk_utilization_time_aggregator" { + description = "Time aggregator for the Disk Utilization monitor" + type = "string" + default = "avg" +} + variable "disk_utilization_timeframe" { description = "Timeframe for the Disk Utilization monitor" type = "string" @@ -123,6 +135,12 @@ variable "disk_utilization_forecast_message" { default = "" } +variable "disk_utilization_forecast_time_aggregator" { + description = "Time aggregator for the Disk Utilization Forecast monitor" + type = "string" + default = "max" +} + variable "disk_utilization_forecast_timeframe" { description = "Timeframe for the Disk Utilization Forecast monitor" type = "string" @@ -198,6 +216,11 @@ variable "memory_utilization_message" { default = "" } +variable "memory_utilization_time_aggregator" { + description = "Time aggregator for the Memory Utilization monitor" + default = "avg" +} + variable "memory_utilization_timeframe" { description = "Timeframe for the Memory Utilization monitor" default = "last_5m" @@ -234,6 +257,11 @@ variable "memory_utilization_forecast_message" { default = "" } +variable "memory_utilization_forecast_time_aggregator" { + description = "Time aggregator for the Memory Utilization Forecast monitor" + default = "max" +} + variable "memory_utilization_forecast_timeframe" { description = "Timeframe for the Memory Utilization Forecast monitor" default = "next_3d" @@ -307,6 +335,12 @@ variable "failover_unavailable_message" { default = "" } +variable "failover_unavailable_time_aggregator" { + description = "Time aggreggator for the Failover Unavailable monitor" + type = "string" + default = "max" +} + variable "failover_unavailable_timeframe" { description = "Timeframe for the Failover Unavailable monitor" type = "string" diff --git 
a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index a961f89..201722a 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -21,7 +21,7 @@ resource "datadog_monitor" "cpu_utilization" { type = "metric alert" query = < ${var.cpu_utilization_threshold_critical} @@ -66,7 +66,7 @@ resource "datadog_monitor" "disk_utilization" { type = "metric alert" query = < ${var.disk_utilization_threshold_critical} @@ -111,7 +111,7 @@ resource "datadog_monitor" "disk_utilization_forecast" { type = "metric alert" query = < ${var.memory_utilization_threshold_critical} @@ -206,7 +206,7 @@ resource "datadog_monitor" "memory_utilization_forecast" { type = "query alert" query = <` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | | replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `180` | no | | replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `90` | no | +| replication_lag_time_aggregator | Time aggregator for the Replication Lag monitor | string | `min` | no | | replication_lag_timeframe | Timeframe for the Replication Lag monitor | string | `last_10m` | no | ## Outputs diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index 6872b23..06d2336 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -43,6 +43,12 @@ variable "replication_lag_message" { default = "" } +variable "replication_lag_time_aggregator" { + description = "Time aggregator for the Replication Lag monitor" + type = "string" + default = "min" +} + variable "replication_lag_timeframe" { description = "Timeframe for the Replication Lag monitor" 
type = "string" @@ -83,6 +89,12 @@ variable "queries_changing_anomaly_message" { default = "" } +variable "queries_changing_anomaly_time_aggregator" { + description = "Time aggregator for the Queries Changing mon monitor" + type = "string" + default = "avg" +} + variable "queries_changing_anomaly_timeframe" { description = "Timeframe for the Queries Changing mon monitor" type = "string" @@ -171,6 +183,12 @@ variable "questions_changing_anomaly_message" { default = "" } +variable "questions_changing_anomaly_time_aggregator" { + description = "Time aggregator for the Questions Changing monitor" + type = "string" + default = "avg" +} + variable "questions_changing_anomaly_timeframe" { description = "Timeframe for the Questions Changing monitor" type = "string" diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 2c360cf..d6860ab 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -21,7 +21,7 @@ resource "datadog_monitor" "replication_lag" { type = "metric alert" query = < ${var.replication_lag_threshold_critical} @@ -67,7 +67,7 @@ resource "datadog_monitor" "queries_changing_anomaly" { type = "query alert" query = < Date: Fri, 10 Aug 2018 18:37:42 +0200 Subject: [PATCH 39/46] MON-224 no_data_timeframe removed from monitors --- cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf | 4 ---- cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf | 1 - 2 files changed, 5 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 201722a..114a755 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -36,7 +36,6 @@ EOF locked = false timeout_h = 0 include_tags = true - no_data_timeframe = 30 require_full_window = false notify_no_data = true 
renotify_interval = 0 @@ -81,7 +80,6 @@ EOF locked = false timeout_h = 0 include_tags = true - no_data_timeframe = 20 require_full_window = false notify_no_data = true renotify_interval = 0 @@ -176,7 +174,6 @@ EOF locked = false timeout_h = 0 include_tags = true - no_data_timeframe = 20 require_full_window = false notify_no_data = true renotify_interval = 0 @@ -270,7 +267,6 @@ EOF locked = false timeout_h = 0 include_tags = true - no_data_timeframe = 20 require_full_window = false notify_no_data = true renotify_interval = 0 diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index d6860ab..a4d7ce5 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -36,7 +36,6 @@ EOF locked = false timeout_h = 0 include_tags = true - no_data_timeframe = 25 require_full_window = false notify_no_data = true renotify_interval = 0 From d6bdc2c5864c3ae16056234b7dc4051b066f1368 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 13 Aug 2018 16:19:04 +0200 Subject: [PATCH 40/46] MON-224 Split delay in evaluation_delay and new_host_delay --- cloud/gcp/cloud-sql/instance/README.md | 3 ++- cloud/gcp/cloud-sql/instance/inputs.tf | 7 +++++- .../instance/monitors-cloud-sql-instance.tf | 24 +++++++++---------- cloud/gcp/cloud-sql/mysql/README.md | 3 ++- cloud/gcp/cloud-sql/mysql/inputs.tf | 7 +++++- .../mysql/monitors-cloudsql-mysql.tf | 12 +++++----- 6 files changed, 34 insertions(+), 22 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 2a3aa54..a9fed1b 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -34,7 +34,6 @@ Creates DataDog monitors with the following checks: | cpu_utilization_threshold_warning | CPU Utilization in percentage (warning threshold) | string | `80` | no | | cpu_utilization_time_aggregator | Time aggregator 
for the CPU Utilization monitor | string | `avg` | no | | cpu_utilization_timeframe | Timeframe for the CPU Utilization monitor | string | `last_15m` | no | -| delay | Delay in seconds for the metric evaluation | string | `900` | no | | disk_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | | disk_utilization_forecast_algorithm | Algorithm for the Disk Utilization Forecast monitor | string | `linear` | no | | disk_utilization_forecast_deviations | Deviations for the Disk Utilization Forecast monitor | string | `1` | no | @@ -56,6 +55,7 @@ Creates DataDog monitors with the following checks: | disk_utilization_time_aggregator | Time aggregator for the Disk Utilization monitor | string | `avg` | no | | disk_utilization_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | failover_unavailable_extra_tags | Extra tags for GCP Cloud SQL Failover Unavailable monitor | list | `` | no | | failover_unavailable_message | Custom message for the Failover Unavailable monitor | string | `` | no | | failover_unavailable_silenced | Groups to mute for GCP Cloud SQL Failover Unavailable monitor | map | `` | no | @@ -85,6 +85,7 @@ Creates DataDog monitors with the following checks: | memory_utilization_time_aggregator | Time aggregator for the Memory Utilization monitor | string | `avg` | no | | memory_utilization_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no | | project_id | ID of the GCP Project | string | - | yes | ## Outputs diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index f9e5149..c4c8909 100644 --- 
a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -20,11 +20,16 @@ variable "message" { description = "Message sent when a monitor is triggered" } -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" default = 900 } +variable "new_host_delay" { + description = "Delay in seconds for the new host evaluation" + default = 300 +} + # # Filter variables # diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 114a755..f4c45d2 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -40,8 +40,8 @@ EOF notify_no_data = true renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.cpu_utilization_silenced}" @@ -84,8 +84,8 @@ EOF notify_no_data = true renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.disk_utilization_silenced}" @@ -134,8 +134,8 @@ EOF notify_no_data = false renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.disk_utilization_forecast_silenced}" @@ -178,8 +178,8 @@ EOF notify_no_data = true renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.memory_utilization_silenced}" @@ -228,8 +228,8 @@ EOF notify_no_data = false renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + 
evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.memory_utilization_forecast_silenced}" @@ -271,8 +271,8 @@ EOF notify_no_data = true renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.failover_unavailable_silenced}" diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index f194fea..7786bb8 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -24,11 +24,12 @@ Creates DataDog monitors with the following checks: | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| delay | Delay in seconds for the metric evaluation | string | `900` | no | | environment | Architecture environment | string | - | yes | +| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a monitor is triggered | string | - | yes | +| new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no | | project_id | ID of the GCP Project | string | - | yes | | queries_changing_anomaly_alert_window | Alert window. | string | `last_30m` | no | | queries_changing_anomaly_count_default_zero | Count default zero. 
| string | `false` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index 06d2336..a22be9b 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -20,11 +20,16 @@ variable "message" { description = "Message sent when a monitor is triggered" } -variable "delay" { +variable "evaluation_delay" { description = "Delay in seconds for the metric evaluation" default = 900 } +variable "new_host_delay" { + description = "Delay in seconds for the new host evaluation" + default = 300 +} + # # Filter variables # diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index a4d7ce5..3bb6101 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -40,8 +40,8 @@ EOF notify_no_data = true renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.replication_lag_silenced}" @@ -94,8 +94,8 @@ EOF notify_no_data = false renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.queries_changing_anomaly_silenced}" @@ -148,8 +148,8 @@ EOF notify_no_data = false renotify_interval = 0 - evaluation_delay = "${var.delay}" - new_host_delay = "${var.delay}" + evaluation_delay = "${var.evaluation_delay}" + new_host_delay = "${var.new_host_delay}" silenced = "${var.questions_changing_anomaly_silenced}" From eae2eeba30428e1d38b257aba9e07a70b01c9d5e Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Mon, 13 Aug 2018 16:26:06 +0200 Subject: [PATCH 41/46] MON-224 Standardize filter --- cloud/gcp/cloud-sql/instance/README.md | 4 +-- cloud/gcp/cloud-sql/instance/inputs.tf | 17 ++----------- 
.../instance/monitors-cloud-sql-instance.tf | 25 +++++-------------- cloud/gcp/cloud-sql/mysql/README.md | 4 +-- cloud/gcp/cloud-sql/mysql/inputs.tf | 17 ++----------- .../mysql/monitors-cloudsql-mysql.tf | 19 +++----------- 6 files changed, 15 insertions(+), 71 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index a9fed1b..fe2ff8a 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -62,8 +62,7 @@ Creates DataDog monitors with the following checks: | failover_unavailable_threshold_critical | Failover Unavailable critical threshold | string | `0` | no | | failover_unavailable_time_aggregator | Time aggreggator for the Failover Unavailable monitor | string | `max` | no | | failover_unavailable_timeframe | Timeframe for the Failover Unavailable monitor | string | `last_5m` | no | -| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | -| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| filter_tags | Tags used for filtering | string | `*` | no | | memory_utilization_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization monitor | list | `` | no | | memory_utilization_forecast_algorithm | Algorithm for the Memory Utilization Forecast monitor | string | `linear` | no | | memory_utilization_forecast_deviations | Deviations for the Memory Utilization Forecast monitor | string | `1` | no | @@ -86,7 +85,6 @@ Creates DataDog monitors with the following checks: | memory_utilization_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no | -| project_id | ID of the GCP Project | string | - | yes | ## Outputs diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf 
b/cloud/gcp/cloud-sql/instance/inputs.tf index c4c8909..e0a2ea8 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -6,13 +6,8 @@ variable "environment" { type = "string" } -variable "filter_tags_use_defaults" { - description = "Use default filter tags convention" - default = "true" -} - -variable "filter_tags_custom" { - description = "Tags used for custom filtering when filter_tags_use_defaults is false" +variable "filter_tags" { + description = "Tags used for filtering" default = "*" } @@ -30,14 +25,6 @@ variable "new_host_delay" { default = 300 } -# -# Filter variables -# -variable "project_id" { - type = "string" - description = "ID of the GCP Project" -} - # # CPU # diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index f4c45d2..6764dae 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -1,16 +1,3 @@ -# -# FILTER -# -data "template_file" "filter" { - template = "$${filter}" - - vars { - filter = "${var.filter_tags_use_defaults == "true" ? 
- format("project_id:%s", var.project_id) : - "${var.filter_tags_custom}"}" - } -} - # # CPU Utilization # @@ -22,7 +9,7 @@ resource "datadog_monitor" "cpu_utilization" { query = < ${var.cpu_utilization_threshold_critical} EOF @@ -66,7 +53,7 @@ resource "datadog_monitor" "disk_utilization" { query = < ${var.disk_utilization_threshold_critical} EOF @@ -111,7 +98,7 @@ resource "datadog_monitor" "disk_utilization_forecast" { query = < ${var.memory_utilization_threshold_critical} EOF @@ -205,7 +192,7 @@ resource "datadog_monitor" "memory_utilization_forecast" { query = < ${var.replication_lag_threshold_critical} EOF @@ -68,7 +55,7 @@ resource "datadog_monitor" "queries_changing_anomaly" { query = < Date: Mon, 13 Aug 2018 16:33:13 +0200 Subject: [PATCH 42/46] MON-224 Fix queries removing } --- .../instance/monitors-cloud-sql-instance.tf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 6764dae..8a003cf 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -9,7 +9,7 @@ resource "datadog_monitor" "cpu_utilization" { query = < ${var.cpu_utilization_threshold_critical} EOF @@ -53,7 +53,7 @@ resource "datadog_monitor" "disk_utilization" { query = < ${var.disk_utilization_threshold_critical} EOF @@ -98,7 +98,7 @@ resource "datadog_monitor" "disk_utilization_forecast" { query = < ${var.memory_utilization_threshold_critical} EOF @@ -192,7 +192,7 @@ resource "datadog_monitor" "memory_utilization_forecast" { query = < Date: Tue, 21 Aug 2018 16:22:12 +0200 Subject: [PATCH 43/46] MON-224 Tags migrated to the new standard --- .../instance/monitors-cloud-sql-instance.tf | 54 +++---------------- .../mysql/monitors-cloudsql-mysql.tf | 30 ++--------- 2 files changed, 9 insertions(+), 75 deletions(-) diff --git 
a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 8a003cf..f4c5c60 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -32,14 +32,7 @@ EOF silenced = "${var.cpu_utilization_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "resource:cloud-sql", - "env:${var.environment}", - "created_by:terraform", - "${var.cpu_utilization_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.cpu_utilization_extra_tags}"] } # @@ -76,14 +69,7 @@ EOF silenced = "${var.disk_utilization_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "env:${var.environment}", - "created_by:terraform", - "resource:cloud-sql", - "${var.disk_utilization_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.disk_utilization_extra_tags}"] } # @@ -126,14 +112,7 @@ EOF silenced = "${var.disk_utilization_forecast_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "env:${var.environment}", - "created_by:terraform", - "resource:cloud-sql", - "${var.disk_utilization_forecast_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.disk_utilization_forecast_extra_tags}"] } # @@ -170,14 +149,7 @@ EOF silenced = "${var.memory_utilization_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "env:${var.environment}", - "created_by:terraform", - "resource:cloud-sql", - "${var.memory_utilization_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.memory_utilization_extra_tags}"] } # @@ -220,14 +192,7 @@ EOF silenced = 
"${var.memory_utilization_forecast_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "env:${var.environment}", - "created_by:terraform", - "resource:cloud-sql", - "${var.memory_utilization_forecast_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.memory_utilization_forecast_extra_tags}"] } # @@ -263,12 +228,5 @@ EOF silenced = "${var.failover_unavailable_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "env:${var.environment}", - "created_by:terraform", - "resource:cloud-sql", - "${var.failover_unavailable_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.failover_unavailable_extra_tags}"] } diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index d4777cb..02d7725 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -32,15 +32,7 @@ EOF silenced = "${var.replication_lag_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "env:${var.environment}", - "created_by:terraform", - "resource:cloud-sql", - "engine:mysql", - "${var.replication_lag_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "engine:mysql", "${var.replication_lag_extra_tags}"] } # @@ -86,15 +78,7 @@ EOF silenced = "${var.queries_changing_anomaly_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "env:${var.environment}", - "created_by:terraform", - "resource:cloud-sql", - "engine:mysql", - "${var.queries_changing_anomaly_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "engine:mysql", "${var.queries_changing_anomaly_extra_tags}"] } # @@ -140,13 
+124,5 @@ EOF silenced = "${var.questions_changing_anomaly_silenced}" - tags = [ - "team:gcp", - "provider:gcp", - "env:${var.environment}", - "created_by:terraform", - "resource:cloud-sql", - "engine:mysql", - "${var.questions_changing_anomaly_extra_tags}", - ] + tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "engine:mysql", "${var.questions_changing_anomaly_extra_tags}"] } From 5f8f1ef23ff59ba013218483220f50c5fbbfca27 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Tue, 21 Aug 2018 16:47:13 +0200 Subject: [PATCH 44/46] MON-224 Common monitors are now in the folder common --- README.md | 2 +- cloud/gcp/cloud-sql/{instance => common}/README.md | 6 +++--- cloud/gcp/cloud-sql/{instance => common}/inputs.tf | 0 .../{instance => common}/monitors-cloud-sql-instance.tf | 0 cloud/gcp/cloud-sql/{instance => common}/outputs.tf | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename cloud/gcp/cloud-sql/{instance => common}/README.md (97%) rename cloud/gcp/cloud-sql/{instance => common}/inputs.tf (100%) rename cloud/gcp/cloud-sql/{instance => common}/monitors-cloud-sql-instance.tf (100%) rename cloud/gcp/cloud-sql/{instance => common}/outputs.tf (100%) diff --git a/README.md b/README.md index 892649a..4439901 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ The `//` is very important, it's a terraform specific syntax used to separate gi - [stream-analytics](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/stream-analytics/) - [gcp](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/) - [cloud-sql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/) - - [instance](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/instance/) + - [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/common/) - 
[mysql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/mysql/) - [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/) - [alerting-message](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/alerting-message/) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/common/README.md similarity index 97% rename from cloud/gcp/cloud-sql/instance/README.md rename to cloud/gcp/cloud-sql/common/README.md index fe2ff8a..8cf09fd 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/common/README.md @@ -1,10 +1,10 @@ -# CLOUD GCP CLOUD-SQL INSTANCE DataDog monitors +# CLOUD GCP CLOUD-SQL COMMON DataDog monitors ## How to use this module ``` -module "datadog-monitors-cloud-gcp-cloud-sql-instance" { - source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/cloud-sql/instance?ref={revision}" +module "datadog-monitors-cloud-gcp-cloud-sql-common" { + source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/cloud-sql/common?ref={revision}" environment = "${var.environment}" message = "${module.datadog-message-alerting.alerting-message}" diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/common/inputs.tf similarity index 100% rename from cloud/gcp/cloud-sql/instance/inputs.tf rename to cloud/gcp/cloud-sql/common/inputs.tf diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/common/monitors-cloud-sql-instance.tf similarity index 100% rename from cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf rename to cloud/gcp/cloud-sql/common/monitors-cloud-sql-instance.tf diff --git a/cloud/gcp/cloud-sql/instance/outputs.tf b/cloud/gcp/cloud-sql/common/outputs.tf similarity index 100% rename from cloud/gcp/cloud-sql/instance/outputs.tf rename to cloud/gcp/cloud-sql/common/outputs.tf From 034336a07c9cb4afad8bd6c27eea94dcb7078b24 Mon 
Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 11:11:15 +0200 Subject: [PATCH 45/46] MON-224 auto update --- cloud/gcp/cloud-sql/common/README.md | 6 +++--- cloud/gcp/cloud-sql/common/outputs.tf | 12 ++++++------ cloud/gcp/cloud-sql/mysql/README.md | 2 +- cloud/gcp/cloud-sql/mysql/outputs.tf | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cloud/gcp/cloud-sql/common/README.md b/cloud/gcp/cloud-sql/common/README.md index 8cf09fd..879b1b8 100644 --- a/cloud/gcp/cloud-sql/common/README.md +++ b/cloud/gcp/cloud-sql/common/README.md @@ -17,11 +17,11 @@ module "datadog-monitors-cloud-gcp-cloud-sql-common" { Creates DataDog monitors with the following checks: - Cloud SQL CPU Utilization +- Cloud SQL Disk Utilization forecast - Cloud SQL Disk Utilization -- Cloud SQL Disk Utilization could reach -- Cloud SQL Memory Utilization -- Cloud SQL Memory Utilization could reach - Cloud SQL Failover Unavailable +- Cloud SQL Memory Utilization forecast +- Cloud SQL Memory Utilization ## Inputs diff --git a/cloud/gcp/cloud-sql/common/outputs.tf b/cloud/gcp/cloud-sql/common/outputs.tf index 8fb4292..d07f065 100644 --- a/cloud/gcp/cloud-sql/common/outputs.tf +++ b/cloud/gcp/cloud-sql/common/outputs.tf @@ -1,29 +1,29 @@ output "cpu_utilization_id" { description = "id for monitor cpu_utilization" - value = "${datadog_monitor.cpu_utilization.id}" + value = "${datadog_monitor.cpu_utilization.*.id}" } output "disk_utilization_id" { description = "id for monitor disk_utilization" - value = "${datadog_monitor.disk_utilization.id}" + value = "${datadog_monitor.disk_utilization.*.id}" } output "disk_utilization_forecast_id" { description = "id for monitor disk_utilization_forecast" - value = "${datadog_monitor.disk_utilization_forecast.id}" + value = "${datadog_monitor.disk_utilization_forecast.*.id}" } output "memory_utilization_id" { description = "id for monitor memory_utilization" - value = "${datadog_monitor.memory_utilization.id}" + value = 
"${datadog_monitor.memory_utilization.*.id}" } output "memory_utilization_forecast_id" { description = "id for monitor memory_utilization_forecast" - value = "${datadog_monitor.memory_utilization_forecast.id}" + value = "${datadog_monitor.memory_utilization_forecast.*.id}" } output "failover_unavailable_id" { description = "id for monitor failover_unavailable" - value = "${datadog_monitor.failover_unavailable.id}" + value = "${datadog_monitor.failover_unavailable.*.id}" } diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index 3395675..c1979b5 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -16,9 +16,9 @@ module "datadog-monitors-cloud-gcp-cloud-sql-mysql" { Creates DataDog monitors with the following checks: -- Cloud SQL MySQL Replication Lag - Cloud SQL MySQL Queries Count changed abnormally - Cloud SQL MySQL Questions Count changed abnormally +- Cloud SQL MySQL Replication Lag ## Inputs diff --git a/cloud/gcp/cloud-sql/mysql/outputs.tf b/cloud/gcp/cloud-sql/mysql/outputs.tf index ec11b98..77e2353 100644 --- a/cloud/gcp/cloud-sql/mysql/outputs.tf +++ b/cloud/gcp/cloud-sql/mysql/outputs.tf @@ -1,14 +1,14 @@ output "replication_lag_id" { description = "id for monitor replication_lag" - value = "${datadog_monitor.replication_lag.id}" + value = "${datadog_monitor.replication_lag.*.id}" } output "queries_changing_anomaly_id" { description = "id for monitor queries_changing_anomaly" - value = "${datadog_monitor.queries_changing_anomaly.id}" + value = "${datadog_monitor.queries_changing_anomaly.*.id}" } output "questions_changing_anomaly_id" { description = "id for monitor questions_changing_anomaly" - value = "${datadog_monitor.questions_changing_anomaly.id}" + value = "${datadog_monitor.questions_changing_anomaly.*.id}" } From 75c9350667d021f3acde68b41b196c0af3966b06 Mon Sep 17 00:00:00 2001 From: Quentin Manfroi Date: Thu, 30 Aug 2018 11:33:46 +0200 Subject: [PATCH 46/46] MON-224 
delete duplicated monitors from mysql --- ...stance.tf => monitors-cloud-sql-common.tf} | 0 cloud/gcp/cloud-sql/mysql/README.md | 34 ---- cloud/gcp/cloud-sql/mysql/inputs.tf | 188 ------------------ .../mysql/monitors-cloudsql-mysql.tf | 92 --------- cloud/gcp/cloud-sql/mysql/outputs.tf | 10 - 5 files changed, 324 deletions(-) rename cloud/gcp/cloud-sql/common/{monitors-cloud-sql-instance.tf => monitors-cloud-sql-common.tf} (100%) diff --git a/cloud/gcp/cloud-sql/common/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/common/monitors-cloud-sql-common.tf similarity index 100% rename from cloud/gcp/cloud-sql/common/monitors-cloud-sql-instance.tf rename to cloud/gcp/cloud-sql/common/monitors-cloud-sql-common.tf diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index c1979b5..9af6c43 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -16,8 +16,6 @@ module "datadog-monitors-cloud-gcp-cloud-sql-mysql" { Creates DataDog monitors with the following checks: -- Cloud SQL MySQL Queries Count changed abnormally -- Cloud SQL MySQL Questions Count changed abnormally - Cloud SQL MySQL Replication Lag ## Inputs @@ -29,36 +27,6 @@ Creates DataDog monitors with the following checks: | filter_tags | Tags used for filtering | string | `*` | no | | message | Message sent when a monitor is triggered | string | - | yes | | new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no | -| queries_changing_anomaly_alert_window | Alert window. | string | `last_30m` | no | -| queries_changing_anomaly_count_default_zero | Count default zero. | string | `false` | no | -| queries_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | -| queries_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no | -| queries_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | -| queries_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `` | no | -| queries_changing_anomaly_interval | Interval. | string | `20` | no | -| queries_changing_anomaly_message | Custom message for the Queries Changing monitor | string | `` | no | -| queries_changing_anomaly_seasonality | Seasonality of the algorithm | string | `weekly` | no | -| queries_changing_anomaly_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `` | no | -| queries_changing_anomaly_threshold_critical | Queries Changing critical threshold | string | `1` | no | -| queries_changing_anomaly_threshold_critical_recovery | Queries Changing critical recovery threshold | string | `0.99` | no | -| queries_changing_anomaly_threshold_warning | Queries Changing warning threshold | string | `0.5` | no | -| queries_changing_anomaly_time_aggregator | Time aggregator for the Queries Changing mon monitor | string | `avg` | no | -| queries_changing_anomaly_timeframe | Timeframe for the Queries Changing mon monitor | string | `last_1h` | no | -| questions_changing_anomaly_alert_window | Alert window. | string | `last_30m` | no | -| questions_changing_anomaly_count_default_zero | Count default zero. | string | `false` | no | -| questions_changing_anomaly_detection_algorithm | Anomaly Detection Algorithm used | string | `agile` | no | -| questions_changing_anomaly_deviations | Deviations to detect the anomaly | string | `4` | no | -| questions_changing_anomaly_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | -| questions_changing_anomaly_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `` | no | -| questions_changing_anomaly_interval | Interval. 
| string | `20` | no | -| questions_changing_anomaly_message | Custom message for the Questions Changing monitor | string | `` | no | -| questions_changing_anomaly_seasonality | Seasonality of the algorithm | string | `weekly` | no | -| questions_changing_anomaly_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `` | no | -| questions_changing_anomaly_threshold_critical | Questions Changing critical threshold | string | `1` | no | -| questions_changing_anomaly_threshold_critical_recovery | Questions Changing critical recovery threshold | string | `0.99` | no | -| questions_changing_anomaly_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | -| questions_changing_anomaly_time_aggregator | Time aggregator for the Questions Changing monitor | string | `avg` | no | -| questions_changing_anomaly_timeframe | Timeframe for the Questions Changing monitor | string | `last_1h` | no | | replication_lag_extra_tags | Extra tags for GCP Cloud SQL SQL Replication monitor | list | `` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | @@ -71,8 +39,6 @@ Creates DataDog monitors with the following checks: | Name | Description | |------|-------------| -| queries_changing_anomaly_id | id for monitor queries_changing_anomaly | -| questions_changing_anomaly_id | id for monitor questions_changing_anomaly | | replication_lag_id | id for monitor replication_lag | ## Related documentation diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index e065be7..0b3c567 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -70,191 +70,3 @@ variable "replication_lag_extra_tags" { type = "list" default = [] } - -# -# Queries Changing Abnormally -# - -variable "queries_changing_anomaly_message" { - description = 
"Custom message for the Queries Changing monitor" - type = "string" - default = "" -} - -variable "queries_changing_anomaly_time_aggregator" { - description = "Time aggregator for the Queries Changing mon monitor" - type = "string" - default = "avg" -} - -variable "queries_changing_anomaly_timeframe" { - description = "Timeframe for the Queries Changing mon monitor" - type = "string" - default = "last_1h" -} - -variable "queries_changing_anomaly_detection_algorithm" { - description = "Anomaly Detection Algorithm used" - type = "string" - default = "agile" -} - -variable "queries_changing_anomaly_deviations" { - description = "Deviations to detect the anomaly" - type = "string" - default = 4 -} - -variable "queries_changing_anomaly_direction" { - description = "Direction of the anomaly. It can be both, below or above." - type = "string" - default = "both" -} - -variable "queries_changing_anomaly_alert_window" { - description = "Alert window." - type = "string" - default = "last_30m" -} - -variable "queries_changing_anomaly_interval" { - description = "Interval." - type = "string" - default = 20 -} - -variable "queries_changing_anomaly_count_default_zero" { - description = "Count default zero." 
- type = "string" - default = "false" -} - -variable "queries_changing_anomaly_seasonality" { - description = "Seasonality of the algorithm" - type = "string" - default = "weekly" -} - -variable "queries_changing_anomaly_threshold_warning" { - description = "Queries Changing warning threshold" - type = "string" - default = 0.5 -} - -variable "queries_changing_anomaly_threshold_critical" { - description = "Queries Changing critical threshold" - type = "string" - default = 1 -} - -variable "queries_changing_anomaly_threshold_critical_recovery" { - description = "Queries Changing critical recovery threshold" - type = "string" - default = 0.99 -} - -variable "queries_changing_anomaly_silenced" { - description = "Groups to mute for GCP Cloud SQL Queries Changing monitor" - type = "map" - default = {} -} - -variable "queries_changing_anomaly_extra_tags" { - description = "Extra tags for GCP Cloud SQL Queries Changing monitor" - type = "list" - default = [] -} - -# -# Questions Changing -# - -variable "questions_changing_anomaly_message" { - description = "Custom message for the Questions Changing monitor" - type = "string" - default = "" -} - -variable "questions_changing_anomaly_time_aggregator" { - description = "Time aggregator for the Questions Changing monitor" - type = "string" - default = "avg" -} - -variable "questions_changing_anomaly_timeframe" { - description = "Timeframe for the Questions Changing monitor" - type = "string" - default = "last_1h" -} - -variable "questions_changing_anomaly_detection_algorithm" { - description = "Anomaly Detection Algorithm used" - type = "string" - default = "agile" -} - -variable "questions_changing_anomaly_alert_window" { - description = "Alert window." - type = "string" - default = "last_30m" -} - -variable "questions_changing_anomaly_interval" { - description = "Interval." - type = "string" - default = 20 -} - -variable "questions_changing_anomaly_count_default_zero" { - description = "Count default zero." 
- type = "string" - default = "false" -} - -variable "questions_changing_anomaly_deviations" { - description = "Deviations to detect the anomaly" - type = "string" - default = 4 -} - -variable "questions_changing_anomaly_direction" { - description = "Direction of the anomaly. It can be both, below or above." - type = "string" - default = "both" -} - -variable "questions_changing_anomaly_seasonality" { - description = "Seasonality of the algorithm" - type = "string" - default = "weekly" -} - -variable "questions_changing_anomaly_threshold_warning" { - description = "Questions Changing warning threshold" - type = "string" - default = 0.5 -} - -variable "questions_changing_anomaly_threshold_critical" { - description = "Questions Changing critical threshold" - type = "string" - default = 1 -} - -variable "questions_changing_anomaly_threshold_critical_recovery" { - description = "Questions Changing critical recovery threshold" - type = "string" - default = 0.99 -} - -variable "questions_changing_anomaly_silenced" { - description = "Groups to mute for GCP Cloud SQL Questions Changing monitor" - type = "map" - default = {} -} - -variable "questions_changing_anomaly_extra_tags" { - description = "Extra tags for GCP Cloud SQL Questions Changing monitor" - type = "list" - default = [] -} diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 02d7725..0e0fd1c 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -34,95 +34,3 @@ EOF tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "engine:mysql", "${var.replication_lag_extra_tags}"] } - -# -# Queries Anomaly -# -resource "datadog_monitor" "queries_changing_anomaly" { - name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally" - message = "${coalesce(var.queries_changing_anomaly_message, 
var.message)}" - - type = "query alert" - - query = < ${var.queries_changing_anomaly_threshold_critical} -EOF - - thresholds { - warning = "${var.queries_changing_anomaly_threshold_warning}" - critical = "${var.queries_changing_anomaly_threshold_critical}" - critical_recovery = "${var.queries_changing_anomaly_threshold_critical_recovery}" - } - - notify_audit = false - locked = false - timeout_h = 0 - include_tags = true - require_full_window = false - notify_no_data = false - renotify_interval = 0 - - evaluation_delay = "${var.evaluation_delay}" - new_host_delay = "${var.new_host_delay}" - - silenced = "${var.queries_changing_anomaly_silenced}" - - tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "engine:mysql", "${var.queries_changing_anomaly_extra_tags}"] -} - -# -# Questions Anomaly -# -resource "datadog_monitor" "questions_changing_anomaly" { - name = "[${var.environment}] Cloud SQL MySQL Questions Count changed abnormally" - message = "${coalesce(var.questions_changing_anomaly_message, var.message)}" - - type = "query alert" - - query = < ${var.questions_changing_anomaly_threshold_critical} -EOF - - thresholds { - warning = "${var.questions_changing_anomaly_threshold_warning}" - critical = "${var.questions_changing_anomaly_threshold_critical}" - critical_recovery = "${var.questions_changing_anomaly_threshold_critical_recovery}" - } - - notify_audit = false - locked = false - timeout_h = 0 - include_tags = true - require_full_window = false - notify_no_data = false - renotify_interval = 0 - - evaluation_delay = "${var.evaluation_delay}" - new_host_delay = "${var.new_host_delay}" - - silenced = "${var.questions_changing_anomaly_silenced}" - - tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "engine:mysql", "${var.questions_changing_anomaly_extra_tags}"] -} diff --git a/cloud/gcp/cloud-sql/mysql/outputs.tf 
b/cloud/gcp/cloud-sql/mysql/outputs.tf index 77e2353..e8e24fd 100644 --- a/cloud/gcp/cloud-sql/mysql/outputs.tf +++ b/cloud/gcp/cloud-sql/mysql/outputs.tf @@ -2,13 +2,3 @@ output "replication_lag_id" { description = "id for monitor replication_lag" value = "${datadog_monitor.replication_lag.*.id}" } - -output "queries_changing_anomaly_id" { - description = "id for monitor queries_changing_anomaly" - value = "${datadog_monitor.queries_changing_anomaly.*.id}" -} - -output "questions_changing_anomaly_id" { - description = "id for monitor questions_changing_anomaly" - value = "${datadog_monitor.questions_changing_anomaly.*.id}" -}