diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index 805c302..900c15a 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -5,28 +5,30 @@ |------|-------------|:----:|:-----:|:-----:| | cpu_message | Custom message for the CPU Utilization monitor | string | `` | no | | cpu_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | -| cpu_tags | Tags to add to the CPU Utilization monitors | map | `` | no | | cpu_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no | | cpu_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.85` | no | | cpu_timeframe | Timeframe for the CPU Utilization monitor | string | `last_2h` | no | -| database_id | ID of the Cloud SQL Database Instance | string | - | yes | | delay | Delay in seconds for the metric evaluation | string | `900` | no | | disk_message | Custom message for the Disk Utilization monitor | string | `` | no | | disk_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | -| disk_tags | Tags to add to the Disk Utilization monitors | map | `` | no | | disk_threshold_critical | Disk Utilization in fraction (critical threshold) | string | `0.9` | no | | disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | | disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | +| memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | +| memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | +| memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | +| memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | +| memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | +| memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | +| memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | +| memory_message | Custom message for the Memory Utilization monitor | string | `` | no | +| memory_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | +| memory_threshold_critical | Memory Utilization in fraction (critical threshold) | string | `0.9` | no | +| memory_threshold_warning | Memory Utilization in fraction (warning threshold) | string | `0.8` | no | +| memory_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | -| network_connections_hard_limit | Max number of network connections | string | - | yes | -| network_connections_message | Custom message for the Netowork Connections monitor | string | `` | no | -| network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | -| network_connections_tags | Tags to add to the Network Connections monitors | map | `` | no | -| network_connections_threshold_critical | Fraction of network connections (warning threshold) | string | `0.9` | no | -| network_connections_threshold_warning | Fraction of network connections (warning threshold) | string | `0.8` | no | -| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no | | project_id | ID of the GCP Project | string | - | yes | diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index 9918e2e..4109158 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -100,39 +100,69 @@ variable "disk_silenced" { } # -# Network Connections +# Memory Utilization # -variable "network_connections_message" { - description = "Custom message for the Netowork Connections monitor" - type = "string" +variable "memory_message" { + description = "Custom message for the Memory Utilization monitor" default = "" } -variable "network_connections_timeframe" { - description = "Timeframe for the Network Connections monitor" - type = "string" +variable "memory_timeframe" { + description = "Timeframe for the Memory Utilization monitor" default = "last_5m" } -variable "network_connections_hard_limit" { - description = "Max number of network connections" - type = "string" -} - -variable "network_connections_threshold_warning" { - description = "Fraction of network connections (warning threshold)" - type = "string" +variable "memory_threshold_warning" { + description = "Memory Utilization in fraction (warning threshold)" default = 0.8 } -variable "network_connections_threshold_critical" { - description = "Fraction of network connections (warning threshold)" - type = "string" +variable "memory_threshold_critical" { + description = "Memory Utilization in fraction (critical threshold)" default = 0.9 } -variable "network_connections_silenced" { - description = "Groups to mute for GCP Cloud SQL Network Connections monitor" +variable "memory_silenced" { + description = "Groups to mute for GCP Cloud SQL Memory Utilization monitor" + type = "map" + default = {} +} + +# +# Memory Utilization Forecast +# +variable "memory_forecast_message" { + description = "Custom message for the Memory Utilization Forecast monitor" + default = "" +} + +variable "memory_forecast_timeframe" { + description = "Timeframe for the Memory Utilization Forecast monitor" + default = "next_3d" +} + +variable "memory_forecast_interval" { + description = "Interval for the Memory Utilization Forecast monitor" + default = "30m" +} + +variable "memory_forecast_history" { + description = "History for the Memory Utilization Forecast monitor" + default = "12h" +} + +variable "memory_forecast_threshold_warning" { + description = "Memory Utilization Forecast in fraction (warning threshold)" + default = 0.8 +} + +variable "memory_forecast_threshold_critical" { + description = "Memory Utilization Forecast in fraction (critical threshold)" + default = 0.9 +} + +variable "memory_forecast_silenced" { + description = "Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor" type = "map" default = {} } diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 722af31..69df62d 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -21,11 +21,9 @@ resource "datadog_monitor" "cpu_utilization" { type = "metric alert" query = < ${var.cpu_threshold_critical} + avg(${var.cpu_timeframe}): avg:gcp.cloudsql.database.cpu.utilization{${data.template_file.filter.rendered}} + by {database_id} + > ${var.cpu_threshold_critical} EOF thresholds { @@ -63,9 +61,8 @@ resource "datadog_monitor" "disk_utilization" { type = "metric alert" query = < ${var.disk_threshold_critical} EOF @@ -96,26 +93,24 @@ EOF } # -# Network Connections +# Memory Utilization # -resource "datadog_monitor" "network_connections" { - name = "[${var.environment}] Cloud SQL Network Connections (hard limit: ${var.network_connections_hard_limit}) {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.network_connections_message, var.message)}" +resource "datadog_monitor" "memory_utilization" { + name = "[${var.environment}] Cloud SQL Memory Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.memory_message, var.message)}" type = "metric alert" query = < ${var.network_connections_threshold_critical} + > ${var.memory_threshold_critical} EOF thresholds { - warning = "${var.network_connections_threshold_warning}" - critical = "${var.network_connections_threshold_critical}" + warning = "${var.memory_threshold_warning}" + critical = "${var.memory_threshold_critical}" } include_tags = true @@ -128,7 +123,51 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.network_connections_silenced}" + silenced = "${var.memory_silenced}" + + tags = [ + "team:gcp", + "provider:gcp", + "env:${var.environment}", + "resource:cloud-sql", + ] +} + +resource "datadog_monitor" "memory_utilization_forecast" { + name = "[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + message = "${coalesce(var.memory_forecast_message, var.message)}" + + type = "query alert" + + query = < ${var.memory_forecast_threshold_critical} +EOF + + thresholds { + warning = "${var.memory_forecast_threshold_warning}" + critical = "${var.memory_forecast_threshold_critical}" + } + + include_tags = true + notify_no_data = true + require_full_window = false + renotify_interval = 0 + notify_audit = false + timeout_h = 0 + include_tags = true + locked = false + evaluation_delay = "${var.delay}" + new_host_delay = "${var.delay}" + silenced = "${var.memory_forecast_silenced}" tags = [ "team:gcp",