From 0ca137cbd20be4513d9fd1a878a4b2193c7222d7 Mon Sep 17 00:00:00 2001 From: Rafael Romero Carmona Date: Thu, 5 Jul 2018 15:34:15 +0200 Subject: [PATCH] MON-224 Extra tags and many fixes from testing --- cloud/gcp/cloud-sql/instance/README.md | 49 ++++++------ cloud/gcp/cloud-sql/instance/inputs.tf | 74 +++++++++++++------ .../instance/monitors-cloud-sql-instance.tf | 59 ++++++++------- cloud/gcp/cloud-sql/mysql/README.md | 6 +- cloud/gcp/cloud-sql/mysql/inputs.tf | 26 ++++++- .../mysql/monitors-cloudsql-mysql.tf | 12 ++- 6 files changed, 149 insertions(+), 77 deletions(-) diff --git a/cloud/gcp/cloud-sql/instance/README.md b/cloud/gcp/cloud-sql/instance/README.md index b1638e2..9584e07 100644 --- a/cloud/gcp/cloud-sql/instance/README.md +++ b/cloud/gcp/cloud-sql/instance/README.md @@ -36,36 +36,41 @@ Inputs | Name | Description | Type | Default | Required | |------|-------------|:----:|:-----:|:-----:| -| cpu_message | Custom message for the CPU Utilization monitor | string | `` | no | -| cpu_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | -| cpu_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no | -| cpu_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.8` | no | -| cpu_timeframe | Timeframe for the CPU Utilization monitor | string | `last_30m` | no | +| cpu_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `` | no | +| cpu_utilization_message | Custom message for the CPU Utilization monitor | string | `` | no | +| cpu_utilization_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `` | no | +| cpu_utilization_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no | +| cpu_utilization_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.8` | no | +| cpu_utilization_timeframe | Timeframe for the CPU Utilization monitor | string | 
`last_30m` | no | | delay | Delay in seconds for the metric evaluation | string | `900` | no | -| disk_message | Custom message for the Disk Utilization monitor | string | `` | no | -| disk_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | -| disk_threshold_critical | Disk Utilization in fraction (critical threshold) | string | `0.9` | no | -| disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | -| disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | +| disk_utilization_extra_tags | Extra tags for GCP Cloud SQL Disk Utilization monitor | list | `` | no | +| disk_utilization_message | Custom message for the Disk Utilization monitor | string | `` | no | +| disk_utilization_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `` | no | +| disk_utilization_threshold_critical | Disk Utilization in fraction (critical threshold) | string | `0.9` | no | +| disk_utilization_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | +| disk_utilization_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | environment | Architecture environment | string | - | yes | +| failover_unavailable_extra_tags | Extra tags for GCP Cloud SQL Failover Unavailable monitor | list | `` | no | | failover_unavailable_message | Custom message for the Failover Unavailable monitor | string | `` | no | | failover_unavailable_silenced | Groups to mute for GCP Cloud SQL Failover Unavailable monitor | map | `` | no | | failover_unavailable_threshold_critical | Failover Unavailable critical threshold | string | `0` | no | | failover_unavailable_timeframe | Timeframe for the Failover Unavailable monitor | string | `last_5m` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags
convention | string | `true` | no | -| memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | -| memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | -| memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | -| memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `` | no | -| memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | -| memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | -| memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | -| memory_message | Custom message for the Memory Utilization monitor | string | `` | no | -| memory_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | -| memory_threshold_critical | Memory Utilization in fraction (critical threshold) | string | `0.9` | no | -| memory_threshold_warning | Memory Utilization in fraction (warning threshold) | string | `0.8` | no | -| memory_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | +| memory_utilization_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization monitor | list | `` | no | +| memory_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization Forecast monitor | list | `` | no | +| memory_utilization_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | +| memory_utilization_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | +| memory_utilization_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | +| memory_utilization_forecast_silenced | Groups to mute for GCP Cloud 
SQL Memory Utilization Forecast monitor | map | `` | no | +| memory_utilization_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | +| memory_utilization_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | +| memory_utilization_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | +| memory_utilization_message | Custom message for the Memory Utilization monitor | string | `` | no | +| memory_utilization_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `` | no | +| memory_utilization_threshold_critical | Memory Utilization in fraction (critical threshold) | string | `0.9` | no | +| memory_utilization_threshold_warning | Memory Utilization in fraction (warning threshold) | string | `0.8` | no | +| memory_utilization_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no | | message | Message sent when a monitor is triggered | string | - | yes | | project_id | ID of the GCP Project | string | - | yes | diff --git a/cloud/gcp/cloud-sql/instance/inputs.tf b/cloud/gcp/cloud-sql/instance/inputs.tf index f7d82cd..3d7370d 100644 --- a/cloud/gcp/cloud-sql/instance/inputs.tf +++ b/cloud/gcp/cloud-sql/instance/inputs.tf @@ -36,137 +36,161 @@ variable "project_id" { # # CPU # -variable "cpu_message" { +variable "cpu_utilization_message" { description = "Custom message for the CPU Utilization monitor" type = "string" default = "" } -variable "cpu_timeframe" { +variable "cpu_utilization_timeframe" { description = "Timeframe for the CPU Utilization monitor" type = "string" default = "last_30m" } -variable "cpu_threshold_warning" { +variable "cpu_utilization_threshold_warning" { description = "CPU Utilization in fraction (warning threshold)" type = "string" default = 0.8 } -variable "cpu_threshold_critical" { +variable "cpu_utilization_threshold_critical" 
{ description = "CPU Utilization in fraction (critical threshold)" type = "string" default = 0.9 } -variable "cpu_silenced" { +variable "cpu_utilization_silenced" { description = "Groups to mute for GCP Cloud SQL CPU Utilization monitor" type = "map" default = {} } +variable "cpu_utilization_extra_tags" { + description = "Extra tags for GCP Cloud SQL CPU Utilization monitor" + type = "list" + default = [] +} + # # DISK # -variable "disk_message" { +variable "disk_utilization_message" { description = "Custom message for the Disk Utilization monitor" type = "string" default = "" } -variable "disk_timeframe" { +variable "disk_utilization_timeframe" { description = "Timeframe for the Disk Utilization monitor" type = "string" default = "last_5m" } -variable "disk_threshold_warning" { +variable "disk_utilization_threshold_warning" { description = "Disk Utilization in fraction (warning threshold)" type = "string" default = 0.8 } -variable "disk_threshold_critical" { +variable "disk_utilization_threshold_critical" { description = "Disk Utilization in fraction (critical threshold)" type = "string" default = 0.9 } -variable "disk_silenced" { +variable "disk_utilization_silenced" { description = "Groups to mute for GCP Cloud SQL Disk Utilization monitor" type = "map" default = {} } +variable "disk_utilization_extra_tags" { + description = "Extra tags for GCP Cloud SQL Disk Utilization monitor" + type = "list" + default = [] +} + # # Memory Utilization # -variable "memory_message" { +variable "memory_utilization_message" { description = "Custom message for the Memory Utilization monitor" default = "" } -variable "memory_timeframe" { +variable "memory_utilization_timeframe" { description = "Timeframe for the Memory Utilization monitor" default = "last_5m" } -variable "memory_threshold_warning" { +variable "memory_utilization_threshold_warning" { description = "Memory Utilization in fraction (warning threshold)" default = 0.8 } -variable "memory_threshold_critical" { +variable
"memory_utilization_threshold_critical" { description = "Memory Utilization in fraction (critical threshold)" default = 0.9 } -variable "memory_silenced" { +variable "memory_utilization_silenced" { description = "Groups to mute for GCP Cloud SQL Memory Utilization monitor" type = "map" default = {} } +variable "memory_utilization_extra_tags" { + description = "Extra tags for GCP Cloud SQL Memory Utilization monitor" + type = "list" + default = [] +} + # # Memory Utilization Forecast # -variable "memory_forecast_message" { +variable "memory_utilization_forecast_message" { description = "Custom message for the Memory Utilization Forecast monitor" default = "" } -variable "memory_forecast_timeframe" { +variable "memory_utilization_forecast_timeframe" { description = "Timeframe for the Memory Utilization Forecast monitor" default = "next_3d" } -variable "memory_forecast_interval" { +variable "memory_utilization_forecast_interval" { description = "Interval for the Memory Utilization Forecast monitor" default = "30m" } -variable "memory_forecast_history" { +variable "memory_utilization_forecast_history" { description = "History for the Memory Utilization Forecast monitor" default = "12h" } -variable "memory_forecast_threshold_warning" { +variable "memory_utilization_forecast_threshold_warning" { description = "Memory Utilization Forecast in fraction (warning threshold)" default = 0.8 } -variable "memory_forecast_threshold_critical" { +variable "memory_utilization_forecast_threshold_critical" { description = "Memory Utilization Forecast in fraction (critical threshold)" default = 0.9 } -variable "memory_forecast_silenced" { +variable "memory_utilization_forecast_silenced" { description = "Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor" type = "map" default = {} } +variable "memory_utilization_forecast_extra_tags" { + description = "Extra tags for GCP Cloud SQL Memory Utilization Forecast monitor" + type = "list" + default = [] +} + # # Failover 
Unavailable # @@ -194,3 +218,9 @@ variable "failover_unavailable_silenced" { type = "map" default = {} } + +variable "failover_unavailable_extra_tags" { + description = "Extra tags for GCP Cloud SQL Failover Unavailable monitor" + type = "list" + default = [] +} diff --git a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf index 3ebc71d..ccf6ce9 100644 --- a/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf +++ b/cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf @@ -16,20 +16,20 @@ data "template_file" "filter" { # resource "datadog_monitor" "cpu_utilization" { name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.cpu_message, var.message)}" + message = "${coalesce(var.cpu_utilization_message, var.message)}" type = "metric alert" query = < ${var.cpu_threshold_critical} + > ${var.cpu_utilization_threshold_critical} EOF thresholds { - warning = "${var.cpu_threshold_warning}" - critical = "${var.cpu_threshold_critical}" + warning = "${var.cpu_utilization_threshold_warning}" + critical = "${var.cpu_utilization_threshold_critical}" } notify_no_data = true @@ -41,13 +41,14 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.cpu_silenced}" + silenced = "${var.cpu_utilization_silenced}" tags = [ "team:gcp", "provider:gcp", - "env:${var.environment}", "resource:cloud-sql", + "env:${var.environment}", + "${var.cpu_utilization_extra_tags}", ] } @@ -56,20 +57,20 @@ EOF # resource "datadog_monitor" "disk_utilization" { name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = 
"${coalesce(var.disk_message, var.message)}" + message = "${coalesce(var.disk_utilization_message, var.message)}" type = "metric alert" query = < ${var.disk_threshold_critical} + > ${var.disk_utilization_threshold_critical} EOF thresholds { - warning = "${var.disk_threshold_warning}" - critical = "${var.disk_threshold_critical}" + warning = "${var.disk_utilization_threshold_warning}" + critical = "${var.disk_utilization_threshold_critical}" } notify_no_data = true @@ -81,13 +82,14 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.disk_silenced}" + silenced = "${var.disk_utilization_silenced}" tags = [ "team:gcp", "provider:gcp", "env:${var.environment}", "resource:cloud-sql", + "${var.disk_utilization_extra_tags}", ] } @@ -96,20 +98,20 @@ EOF # resource "datadog_monitor" "memory_utilization" { name = "[${var.environment}] Cloud SQL Memory Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.memory_message, var.message)}" + message = "${coalesce(var.memory_utilization_message, var.message)}" type = "metric alert" query = < ${var.memory_threshold_critical} + > ${var.memory_utilization_threshold_critical} EOF thresholds { - warning = "${var.memory_threshold_warning}" - critical = "${var.memory_threshold_critical}" + warning = "${var.memory_utilization_threshold_warning}" + critical = "${var.memory_utilization_threshold_critical}" } notify_no_data = true @@ -121,13 +123,14 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.memory_silenced}" + silenced = "${var.memory_utilization_silenced}" tags = [ "team:gcp", "provider:gcp", "env:${var.environment}", "resource:cloud-sql", + "${var.memory_utilization_extra_tags}", ] } @@ -136,26 +139,26 @@ EOF # resource "datadog_monitor" "memory_utilization_forecast" { name = 
"[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" - message = "${coalesce(var.memory_forecast_message, var.message)}" + message = "${coalesce(var.memory_utilization_forecast_message, var.message)}" type = "query alert" query = < ${var.memory_forecast_threshold_critical} + > ${var.memory_utilization_forecast_threshold_critical} EOF thresholds { - warning = "${var.memory_forecast_threshold_warning}" - critical = "${var.memory_forecast_threshold_critical}" + warning = "${var.memory_utilization_forecast_threshold_warning}" + critical = "${var.memory_utilization_forecast_threshold_critical}" } notify_no_data = true @@ -167,13 +170,14 @@ EOF locked = false evaluation_delay = "${var.delay}" new_host_delay = "${var.delay}" - silenced = "${var.memory_forecast_silenced}" + silenced = "${var.memory_utilization_forecast_silenced}" tags = [ "team:gcp", "provider:gcp", "env:${var.environment}", "resource:cloud-sql", + "${var.memory_utilization_forecast_extra_tags}", ] } @@ -213,5 +217,6 @@ EOF "provider:gcp", "env:${var.environment}", "resource:cloud-sql", + "${var.failover_unavailable_extra_tags}", ] } diff --git a/cloud/gcp/cloud-sql/mysql/README.md b/cloud/gcp/cloud-sql/mysql/README.md index eaf55f6..5fcf0b5 100644 --- a/cloud/gcp/cloud-sql/mysql/README.md +++ b/cloud/gcp/cloud-sql/mysql/README.md @@ -43,6 +43,7 @@ Inputs | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | message | Message sent when a monitor is triggered | string | - | yes | +| network_connections_extra_tags | Extra tags for GCP Cloud SQL Network Connections monitor | list | `` | no | | network_connections_hard_limit | Max number of connections for the CloudSQL Instance. 
Default value is the max value on https://cloud.google.com/sql/docs/quotas#fixed-limits for MySQL | string | `4000` | no | | network_connections_message | Custom message for the Network Connections monitor | string | `` | no | | network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | @@ -54,6 +55,7 @@ Inputs | queries_changing_database_ids | List of database ids for the Queries Changing monitor | list | `` | no | | queries_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | queries_changing_direction | Direction of the anomaly. It can be both, below or above. | string | `both` | no | +| queries_changing_extra_tags | Extra tags for GCP Cloud SQL Queries Changing monitor | list | `` | no | | queries_changing_message | Custom message for the Queries Changing monitor | string | `` | no | | queries_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | | queries_changing_silenced | Groups to mute for GCP Cloud SQL Queries Changing monitor | map | `` | no | @@ -64,12 +66,14 @@ Inputs | questions_changing_database_ids | List of database ids for the Questions Changing monitor | list | `` | no | | questions_changing_deviations | Deviations to detect the anomaly | string | `4` | no | | questions_changing_direction | Direction of the anomaly. It can be both, below or above. 
| string | `both` | no | +| questions_changing_extra_tags | Extra tags for GCP Cloud SQL Questions Changing monitor | list | `` | no | | questions_changing_message | Custom message for the Questions Changing monitor | string | `` | no | | questions_changing_seasonality | Seasonality of the algorithm | string | `weekly` | no | -| questions_changing_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `` | no | +| questions_changing_silenced | Groups to mute for GCP Cloud SQL Questions Changing monitor | map | `` | no | | questions_changing_threshold_critical | Questions Changing critical threshold | string | `1` | no | | questions_changing_threshold_warning | Questions Changing warning threshold | string | `0.5` | no | | questions_changing_timeframe | Timeframe for the Questions Changing monitor | string | `last_10m` | no | +| replication_lag_extra_tags | Extra tags for GCP Cloud SQL Replication Lag monitor | list | `` | no | | replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no | | replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `` | no | | replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `900` | no | diff --git a/cloud/gcp/cloud-sql/mysql/inputs.tf b/cloud/gcp/cloud-sql/mysql/inputs.tf index 55d274b..e7bda19 100644 --- a/cloud/gcp/cloud-sql/mysql/inputs.tf +++ b/cloud/gcp/cloud-sql/mysql/inputs.tf @@ -72,6 +72,12 @@ variable "network_connections_silenced" { default = {} } +variable "network_connections_extra_tags" { + description = "Extra tags for GCP Cloud SQL Network Connections monitor" + type = "list" + default = [] +} + # # Replication Lag # @@ -105,6 +111,12 @@ variable "replication_lag_silenced" { default = {} } +variable "replication_lag_extra_tags" { + description = "Extra tags for GCP Cloud SQL Replication Lag monitor" + type = "list" + default = [] +} + # # Queries Changing Abnormally # @@ -168,6
+180,12 @@ variable "queries_changing_silenced" { default = {} } +variable "queries_changing_extra_tags" { + description = "Extra tags for GCP Cloud SQL Queries Changing monitor" + type = "list" + default = [] +} + # # Questions Changing # @@ -226,7 +244,13 @@ variable "questions_changing_threshold_critical" { } variable "questions_changing_silenced" { - description = "Groups to mute for GCP Cloud SQL Network Connections monitor" + description = "Groups to mute for GCP Cloud SQL Questions Changing monitor" type = "map" default = {} } + +variable "questions_changing_extra_tags" { + description = "Extra tags for GCP Cloud SQL Questions Changing monitor" + type = "list" + default = [] +} diff --git a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf index 833664e..5f7327f 100644 --- a/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf +++ b/cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf @@ -49,6 +49,7 @@ EOF "env:${var.environment}", "resource:cloud-sql", "engine:mysql", + "${var.network_connections_extra_tags}", ] } @@ -90,6 +91,7 @@ EOF "env:${var.environment}", "resource:cloud-sql", "engine:mysql", + "${var.replication_lag_extra_tags}", ] } @@ -99,10 +101,10 @@ EOF resource "datadog_monitor" "queries_changing_anomaly" { count = "${length(var.queries_changing_database_ids)}" - name = "[${var.environment}] Cloud SQL MySQL Queries Count changed abnormally on ${var.project_id}:${var.queries_changing_database_ids[count.index]} {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" + name = "[${var.environment}] [${var.queries_changing_database_ids[count.index]}] Cloud SQL MySQL Queries Count changed abnormally {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}" message = "${coalesce(var.queries_changing_message, 
var.message)}" - type = "metric alert" + type = "query alert" query = <