MON-224 CloudSQL Instance Failover Unavailable monitor

This commit is contained in:
Rafael Romero Carmona 2018-06-15 10:17:19 +02:00 committed by Quentin Manfroi
parent 9557437195
commit 79f8a5d486
3 changed files with 73 additions and 7 deletions

View File

@ -23,6 +23,7 @@ Creates DataDog monitors with the following checks :
* CloudSQL Instance Disk Utilization * CloudSQL Instance Disk Utilization
* CloudSQL Instance Memory Utilization * CloudSQL Instance Memory Utilization
* CloudSQL Instance Memory Utilization Forecast * CloudSQL Instance Memory Utilization Forecast
* CloudSQL Instance Failover Unavailable
Useful links Useful links
------------ ------------
@ -47,21 +48,18 @@ Inputs
| disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no | | disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no |
| disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no | | disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no |
| environment | Architecture environment | string | - | yes | | environment | Architecture environment | string | - | yes |
| failover_unavailable_message | Custom message for the Failover Unavailable monitor | string | `` | no |
| failover_unavailable_silenced | Groups to mute for GCP Cloud SQL Failover Unavailable monitor | map | `<map>` | no |
| failover_unavailable_threshold_critical | Failover Unavailable critical threshold | string | `0` | no |
| failover_unavailable_timeframe | Timeframe for the Failover Unavailable monitor | string | `last_5m` | no |
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no | | filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no | | filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
| memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no | | memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no |
| memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no |
| memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no |
| memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no | | memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no |
| memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no | | memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no |
| memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no |
| memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `<map>` | no |
| memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `<map>` | no | | memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `<map>` | no |
| memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no | | memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no |
| memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no |
| memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no | | memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no |
| memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no |
| memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no |
| memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no | | memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no |
| memory_message | Custom message for the Memory Utilization monitor | string | `` | no | | memory_message | Custom message for the Memory Utilization monitor | string | `` | no |
| memory_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `<map>` | no | | memory_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `<map>` | no |

View File

@ -166,3 +166,31 @@ variable "memory_forecast_silenced" {
type = "map" type = "map"
default = {} default = {}
} }
#
# Failover Unavailable
#
# Optional message override for the Failover Unavailable monitor.
# Defaults to an empty string.
variable "failover_unavailable_message" {
  type        = "string"
  default     = ""
  description = "Custom message for the Failover Unavailable monitor"
}
# Evaluation window interpolated into the Failover Unavailable monitor query.
variable "failover_unavailable_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Timeframe for the Failover Unavailable monitor"
}
# Critical threshold for the Failover Unavailable monitor.
# The default is quoted so the literal matches the declared "string" type,
# like the other string variables in this file; it interpolates to the same
# value as the previous bare 0.
variable "failover_unavailable_threshold_critical" {
  description = "Failover Unavailable critical threshold"
  type        = "string"
  default     = "0"
}
# Map passed to the monitor's `silenced` argument; empty by default.
variable "failover_unavailable_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for GCP Cloud SQL Failover Unavailable monitor"
}

View File

@ -179,3 +179,43 @@ EOF
"resource:cloud-sql", "resource:cloud-sql",
] ]
} }
#
# Failover Unavailable
#
# Datadog metric alert on gcp.cloudsql.database.available_for_failover.
#
# Evaluated per database_id: the alert fires when the max, over the configured
# timeframe, of the averaged metric is <= the critical threshold. The monitor
# also notifies when no data is received (notify_no_data = true).
#
# NOTE(review): the name template includes "%" units and an is_warning section,
# but this monitor defines no warning threshold -- confirm whether the name
# should be simplified. It is left unchanged here to avoid renaming the monitor.
resource "datadog_monitor" "failover_unavailable" {
  name    = "[${var.environment}] Cloud SQL MySQL Failover Unavailable {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # Uses the monitor-specific message when it is non-empty, otherwise var.message.
  message = "${coalesce(var.failover_unavailable_message, var.message)}"

  type = "metric alert"

  query = <<EOF
max(${var.failover_unavailable_timeframe}):
avg:gcp.cloudsql.database.available_for_failover{${data.template_file.filter.rendered}}
by {database_id}
<= ${var.failover_unavailable_threshold_critical}
EOF

  # Must match the threshold used in the query above.
  thresholds {
    critical = "${var.failover_unavailable_threshold_critical}"
  }

  # include_tags was previously declared twice in this resource; it is now set once.
  include_tags        = true
  notify_no_data      = true
  require_full_window = false
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  locked              = false

  evaluation_delay = "${var.delay}"
  new_host_delay   = "${var.delay}"

  silenced = "${var.failover_unavailable_silenced}"

  tags = [
    "team:gcp",
    "provider:gcp",
    "env:${var.environment}",
    "resource:cloud-sql",
  ]
}