Merged in MON-224-gcp-cloud-sql (pull request #97)
MON-224 GCP CloudSQL Monitors Approved-by: Jean-Philippe LAINÉ <jean-philippe.laine@fr.clara.net> Approved-by: Rafael Romero Carmona <rafael.romero.carmona@fr.clara.net> Approved-by: Quentin Manfroi <quentin.manfroi@yahoo.fr> Approved-by: Alexandre Gaillet <alexandre.gaillet@fr.clara.net>
This commit is contained in:
commit
f5ab90b55f
@ -91,6 +91,10 @@ The `//` is very important, it's a terraform specific syntax used to separate gi
|
||||
- [sql-database](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/sql-database/)
|
||||
- [storage](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/storage/)
|
||||
- [stream-analytics](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/azure/stream-analytics/)
|
||||
- [gcp](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/)
|
||||
- [cloud-sql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/)
|
||||
- [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/common/)
|
||||
- [mysql](https://bitbucket.org/morea/terraform.feature.datadog/src/master/cloud/gcp/cloud-sql/mysql/)
|
||||
- [common](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/)
|
||||
- [alerting-message](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/alerting-message/)
|
||||
- [filter-tags](https://bitbucket.org/morea/terraform.feature.datadog/src/master/common/filter-tags/)
|
||||
|
||||
103
cloud/gcp/cloud-sql/common/README.md
Normal file
103
cloud/gcp/cloud-sql/common/README.md
Normal file
@ -0,0 +1,103 @@
|
||||
# CLOUD GCP CLOUD-SQL COMMON DataDog monitors
|
||||
|
||||
## How to use this module
|
||||
|
||||
```
|
||||
module "datadog-monitors-cloud-gcp-cloud-sql-common" {
|
||||
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/cloud-sql/common?ref={revision}"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${module.datadog-message-alerting.alerting-message}"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
## Purpose
|
||||
|
||||
Creates DataDog monitors with the following checks:
|
||||
|
||||
- Cloud SQL CPU Utilization
|
||||
- Cloud SQL Disk Utilization forecast
|
||||
- Cloud SQL Disk Utilization
|
||||
- Cloud SQL Failover Unavailable
|
||||
- Cloud SQL Memory Utilization forecast
|
||||
- Cloud SQL Memory Utilization
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| cpu_utilization_extra_tags | Extra tags for GCP Cloud SQL CPU Utilization monitor | list | `<list>` | no |
|
||||
| cpu_utilization_message | Custom message for the CPU Utilization monitor | string | `` | no |
|
||||
| cpu_utilization_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `<map>` | no |
|
||||
| cpu_utilization_threshold_critical | CPU Utilization in percentage (critical threshold) | string | `90` | no |
|
||||
| cpu_utilization_threshold_warning | CPU Utilization in percentage (warning threshold) | string | `80` | no |
|
||||
| cpu_utilization_time_aggregator | Time aggregator for the CPU Utilization monitor | string | `avg` | no |
|
||||
| cpu_utilization_timeframe | Timeframe for the CPU Utilization monitor | string | `last_15m` | no |
|
||||
| disk_utilization_extra_tags | Extra tags for GCP Cloud SQL Disk Utilization monitor | list | `<list>` | no |
|
||||
| disk_utilization_forecast_algorithm | Algorithm for the Disk Utilization Forecast monitor | string | `linear` | no |
|
||||
| disk_utilization_forecast_deviations | Deviations for the Disk Utilization Forecast monitor | string | `1` | no |
|
||||
| disk_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL Disk Utilization Forecast monitor | list | `<list>` | no |
|
||||
| disk_utilization_forecast_interval | Interval for the Disk Utilization Forecast monitor | string | `60m` | no |
|
||||
| disk_utilization_forecast_linear_history | History for the Disk Utilization Forecast monitor | string | `3d` | no |
|
||||
| disk_utilization_forecast_linear_model | Model for the Disk Utilization Forecast monitor | string | `default` | no |
|
||||
| disk_utilization_forecast_message | Custom message for the Disk Utilization Forecast monitor | string | `` | no |
|
||||
| disk_utilization_forecast_seasonal_seasonality | Seasonality for the Disk Utilization Forecast monitor | string | `weekly` | no |
|
||||
| disk_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Disk Utilization Forecast monitor | map | `<map>` | no |
|
||||
| disk_utilization_forecast_threshold_critical | Disk Utilization Forecast in percentage (critical threshold) | string | `80` | no |
|
||||
| disk_utilization_forecast_threshold_critical_recovery | Disk Utilization Forecast in percentage (recovery threshold) | string | `72` | no |
|
||||
| disk_utilization_forecast_time_aggregator | Time aggregator for the Disk Utilization Forecast monitor | string | `max` | no |
|
||||
| disk_utilization_forecast_timeframe | Timeframe for the Disk Utilization Forecast monitor | string | `next_1w` | no |
|
||||
| disk_utilization_message | Custom message for the Disk Utilization monitor | string | `` | no |
|
||||
| disk_utilization_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `<map>` | no |
|
||||
| disk_utilization_threshold_critical | Disk Utilization in percentage (critical threshold) | string | `90` | no |
|
||||
| disk_utilization_threshold_warning | Disk Utilization in percentage (warning threshold) | string | `80` | no |
|
||||
| disk_utilization_time_aggregator | Time aggregator for the Disk Utilization monitor | string | `avg` | no |
|
||||
| disk_utilization_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| failover_unavailable_extra_tags | Extra tags for GCP Cloud SQL Failover Unavailable monitor | list | `<list>` | no |
|
||||
| failover_unavailable_message | Custom message for the Failover Unavailable monitor | string | `` | no |
|
||||
| failover_unavailable_silenced | Groups to mute for GCP Cloud SQL Failover Unavailable monitor | map | `<map>` | no |
|
||||
| failover_unavailable_threshold_critical | Failover Unavailable critical threshold | string | `0` | no |
|
||||
| failover_unavailable_time_aggregator | Time aggregator for the Failover Unavailable monitor | string | `max` | no |
|
||||
| failover_unavailable_timeframe | Timeframe for the Failover Unavailable monitor | string | `last_5m` | no |
|
||||
| filter_tags | Tags used for filtering | string | `*` | no |
|
||||
| memory_utilization_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization monitor | list | `<list>` | no |
|
||||
| memory_utilization_forecast_algorithm | Algorithm for the Memory Utilization Forecast monitor | string | `linear` | no |
|
||||
| memory_utilization_forecast_deviations | Deviations for the Memory Utilization Forecast monitor | string | `1` | no |
|
||||
| memory_utilization_forecast_extra_tags | Extra tags for GCP Cloud SQL Memory Utilization Forecast monitor | list | `<list>` | no |
|
||||
| memory_utilization_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no |
|
||||
| memory_utilization_forecast_linear_history | History for the Memory Utilization Forecast monitor | string | `12h` | no |
|
||||
| memory_utilization_forecast_linear_model | Model for the Memory Utilization Forecast monitor | string | `default` | no |
|
||||
| memory_utilization_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no |
|
||||
| memory_utilization_forecast_seasonal_seasonality | Seasonality for the Memory Utilization Forecast monitor | string | `weekly` | no |
|
||||
| memory_utilization_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `<map>` | no |
|
||||
| memory_utilization_forecast_threshold_critical | Memory Utilization Forecast in percentage (critical threshold) | string | `90` | no |
|
||||
| memory_utilization_forecast_threshold_critical_recovery | Memory Utilization Forecast in percentage (recovery threshold) | string | `81` | no |
|
||||
| memory_utilization_forecast_time_aggregator | Time aggregator for the Memory Utilization Forecast monitor | string | `max` | no |
|
||||
| memory_utilization_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no |
|
||||
| memory_utilization_message | Custom message for the Memory Utilization monitor | string | `` | no |
|
||||
| memory_utilization_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `<map>` | no |
|
||||
| memory_utilization_threshold_critical | Memory Utilization in percentage (critical threshold) | string | `90` | no |
|
||||
| memory_utilization_threshold_warning | Memory Utilization in percentage (warning threshold) | string | `80` | no |
|
||||
| memory_utilization_time_aggregator | Time aggregator for the Memory Utilization monitor | string | `avg` | no |
|
||||
| memory_utilization_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| cpu_utilization_id | id for monitor cpu_utilization |
|
||||
| disk_utilization_forecast_id | id for monitor disk_utilization_forecast |
|
||||
| disk_utilization_id | id for monitor disk_utilization |
|
||||
| failover_unavailable_id | id for monitor failover_unavailable |
|
||||
| memory_utilization_forecast_id | id for monitor memory_utilization_forecast |
|
||||
| memory_utilization_id | id for monitor memory_utilization |
|
||||
|
||||
## Related documentation
|
||||
|
||||
* [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql)
|
||||
* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/)
|
||||
358
cloud/gcp/cloud-sql/common/inputs.tf
Normal file
358
cloud/gcp/cloud-sql/common/inputs.tf
Normal file
@ -0,0 +1,358 @@
|
||||
#
|
||||
# Datadog global variables
|
||||
#
|
||||
variable "environment" {
|
||||
description = "Architecture environment"
|
||||
type = "string"
|
||||
}
|
||||
|
||||
variable "filter_tags" {
|
||||
description = "Tags used for filtering"
|
||||
default = "*"
|
||||
}
|
||||
|
||||
variable "message" {
|
||||
description = "Message sent when a monitor is triggered"
|
||||
}
|
||||
|
||||
variable "evaluation_delay" {
|
||||
description = "Delay in seconds for the metric evaluation"
|
||||
default = 900
|
||||
}
|
||||
|
||||
variable "new_host_delay" {
|
||||
description = "Delay in seconds for the new host evaluation"
|
||||
default = 300
|
||||
}
|
||||
|
||||
#
|
||||
# CPU
|
||||
#
|
||||
|
||||
variable "cpu_utilization_message" {
|
||||
description = "Custom message for the CPU Utilization monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "cpu_utilization_time_aggregator" {
|
||||
description = "Time aggregator for the CPU Utilization monitor"
|
||||
type = "string"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "cpu_utilization_timeframe" {
|
||||
description = "Timeframe for the CPU Utilization monitor"
|
||||
type = "string"
|
||||
default = "last_15m"
|
||||
}
|
||||
|
||||
variable "cpu_utilization_threshold_warning" {
|
||||
description = "CPU Utilization in percentage (warning threshold)"
|
||||
type = "string"
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "cpu_utilization_threshold_critical" {
|
||||
description = "CPU Utilization in percentage (critical threshold)"
|
||||
type = "string"
|
||||
default = 90
|
||||
}
|
||||
|
||||
variable "cpu_utilization_silenced" {
|
||||
description = "Groups to mute for GCP Cloud SQL CPU Utilization monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "cpu_utilization_extra_tags" {
|
||||
description = "Extra tags for GCP Cloud SQL CPU Utilization monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
#
|
||||
# DISK Utilization
|
||||
#
|
||||
|
||||
variable "disk_utilization_message" {
|
||||
description = "Custom message for the Disk Utilization monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "disk_utilization_time_aggregator" {
|
||||
description = "Time aggregator for the Disk Utilization monitor"
|
||||
type = "string"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "disk_utilization_timeframe" {
|
||||
description = "Timeframe for the Disk Utilization monitor"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "disk_utilization_threshold_warning" {
|
||||
description = "Disk Utilization in percentage (warning threshold)"
|
||||
type = "string"
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "disk_utilization_threshold_critical" {
|
||||
description = "Disk Utilization in percentage (critical threshold)"
|
||||
type = "string"
|
||||
default = 90
|
||||
}
|
||||
|
||||
variable "disk_utilization_silenced" {
|
||||
description = "Groups to mute for GCP Cloud SQL Disk Utilization monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
# Additional tags appended to the default tag list of the Disk Utilization monitor.
variable "disk_utilization_extra_tags" {
  description = "Extra tags for GCP Cloud SQL Disk Utilization monitor"
  type        = "list"
  default     = []
}
|
||||
|
||||
#
|
||||
# DISK Utilization Forecast
|
||||
#
|
||||
|
||||
variable "disk_utilization_forecast_message" {
|
||||
description = "Custom message for the Disk Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_time_aggregator" {
|
||||
description = "Time aggregator for the Disk Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_timeframe" {
|
||||
description = "Timeframe for the Disk Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "next_1w"
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_algorithm" {
|
||||
description = "Algorithm for the Disk Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "linear"
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_deviations" {
|
||||
description = "Deviations for the Disk Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_interval" {
|
||||
description = "Interval for the Disk Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "60m"
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_linear_history" {
|
||||
description = "History for the Disk Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "3d"
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_linear_model" {
|
||||
description = "Model for the Disk Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "default"
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_seasonal_seasonality" {
|
||||
description = "Seasonality for the Disk Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "weekly"
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_threshold_critical" {
|
||||
description = "Disk Utilization Forecast in percentage (critical threshold)"
|
||||
type = "string"
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_threshold_critical_recovery" {
|
||||
description = "Disk Utilization Forecast in percentage (recovery threshold)"
|
||||
type = "string"
|
||||
default = 72
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_silenced" {
|
||||
description = "Groups to mute for GCP Cloud SQL Disk Utilization Forecast monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "disk_utilization_forecast_extra_tags" {
|
||||
description = "Extra tags for GCP Cloud SQL Disk Utilization Forecast monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
#
|
||||
# Memory Utilization
|
||||
#
|
||||
|
||||
variable "memory_utilization_message" {
|
||||
description = "Custom message for the Memory Utilization monitor"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "memory_utilization_time_aggregator" {
|
||||
description = "Time aggregator for the Memory Utilization monitor"
|
||||
default = "avg"
|
||||
}
|
||||
|
||||
variable "memory_utilization_timeframe" {
|
||||
description = "Timeframe for the Memory Utilization monitor"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "memory_utilization_threshold_warning" {
|
||||
description = "Memory Utilization in percentage (warning threshold)"
|
||||
default = 80
|
||||
}
|
||||
|
||||
variable "memory_utilization_threshold_critical" {
|
||||
description = "Memory Utilization in percentage (critical threshold)"
|
||||
default = 90
|
||||
}
|
||||
|
||||
variable "memory_utilization_silenced" {
|
||||
description = "Groups to mute for GCP Cloud SQL Memory Utilization monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "memory_utilization_extra_tags" {
|
||||
description = "Extra tags for GCP Cloud SQL Memory Utilization monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
#
|
||||
# Memory Utilization Forecast
|
||||
#
|
||||
|
||||
variable "memory_utilization_forecast_message" {
|
||||
description = "Custom message for the Memory Utilization Forecast monitor"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_time_aggregator" {
|
||||
description = "Time aggregator for the Memory Utilization Forecast monitor"
|
||||
default = "max"
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_timeframe" {
|
||||
description = "Timeframe for the Memory Utilization Forecast monitor"
|
||||
default = "next_3d"
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_algorithm" {
|
||||
description = "Algorithm for the Memory Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "linear"
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_deviations" {
|
||||
description = "Deviations for the Memory Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_interval" {
|
||||
description = "Interval for the Memory Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "30m"
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_linear_history" {
|
||||
description = "History for the Memory Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "12h"
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_linear_model" {
|
||||
description = "Model for the Memory Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "default"
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_seasonal_seasonality" {
|
||||
description = "Seasonality for the Memory Utilization Forecast monitor"
|
||||
type = "string"
|
||||
default = "weekly"
|
||||
}
|
||||
|
||||
# Critical threshold of the Memory Utilization Forecast monitor. It is used both
# in the query comparison and in the monitor's `thresholds.critical` value.
variable "memory_utilization_forecast_threshold_critical" {
  description = "Memory Utilization Forecast in percentage (critical threshold)"
  type        = "string"
  default     = 90
}
|
||||
|
||||
variable "memory_utilization_forecast_threshold_critical_recovery" {
|
||||
description = "Memory Utilization Forecast in percentage (recovery threshold)"
|
||||
default = 81
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_silenced" {
|
||||
description = "Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "memory_utilization_forecast_extra_tags" {
|
||||
description = "Extra tags for GCP Cloud SQL Memory Utilization Forecast monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
|
||||
#
|
||||
# Failover Unavailable
|
||||
#
|
||||
|
||||
variable "failover_unavailable_message" {
|
||||
description = "Custom message for the Failover Unavailable monitor"
|
||||
type = "string"
|
||||
default = ""
|
||||
}
|
||||
|
||||
# Time aggregation function placed at the start of the Failover Unavailable query.
variable "failover_unavailable_time_aggregator" {
  description = "Time aggregator for the Failover Unavailable monitor"
  type        = "string"
  default     = "max"
}
|
||||
|
||||
variable "failover_unavailable_timeframe" {
|
||||
description = "Timeframe for the Failover Unavailable monitor"
|
||||
type = "string"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "failover_unavailable_threshold_critical" {
|
||||
description = "Failover Unavailable critical threshold"
|
||||
type = "string"
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "failover_unavailable_silenced" {
|
||||
description = "Groups to mute for GCP Cloud SQL Failover Unavailable monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "failover_unavailable_extra_tags" {
|
||||
description = "Extra tags for GCP Cloud SQL Failover Unavailable monitor"
|
||||
type = "list"
|
||||
default = []
|
||||
}
|
||||
232
cloud/gcp/cloud-sql/common/monitors-cloud-sql-common.tf
Normal file
232
cloud/gcp/cloud-sql/common/monitors-cloud-sql-common.tf
Normal file
@ -0,0 +1,232 @@
|
||||
#
|
||||
# CPU Utilization
|
||||
#
|
||||
resource "datadog_monitor" "cpu_utilization" {
|
||||
name = "[${var.environment}] Cloud SQL CPU Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.cpu_utilization_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
${var.cpu_utilization_time_aggregator}(${var.cpu_utilization_timeframe}):
|
||||
avg:gcp.cloudsql.database.cpu.utilization{${var.filter_tags}}
|
||||
by {database_id} * 100
|
||||
> ${var.cpu_utilization_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
warning = "${var.cpu_utilization_threshold_warning}"
|
||||
critical = "${var.cpu_utilization_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.cpu_utilization_silenced}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.cpu_utilization_extra_tags}"]
|
||||
}
|
||||
|
||||
#
|
||||
# Disk Utilization
|
||||
#
|
||||
resource "datadog_monitor" "disk_utilization" {
|
||||
name = "[${var.environment}] Cloud SQL Disk Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.disk_utilization_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
${var.disk_utilization_time_aggregator}(${var.disk_utilization_timeframe}):
|
||||
avg:gcp.cloudsql.database.disk.utilization{${var.filter_tags}}
|
||||
by {database_id} * 100
|
||||
> ${var.disk_utilization_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
warning = "${var.disk_utilization_threshold_warning}"
|
||||
critical = "${var.disk_utilization_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.disk_utilization_silenced}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.disk_utilization_extra_tags}"]
|
||||
}
|
||||
|
||||
#
|
||||
# Disk Utilization Forecast
|
||||
#
|
||||
# Forecast monitor: triggers when the forecasted Cloud SQL disk utilization
# (converted to a percentage, grouped by database_id) is >= the critical
# threshold over the configured forecast timeframe.
resource "datadog_monitor" "disk_utilization_forecast" {
  name = "[${var.environment}] Cloud SQL Disk Utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future"

  # Falls back to the module-wide message when no monitor-specific one is set.
  message = "${coalesce(var.disk_utilization_forecast_message, var.message)}"

  # forecast() queries are declared as query alerts, consistent with the
  # memory_utilization_forecast monitor in this module.
  type = "query alert"

  # The last two interpolated lines are mutually exclusive: only the options
  # matching the selected algorithm ("linear" or "seasonal") are rendered,
  # the other one expands to an empty string.
  query = <<EOF
${var.disk_utilization_forecast_time_aggregator}(${var.disk_utilization_forecast_timeframe}):
forecast(
avg:gcp.cloudsql.database.disk.utilization{${var.filter_tags}} by {database_id} * 100,
'${var.disk_utilization_forecast_algorithm}',
${var.disk_utilization_forecast_deviations},
interval='${var.disk_utilization_forecast_interval}',
${var.disk_utilization_forecast_algorithm == "linear" ? format("history='%s',model='%s'", var.disk_utilization_forecast_linear_history, var.disk_utilization_forecast_linear_model): ""}
${var.disk_utilization_forecast_algorithm == "seasonal" ? format("seasonality='%s'", var.disk_utilization_forecast_seasonal_seasonality): ""}
)
>= ${var.disk_utilization_forecast_threshold_critical}
EOF

  # The critical value must stay in sync with the comparison in the query above.
  thresholds {
    critical          = "${var.disk_utilization_forecast_threshold_critical}"
    critical_recovery = "${var.disk_utilization_forecast_threshold_critical_recovery}"
  }

  notify_audit        = false
  locked              = false
  timeout_h           = 0
  include_tags        = true
  require_full_window = false
  notify_no_data      = false
  renotify_interval   = 0

  evaluation_delay = "${var.evaluation_delay}"
  new_host_delay   = "${var.new_host_delay}"

  silenced = "${var.disk_utilization_forecast_silenced}"

  # Default tags followed by the caller-supplied extra tags.
  tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.disk_utilization_forecast_extra_tags}"]
}
|
||||
|
||||
#
|
||||
# Memory Utilization
|
||||
#
|
||||
resource "datadog_monitor" "memory_utilization" {
|
||||
name = "[${var.environment}] Cloud SQL Memory Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.memory_utilization_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
${var.memory_utilization_time_aggregator}(${var.memory_utilization_timeframe}):
|
||||
avg:gcp.cloudsql.database.memory.utilization{${var.filter_tags}}
|
||||
by {database_id} * 100
|
||||
> ${var.memory_utilization_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
warning = "${var.memory_utilization_threshold_warning}"
|
||||
critical = "${var.memory_utilization_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.memory_utilization_silenced}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.memory_utilization_extra_tags}"]
|
||||
}
|
||||
|
||||
#
|
||||
# Memory Utilization Forecast
|
||||
#
|
||||
resource "datadog_monitor" "memory_utilization_forecast" {
|
||||
name = "[${var.environment}] Cloud SQL Memory Utilization could reach {{#is_alert}}{{threshold}}%{{/is_alert}} in a near future"
|
||||
message = "${coalesce(var.memory_utilization_forecast_message, var.message)}"
|
||||
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOF
|
||||
${var.memory_utilization_forecast_time_aggregator}(${var.memory_utilization_forecast_timeframe}):
|
||||
forecast(
|
||||
avg:gcp.cloudsql.database.memory.utilization{${var.filter_tags}} by {database_id} * 100,
|
||||
'${var.memory_utilization_forecast_algorithm}',
|
||||
${var.memory_utilization_forecast_deviations},
|
||||
interval='${var.memory_utilization_forecast_interval}',
|
||||
${var.memory_utilization_forecast_algorithm == "linear" ? format("history='%s',model='%s'", var.memory_utilization_forecast_linear_history, var.memory_utilization_forecast_linear_model): ""}
|
||||
${var.memory_utilization_forecast_algorithm == "seasonal" ? format("seasonality='%s'", var.memory_utilization_forecast_seasonal_seasonality): ""}
|
||||
)
|
||||
>= ${var.memory_utilization_forecast_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
critical = "${var.memory_utilization_forecast_threshold_critical}"
|
||||
critical_recovery = "${var.memory_utilization_forecast_threshold_critical_recovery}"
|
||||
}
|
||||
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = false
|
||||
renotify_interval = 0
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.memory_utilization_forecast_silenced}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.memory_utilization_forecast_extra_tags}"]
|
||||
}
|
||||
|
||||
#
|
||||
# Failover Unavailable
|
||||
#
|
||||
resource "datadog_monitor" "failover_unavailable" {
|
||||
name = "[${var.environment}] Cloud SQL Failover Unavailable"
|
||||
message = "${coalesce(var.failover_unavailable_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
${var.failover_unavailable_time_aggregator}(${var.failover_unavailable_timeframe}):
|
||||
avg:gcp.cloudsql.database.available_for_failover{${var.filter_tags}}
|
||||
by {database_id}
|
||||
<= ${var.failover_unavailable_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
critical = "${var.failover_unavailable_threshold_critical}"
|
||||
}
|
||||
|
||||
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
|
||||
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.failover_unavailable_silenced}"
|
||||
|
||||
tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "${var.failover_unavailable_extra_tags}"]
|
||||
}
|
||||
29
cloud/gcp/cloud-sql/common/outputs.tf
Normal file
29
cloud/gcp/cloud-sql/common/outputs.tf
Normal file
@ -0,0 +1,29 @@
|
||||
output "cpu_utilization_id" {
|
||||
description = "id for monitor cpu_utilization"
|
||||
value = "${datadog_monitor.cpu_utilization.*.id}"
|
||||
}
|
||||
|
||||
output "disk_utilization_id" {
|
||||
description = "id for monitor disk_utilization"
|
||||
value = "${datadog_monitor.disk_utilization.*.id}"
|
||||
}
|
||||
|
||||
output "disk_utilization_forecast_id" {
|
||||
description = "id for monitor disk_utilization_forecast"
|
||||
value = "${datadog_monitor.disk_utilization_forecast.*.id}"
|
||||
}
|
||||
|
||||
output "memory_utilization_id" {
|
||||
description = "id for monitor memory_utilization"
|
||||
value = "${datadog_monitor.memory_utilization.*.id}"
|
||||
}
|
||||
|
||||
output "memory_utilization_forecast_id" {
|
||||
description = "id for monitor memory_utilization_forecast"
|
||||
value = "${datadog_monitor.memory_utilization_forecast.*.id}"
|
||||
}
|
||||
|
||||
output "failover_unavailable_id" {
|
||||
description = "id for monitor failover_unavailable"
|
||||
value = "${datadog_monitor.failover_unavailable.*.id}"
|
||||
}
|
||||
50
cloud/gcp/cloud-sql/mysql/README.md
Normal file
50
cloud/gcp/cloud-sql/mysql/README.md
Normal file
@ -0,0 +1,50 @@
|
||||
# CLOUD GCP CLOUD-SQL MYSQL DataDog monitors
|
||||
|
||||
## How to use this module
|
||||
|
||||
```
|
||||
module "datadog-monitors-cloud-gcp-cloud-sql-mysql" {
|
||||
source = "git::ssh://git@bitbucket.org/morea/terraform.feature.datadog.git//cloud/gcp/cloud-sql/mysql?ref={revision}"
|
||||
|
||||
environment = "${var.environment}"
|
||||
message = "${module.datadog-message-alerting.alerting-message}"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
## Purpose
|
||||
|
||||
Creates DataDog monitors with the following checks:
|
||||
|
||||
- Cloud SQL MySQL Replication Lag
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| evaluation_delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| filter_tags | Tags used for filtering | string | `*` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| new_host_delay | Delay in seconds for the new host evaluation | string | `300` | no |
|
||||
| replication_lag_extra_tags | Extra tags for GCP Cloud SQL Replication Lag monitor | list | `<list>` | no |
|
||||
| replication_lag_message | Custom message for the Replication Lag monitor | string | `` | no |
|
||||
| replication_lag_silenced | Groups to mute for GCP Cloud SQL Replication Lag monitor | map | `<map>` | no |
|
||||
| replication_lag_threshold_critical | Seconds behind the master (critical threshold) | string | `180` | no |
|
||||
| replication_lag_threshold_warning | Seconds behind the master (warning threshold) | string | `90` | no |
|
||||
| replication_lag_time_aggregator | Time aggregator for the Replication Lag monitor | string | `min` | no |
|
||||
| replication_lag_timeframe | Timeframe for the Replication Lag monitor | string | `last_10m` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| replication_lag_id | id for monitor replication_lag |
|
||||
|
||||
## Related documentation
|
||||
|
||||
* [GCP Metrics for CloudSQL](https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudsql)
|
||||
* [Datadog Useful monitors for GCP CloudSQL](https://www.datadoghq.com/blog/monitor-google-cloud-sql/)
|
||||
* [Max connections depends on the type of the instance](https://cloud.google.com/sql/docs/quotas#fixed-limits)
|
||||
* [Monitoring Replication Lag](https://cloud.google.com/sql/docs/mysql/high-availability#replication-lag-monitor)
|
||||
* [Monitoring MySQL Performance Metrics](https://www.datadoghq.com/blog/monitoring-mysql-performance-metrics)
|
||||
72
cloud/gcp/cloud-sql/mysql/inputs.tf
Normal file
72
cloud/gcp/cloud-sql/mysql/inputs.tf
Normal file
@ -0,0 +1,72 @@
|
||||
#
|
||||
# Datadog global variables
|
||||
#
|
||||
# Environment name; it has no default, so the caller must provide it.
# It is interpolated into the monitor title and the "env:" tag.
variable "environment" {
  type        = "string"
  description = "Architecture environment"
}
|
||||
|
||||
# Scope expression interpolated between the braces of the monitor query.
# The type is declared explicitly to match the other variables of this module
# (it was previously inferred from the default).
variable "filter_tags" {
  description = "Tags used for filtering"
  type        = "string"
  default     = "*"
}
|
||||
|
||||
# Module-wide notification message; required because it has no default.
# The type is declared explicitly to match the other variables of this module
# (an untyped variable without default is treated as a string anyway).
variable "message" {
  description = "Message sent when a monitor is triggered"
  type        = "string"
}
|
||||
|
||||
# Value passed to the monitor's evaluation_delay argument (seconds).
# The type is declared explicitly to match the other variables of this module,
# which also pair type "string" with a bare numeric default.
variable "evaluation_delay" {
  description = "Delay in seconds for the metric evaluation"
  type        = "string"
  default     = 900
}
|
||||
|
||||
# Value passed to the monitor's new_host_delay argument (seconds).
# The type is declared explicitly to match the other variables of this module,
# which also pair type "string" with a bare numeric default.
variable "new_host_delay" {
  description = "Delay in seconds for the new host evaluation"
  type        = "string"
  default     = 300
}
|
||||
|
||||
#
|
||||
# Replication Lag
|
||||
#
|
||||
|
||||
# Optional message specific to the Replication Lag monitor.
# The monitor coalesces it with var.message, so the empty default falls back
# to the module-wide message.
variable "replication_lag_message" {
  type        = "string"
  default     = ""
  description = "Custom message for the Replication Lag monitor"
}
|
||||
|
||||
# Aggregation function written in front of the timeframe in the monitor query.
variable "replication_lag_time_aggregator" {
  type        = "string"
  default     = "min"
  description = "Time aggregator for the Replication Lag monitor"
}
|
||||
|
||||
# Evaluation window written inside the parentheses of the monitor query.
variable "replication_lag_timeframe" {
  type        = "string"
  default     = "last_10m"
  description = "Timeframe for the Replication Lag monitor"
}
|
||||
|
||||
# Lag value wired into the monitor's warning threshold.
variable "replication_lag_threshold_warning" {
  type        = "string"
  default     = 90
  description = "Seconds behind the master (warning threshold)"
}
|
||||
|
||||
# Lag value used both in the query comparison and as the monitor's critical threshold.
variable "replication_lag_threshold_critical" {
  type        = "string"
  default     = 180
  description = "Seconds behind the master (critical threshold)"
}
|
||||
|
||||
# Map handed unchanged to the monitor's silenced argument; empty by default.
variable "replication_lag_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for GCP Cloud SQL Replication Lag monitor"
}
|
||||
|
||||
# Additional tags appended after the fixed tag set of the Replication Lag monitor.
# The description previously read "Cloud SQL SQL Replication monitor" (duplicated
# word); it now uses the same wording as replication_lag_silenced.
variable "replication_lag_extra_tags" {
  description = "Extra tags for GCP Cloud SQL Replication Lag monitor"
  type        = "list"
  default     = []
}
|
||||
36
cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf
Normal file
36
cloud/gcp/cloud-sql/mysql/monitors-cloudsql-mysql.tf
Normal file
@ -0,0 +1,36 @@
|
||||
#
|
||||
# Replication Lag
|
||||
#
|
||||
# Datadog monitor on gcp.cloudsql.database.mysql.replication.seconds_behind_master,
# evaluated separately for each database_id.
resource "datadog_monitor" "replication_lag" {
|
||||
# Title: environment prefix, then Datadog template variables that print the
# comparator, the threshold and the current value for alert and warning states.
name = "[${var.environment}] Cloud SQL MySQL Replication Lag {{#is_alert}}{{{comparator}}} {{threshold}}s ({{value}}s){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}s ({{value}}s){{/is_warning}}"
|
||||
# Monitor-specific message when set, otherwise the module-wide message.
message = "${coalesce(var.replication_lag_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
# Aggregator, timeframe, tag filter and critical value all come from variables.
# Nothing may be added inside the heredoc: every line of it is part of the query.
query = <<EOF
|
||||
${var.replication_lag_time_aggregator}(${var.replication_lag_timeframe}):
|
||||
avg:gcp.cloudsql.database.mysql.replication.seconds_behind_master{${var.filter_tags}}
|
||||
by {database_id}
|
||||
> ${var.replication_lag_threshold_critical}
|
||||
EOF
|
||||
|
||||
# The critical value repeats the one compared in the query; warning is only set here.
thresholds {
|
||||
critical = "${var.replication_lag_threshold_critical}"
|
||||
warning = "${var.replication_lag_threshold_warning}"
|
||||
}
|
||||
|
||||
# Fixed notification and evaluation options (not exposed as variables).
notify_audit = false
|
||||
locked = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
require_full_window = false
|
||||
notify_no_data = true
|
||||
renotify_interval = 0
|
||||
|
||||
# Delays shared by the module, taken from the global variables.
evaluation_delay = "${var.evaluation_delay}"
|
||||
new_host_delay = "${var.new_host_delay}"
|
||||
|
||||
silenced = "${var.replication_lag_silenced}"
|
||||
|
||||
# Fixed identification tags followed by the caller-supplied extra tags.
tags = ["env:${var.environment}", "type:cloud", "provider:gcp", "resource:cloud-sql", "team:claranet", "created-by:terraform", "engine:mysql", "${var.replication_lag_extra_tags}"]
|
||||
}
|
||||
4
cloud/gcp/cloud-sql/mysql/outputs.tf
Normal file
4
cloud/gcp/cloud-sql/mysql/outputs.tf
Normal file
@ -0,0 +1,4 @@
|
||||
# Publishes the id attribute of datadog_monitor.replication_lag (splat form).
output "replication_lag_id" {
  value       = "${datadog_monitor.replication_lag.*.id}"
  description = "id for monitor replication_lag"
}
|
||||
Loading…
x
Reference in New Issue
Block a user