MON-224 Standard and recommended monitors with their inputs and readme
This commit is contained in:
parent
d34fe1819f
commit
70a45ed9f6
32
cloud/gcp/cloud-sql/instance/README.md
Normal file
32
cloud/gcp/cloud-sql/instance/README.md
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
|
||||||
|
## Inputs
|
||||||
|
|
||||||
|
| Name | Description | Type | Default | Required |
|
||||||
|
|------|-------------|:----:|:-----:|:-----:|
|
||||||
|
| cpu_message | Custom message for the CPU Utilization monitor | string | `` | no |
|
||||||
|
| cpu_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `<map>` | no |
|
||||||
|
| cpu_tags | Tags to add to the CPU Utilization monitors | map | `` | no |
|
||||||
|
| cpu_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no |
|
||||||
|
| cpu_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.85` | no |
|
||||||
|
| cpu_timeframe | Timeframe for the CPU Utilization monitor | string | `last_2h` | no |
|
||||||
|
| database_id | ID of the Cloud SQL Database Instance | string | - | yes |
|
||||||
|
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||||
|
| disk_message | Custom message for the Disk Utilization monitor | string | `` | no |
|
||||||
|
| disk_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `<map>` | no |
|
||||||
|
| disk_tags | Tags to add to the Disk Utilization monitors | map | `` | no |
|
||||||
|
| disk_threshold_critical | Disk Utilization in fraction (critical threshold) | string | `0.9` | no |
|
||||||
|
| disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no |
|
||||||
|
| disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no |
|
||||||
|
| environment | Architecture environment | string | - | yes |
|
||||||
|
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||||
|
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||||
|
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||||
|
| network_connections_hard_limit | Max number of network connections | string | - | yes |
|
||||||
|
| network_connections_message | Custom message for the Network Connections monitor | string | `` | no |
|
||||||
|
| network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `<map>` | no |
|
||||||
|
| network_connections_tags | Tags to add to the Network Connections monitors | map | `` | no |
|
||||||
|
| network_connections_threshold_critical | Fraction of network connections (critical threshold) | string | `0.9` | no |
|
||||||
|
| network_connections_threshold_warning | Fraction of network connections (warning threshold) | string | `0.8` | no |
|
||||||
|
| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no |
|
||||||
|
| project_id | ID of the GCP Project | string | - | yes |
|
||||||
|
|
||||||
161
cloud/gcp/cloud-sql/instance/inputs.tf
Normal file
161
cloud/gcp/cloud-sql/instance/inputs.tf
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
#
|
||||||
|
# Datadog global variables
|
||||||
|
#
|
||||||
|
# Environment label; it is interpolated into every monitor title and into the
# "env:<value>" monitor tag.
variable "environment" {
  description = "Architecture environment"
  type        = "string"
}

# Selects the metric scope used by the monitors: the string "true" selects the
# default project_id-based filter, any other value selects filter_tags_custom.
variable "filter_tags_use_defaults" {
  description = "Use default filter tags convention"
  type        = "string"
  default     = "true"
}

# Raw Datadog scope used verbatim when filter_tags_use_defaults is not "true".
variable "filter_tags_custom" {
  description = "Tags used for custom filtering when filter_tags_use_defaults is false"
  type        = "string"
  default     = "*"
}

# Shared notification message; each monitor uses its own *_message variable
# instead when that one is non-empty.
variable "message" {
  description = "Message sent when a monitor is triggered"
  type        = "string"
}

# Applied to both evaluation_delay and new_host_delay on every monitor.
variable "delay" {
  description = "Delay in seconds for the metric evaluation"
  type        = "string"
  default     = 900
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Filter variables
|
||||||
|
#
|
||||||
|
# GCP project whose Cloud SQL metrics are monitored; the default filter is
# built from it as "project_id:<value>".
variable "project_id" {
  description = "ID of the GCP Project"
  type        = "string"
}

# NOTE(review): the monitors shown in this change group by the database_id
# *tag* but never read var.database_id — confirm whether this input is still
# needed or should be part of the filter.
variable "database_id" {
  description = "ID of the Cloud SQL Database Instance"
  type        = "string"
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# CPU
|
||||||
|
#
|
||||||
|
# Optional override for the CPU monitor's notification text; when left empty
# the monitor falls back to var.message.
variable "cpu_message" {
  type        = "string"
  default     = ""
  description = "Custom message for the CPU Utilization monitor"
}

# Evaluation window, inserted into the monitor query as avg(<timeframe>).
variable "cpu_timeframe" {
  type        = "string"
  default     = "last_2h"
  description = "Timeframe for the CPU Utilization monitor"
}

# Warning level, passed to the monitor's thresholds block.
variable "cpu_threshold_warning" {
  type        = "string"
  default     = 0.85
  description = "CPU Utilization in fraction (warning threshold)"
}

# Critical level; used both in the monitor query comparison and in the
# thresholds block.
variable "cpu_threshold_critical" {
  type        = "string"
  default     = 0.9
  description = "CPU Utilization in fraction (critical threshold)"
}

# Passed straight through to the monitor's "silenced" attribute.
variable "cpu_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for GCP Cloud SQL CPU Utilization monitor"
}

# NOTE(review): declared as a map, but the monitor interpolates it as an
# element of its "tags" list — confirm whether this should be a list.
variable "cpu_tags" {
  type        = "map"
  default     = {}
  description = "Tags to add to the CPU Utilization monitors"
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# DISK
|
||||||
|
#
|
||||||
|
# Optional override for the Disk monitor's notification text; when left empty
# the monitor falls back to var.message.
variable "disk_message" {
  type        = "string"
  default     = ""
  description = "Custom message for the Disk Utilization monitor"
}

# Evaluation window, inserted into the monitor query as avg(<timeframe>).
variable "disk_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Timeframe for the Disk Utilization monitor"
}

# Warning level, passed to the monitor's thresholds block.
variable "disk_threshold_warning" {
  type        = "string"
  default     = 0.8
  description = "Disk Utilization in fraction (warning threshold)"
}

# Critical level; used both in the monitor query comparison and in the
# thresholds block.
variable "disk_threshold_critical" {
  type        = "string"
  default     = 0.9
  description = "Disk Utilization in fraction (critical threshold)"
}

# Passed straight through to the monitor's "silenced" attribute.
variable "disk_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for GCP Cloud SQL Disk Utilization monitor"
}

# NOTE(review): declared as a map, but the monitor interpolates it as an
# element of its "tags" list — confirm whether this should be a list.
variable "disk_tags" {
  type        = "map"
  default     = {}
  description = "Tags to add to the Disk Utilization monitors"
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Network Connections
|
||||||
|
#
|
||||||
|
# Optional override for the Network Connections monitor's notification text;
# when left empty the monitor falls back to var.message.
variable "network_connections_message" {
  description = "Custom message for the Network Connections monitor"
  type        = "string"
  default     = ""
}
|
||||||
|
|
||||||
|
# Evaluation window, inserted into the monitor query as avg(<timeframe>).
variable "network_connections_timeframe" {
  type        = "string"
  default     = "last_5m"
  description = "Timeframe for the Network Connections monitor"
}

# Divisor applied to the connection-count metric in the monitor query; it is
# also shown in the monitor title. Required: there is no default.
variable "network_connections_hard_limit" {
  type        = "string"
  description = "Max number of network connections"
}

# Warning level, passed to the monitor's thresholds block.
variable "network_connections_threshold_warning" {
  type        = "string"
  default     = 0.8
  description = "Fraction of network connections (warning threshold)"
}
|
||||||
|
|
||||||
|
# Critical level; used both in the monitor query comparison and in the
# thresholds block.
variable "network_connections_threshold_critical" {
  description = "Fraction of network connections (critical threshold)"
  type        = "string"
  default     = 0.9
}
|
||||||
|
|
||||||
|
# Passed straight through to the monitor's "silenced" attribute.
variable "network_connections_silenced" {
  type        = "map"
  default     = {}
  description = "Groups to mute for GCP Cloud SQL Network Connections monitor"
}

# NOTE(review): declared as a map, but the monitor interpolates it as an
# element of its "tags" list — confirm whether this should be a list.
variable "network_connections_tags" {
  type        = "map"
  default     = {}
  description = "Tags to add to the Network Connections monitors"
}
|
||||||
142
cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf
Normal file
142
cloud/gcp/cloud-sql/instance/monitors-cloud-sql-instance.tf
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
#
|
||||||
|
# FILTER
|
||||||
|
#
|
||||||
|
# Renders the metric scope that every monitor in this file places between the
# braces of its query.
data "template_file" "filter" {
  # "$$" escapes Terraform interpolation so the template itself is "${filter}".
  template = "$${filter}"

  vars {
    # "true" selects the default project-based scope; any other value uses the
    # caller-supplied custom tags verbatim.
    filter = "${var.filter_tags_use_defaults == "true" ? format("project_id:%s", var.project_id) : var.filter_tags_custom}"
  }
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# CPU Utilization
|
||||||
|
#
|
||||||
|
# Metric alert on average Cloud SQL CPU utilization, evaluated per database_id
# tag over var.cpu_timeframe.
#
# NOTE(review): the thresholds are documented as fractions (e.g. 0.9) while the
# title renders them with a "%" suffix — confirm the intended unit.
resource "datadog_monitor" "cpu_utilization" {
  name = "[${var.environment}] Cloud SQL CPU utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # The monitor-specific message wins when non-empty; otherwise the shared one is used.
  message = "${coalesce(var.cpu_message, var.message)}"

  type = "metric alert"

  # The "by {database_id}" grouping is attached directly to the metric, inside
  # the parentheses, rather than after the closing parenthesis.
  query = <<EOF
  avg(${var.cpu_timeframe}): (
    avg:gcp.cloudsql.database.cpu.utilization{${data.template_file.filter.rendered}}
    by {database_id}
  )
  > ${var.cpu_threshold_critical}
EOF

  thresholds {
    warning  = "${var.cpu_threshold_warning}"
    critical = "${var.cpu_threshold_critical}"
  }

  # include_tags was assigned twice in the original block; it is set once here.
  include_tags        = true
  notify_no_data      = true
  require_full_window = false
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  locked              = false

  # The same delay is used for metric evaluation and for newly seen hosts.
  evaluation_delay = "${var.delay}"
  new_host_delay   = "${var.delay}"

  silenced = "${var.cpu_silenced}"

  tags = [
    "team:gcp",
    "provider:gcp",
    "env:${var.environment}",
    "resource:cloud-sql",
    "${var.cpu_tags}",
  ]
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Disk Utilization
|
||||||
|
#
|
||||||
|
# Metric alert on average Cloud SQL disk utilization, evaluated per database_id
# tag over var.disk_timeframe.
#
# NOTE(review): the thresholds are documented as fractions (e.g. 0.9) while the
# title renders them with a "%" suffix — confirm the intended unit.
resource "datadog_monitor" "disk_utilization" {
  name = "[${var.environment}] Cloud SQL Disk utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # The monitor-specific message wins when non-empty; otherwise the shared one is used.
  message = "${coalesce(var.disk_message, var.message)}"

  type = "metric alert"

  # The "by {database_id}" grouping is attached directly to the metric, inside
  # the parentheses, rather than after the closing parenthesis.
  query = <<EOF
  avg(${var.disk_timeframe}): (
    avg:gcp.cloudsql.database.disk.utilization{${data.template_file.filter.rendered}}
    by {database_id}
  )
  > ${var.disk_threshold_critical}
EOF

  thresholds {
    warning  = "${var.disk_threshold_warning}"
    critical = "${var.disk_threshold_critical}"
  }

  # include_tags was assigned twice in the original block; it is set once here.
  include_tags        = true
  notify_no_data      = true
  require_full_window = false
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  locked              = false

  # The same delay is used for metric evaluation and for newly seen hosts.
  evaluation_delay = "${var.delay}"
  new_host_delay   = "${var.delay}"

  silenced = "${var.disk_silenced}"

  tags = [
    "team:gcp",
    "provider:gcp",
    "env:${var.environment}",
    "resource:cloud-sql",
    "${var.disk_tags}",
  ]
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Network Connections
|
||||||
|
#
|
||||||
|
# Metric alert on the ratio of Cloud SQL network connections to
# var.network_connections_hard_limit, evaluated per database_id tag over
# var.network_connections_timeframe.
#
# NOTE(review): the thresholds are documented as fractions (e.g. 0.9) while the
# title renders them with a "%" suffix — confirm the intended unit.
resource "datadog_monitor" "network_connections" {
  name = "[${var.environment}] Cloud SQL Network Connections (hard limit: ${var.network_connections_hard_limit}) {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"

  # The monitor-specific message wins when non-empty; otherwise the shared one is used.
  message = "${coalesce(var.network_connections_message, var.message)}"

  type = "metric alert"

  # The "by {database_id}" grouping is attached directly to the metric, before
  # the division by the hard limit, rather than after the closing parenthesis.
  query = <<EOF
  avg(${var.network_connections_timeframe}): (
    avg:gcp.cloudsql.database.network.connections{${data.template_file.filter.rendered}}
    by {database_id}
    /${var.network_connections_hard_limit}
  )
  > ${var.network_connections_threshold_critical}
EOF

  thresholds {
    warning  = "${var.network_connections_threshold_warning}"
    critical = "${var.network_connections_threshold_critical}"
  }

  # include_tags was assigned twice in the original block; it is set once here.
  include_tags        = true
  notify_no_data      = true
  require_full_window = false
  renotify_interval   = 0
  notify_audit        = false
  timeout_h           = 0
  locked              = false

  # The same delay is used for metric evaluation and for newly seen hosts.
  evaluation_delay = "${var.delay}"
  new_host_delay   = "${var.delay}"

  silenced = "${var.network_connections_silenced}"

  tags = [
    "team:gcp",
    "provider:gcp",
    "env:${var.environment}",
    "resource:cloud-sql",
    "${var.network_connections_tags}",
  ]
}
|
||||||
Loading…
x
Reference in New Issue
Block a user