MON-224 Monitors for CPU, Disk, Memory Utilization and Memory Utilization Forecast
This commit is contained in:
parent
91b07cbb65
commit
316fde7e75
@ -5,28 +5,30 @@
|
||||
|------|-------------|:----:|:-----:|:-----:|
|
||||
| cpu_message | Custom message for the CPU Utilization monitor | string | `` | no |
|
||||
| cpu_silenced | Groups to mute for GCP Cloud SQL CPU Utilization monitor | map | `<map>` | no |
|
||||
| cpu_tags | Tags to add to the CPU Utilization monitors | map | `` | no |
|
||||
| cpu_threshold_critical | CPU Utilization in fraction (critical threshold) | string | `0.9` | no |
|
||||
| cpu_threshold_warning | CPU Utilization in fraction (warning threshold) | string | `0.85` | no |
|
||||
| cpu_timeframe | Timeframe for the CPU Utilization monitor | string | `last_2h` | no |
|
||||
| database_id | ID of the Cloud SQL Database Instance | string | - | yes |
|
||||
| delay | Delay in seconds for the metric evaluation | string | `900` | no |
|
||||
| disk_message | Custom message for the Disk Utilization monitor | string | `` | no |
|
||||
| disk_silenced | Groups to mute for GCP Cloud SQL Disk Utilization monitor | map | `<map>` | no |
|
||||
| disk_tags | Tags to add to the Disk Utilization monitors | map | `` | no |
|
||||
| disk_threshold_critical | Disk Utilization in fraction (critical threshold) | string | `0.9` | no |
|
||||
| disk_threshold_warning | Disk Utilization in fraction (warning threshold) | string | `0.8` | no |
|
||||
| disk_timeframe | Timeframe for the Disk Utilization monitor | string | `last_5m` | no |
|
||||
| environment | Architecture environment | string | - | yes |
|
||||
| filter_tags_custom | Tags used for custom filtering when filter_tags_use_defaults is false | string | `*` | no |
|
||||
| filter_tags_use_defaults | Use default filter tags convention | string | `true` | no |
|
||||
| memory_forecast_history | History for the Memory Utilization Forecast monitor | string | `12h` | no |
|
||||
| memory_forecast_interval | Interval for the Memory Utilization Forecast monitor | string | `30m` | no |
|
||||
| memory_forecast_message | Custom message for the Memory Utilization Forecast monitor | string | `` | no |
|
||||
| memory_forecast_silenced | Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor | map | `<map>` | no |
|
||||
| memory_forecast_threshold_critical | Memory Utilization Forecast in fraction (critical threshold) | string | `0.9` | no |
|
||||
| memory_forecast_threshold_warning | Memory Utilization Forecast in fraction (warning threshold) | string | `0.8` | no |
|
||||
| memory_forecast_timeframe | Timeframe for the Memory Utilization Forecast monitor | string | `next_3d` | no |
|
||||
| memory_message | Custom message for the Memory Utilization monitor | string | `` | no |
|
||||
| memory_silenced | Groups to mute for GCP Cloud SQL Memory Utilization monitor | map | `<map>` | no |
|
||||
| memory_threshold_critical | Memory Utilization in fraction (critical threshold) | string | `0.9` | no |
|
||||
| memory_threshold_warning | Memory Utilization in fraction (warning threshold) | string | `0.8` | no |
|
||||
| memory_timeframe | Timeframe for the Memory Utilization monitor | string | `last_5m` | no |
|
||||
| message | Message sent when a monitor is triggered | string | - | yes |
|
||||
| network_connections_hard_limit | Max number of network connections | string | - | yes |
|
||||
| network_connections_message | Custom message for the Network Connections monitor | string | `` | no |
|
||||
| network_connections_silenced | Groups to mute for GCP Cloud SQL Network Connections monitor | map | `<map>` | no |
|
||||
| network_connections_tags | Tags to add to the Network Connections monitors | map | `` | no |
|
||||
| network_connections_threshold_critical | Fraction of network connections (critical threshold) | string | `0.9` | no |
|
||||
| network_connections_threshold_warning | Fraction of network connections (warning threshold) | string | `0.8` | no |
|
||||
| network_connections_timeframe | Timeframe for the Network Connections monitor | string | `last_5m` | no |
|
||||
| project_id | ID of the GCP Project | string | - | yes |
|
||||
|
||||
|
||||
@ -100,39 +100,69 @@ variable "disk_silenced" {
|
||||
}
|
||||
|
||||
#
|
||||
# Network Connections
|
||||
# Memory Utilization
|
||||
#
|
||||
variable "network_connections_message" {
|
||||
description = "Custom message for the Network Connections monitor"
|
||||
type = "string"
|
||||
variable "memory_message" {
|
||||
description = "Custom message for the Memory Utilization monitor"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "network_connections_timeframe" {
|
||||
description = "Timeframe for the Network Connections monitor"
|
||||
type = "string"
|
||||
variable "memory_timeframe" {
|
||||
description = "Timeframe for the Memory Utilization monitor"
|
||||
default = "last_5m"
|
||||
}
|
||||
|
||||
variable "network_connections_hard_limit" {
|
||||
description = "Max number of network connections"
|
||||
type = "string"
|
||||
}
|
||||
|
||||
variable "network_connections_threshold_warning" {
|
||||
description = "Fraction of network connections (warning threshold)"
|
||||
type = "string"
|
||||
variable "memory_threshold_warning" {
|
||||
description = "Memory Utilization in fraction (warning threshold)"
|
||||
default = 0.8
|
||||
}
|
||||
|
||||
variable "network_connections_threshold_critical" {
|
||||
description = "Fraction of network connections (critical threshold)"
|
||||
type = "string"
|
||||
variable "memory_threshold_critical" {
|
||||
description = "Memory Utilization in fraction (critical threshold)"
|
||||
default = 0.9
|
||||
}
|
||||
|
||||
variable "network_connections_silenced" {
|
||||
description = "Groups to mute for GCP Cloud SQL Network Connections monitor"
|
||||
variable "memory_silenced" {
|
||||
description = "Groups to mute for GCP Cloud SQL Memory Utilization monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
#
|
||||
# Memory Utilization Forecast
|
||||
#
|
||||
variable "memory_forecast_message" {
|
||||
description = "Custom message for the Memory Utilization Forecast monitor"
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "memory_forecast_timeframe" {
|
||||
description = "Timeframe for the Memory Utilization Forecast monitor"
|
||||
default = "next_3d"
|
||||
}
|
||||
|
||||
variable "memory_forecast_interval" {
|
||||
description = "Interval for the Memory Utilization Forecast monitor"
|
||||
default = "30m"
|
||||
}
|
||||
|
||||
variable "memory_forecast_history" {
|
||||
description = "History for the Memory Utilization Forecast monitor"
|
||||
default = "12h"
|
||||
}
|
||||
|
||||
variable "memory_forecast_threshold_warning" {
|
||||
description = "Memory Utilization Forecast in fraction (warning threshold)"
|
||||
default = 0.8
|
||||
}
|
||||
|
||||
variable "memory_forecast_threshold_critical" {
|
||||
description = "Memory Utilization Forecast in fraction (critical threshold)"
|
||||
default = 0.9
|
||||
}
|
||||
|
||||
variable "memory_forecast_silenced" {
|
||||
description = "Groups to mute for GCP Cloud SQL Memory Utilization Forecast monitor"
|
||||
type = "map"
|
||||
default = {}
|
||||
}
|
||||
|
||||
@ -21,9 +21,7 @@ resource "datadog_monitor" "cpu_utilization" {
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.cpu_timeframe}): (
|
||||
avg:gcp.cloudsql.database.cpu.utilization{${data.template_file.filter.rendered}}
|
||||
)
|
||||
avg(${var.cpu_timeframe}): avg:gcp.cloudsql.database.cpu.utilization{${data.template_file.filter.rendered}}
|
||||
by {database_id}
|
||||
> ${var.cpu_threshold_critical}
|
||||
EOF
|
||||
@ -63,9 +61,8 @@ resource "datadog_monitor" "disk_utilization" {
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.disk_timeframe}): (
|
||||
avg(${var.disk_timeframe}):
|
||||
avg:gcp.cloudsql.database.disk.utilization{${data.template_file.filter.rendered}}
|
||||
)
|
||||
by {database_id}
|
||||
> ${var.disk_threshold_critical}
|
||||
EOF
|
||||
@ -96,26 +93,24 @@ EOF
|
||||
}
|
||||
|
||||
#
|
||||
# Network Connections
|
||||
# Memory Utilization
|
||||
#
|
||||
resource "datadog_monitor" "network_connections" {
|
||||
name = "[${var.environment}] Cloud SQL Network Connections (hard limit: ${var.network_connections_hard_limit}) {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.network_connections_message, var.message)}"
|
||||
resource "datadog_monitor" "memory_utilization" {
|
||||
name = "[${var.environment}] Cloud SQL Memory Utilization {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.memory_message, var.message)}"
|
||||
|
||||
type = "metric alert"
|
||||
|
||||
query = <<EOF
|
||||
avg(${var.network_connections_timeframe}): (
|
||||
avg:gcp.cloudsql.database.network.connections{${data.template_file.filter.rendered}}
|
||||
/${var.network_connections_hard_limit}
|
||||
)
|
||||
avg(${var.memory_timeframe}):
|
||||
avg:gcp.cloudsql.database.memory.utilization{${data.template_file.filter.rendered}}
|
||||
by {database_id}
|
||||
> ${var.network_connections_threshold_critical}
|
||||
> ${var.memory_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
warning = "${var.network_connections_threshold_warning}"
|
||||
critical = "${var.network_connections_threshold_critical}"
|
||||
warning = "${var.memory_threshold_warning}"
|
||||
critical = "${var.memory_threshold_critical}"
|
||||
}
|
||||
|
||||
include_tags = true
|
||||
@ -128,7 +123,51 @@ EOF
|
||||
locked = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
new_host_delay = "${var.delay}"
|
||||
silenced = "${var.network_connections_silenced}"
|
||||
silenced = "${var.memory_silenced}"
|
||||
|
||||
tags = [
|
||||
"team:gcp",
|
||||
"provider:gcp",
|
||||
"env:${var.environment}",
|
||||
"resource:cloud-sql",
|
||||
]
|
||||
}
|
||||
|
||||
resource "datadog_monitor" "memory_utilization_forecast" {
|
||||
name = "[${var.environment}] Cloud SQL Memory Utilization Forecast {{#is_alert}}{{{comparator}}} {{threshold}}% ({{value}}%){{/is_alert}}{{#is_warning}}{{{comparator}}} {{warn_threshold}}% ({{value}}%){{/is_warning}}"
|
||||
message = "${coalesce(var.memory_forecast_message, var.message)}"
|
||||
|
||||
type = "query alert"
|
||||
|
||||
query = <<EOF
|
||||
max(${var.memory_forecast_timeframe}):
|
||||
forecast(
|
||||
avg:gcp.cloudsql.database.memory.utilization{${data.template_file.filter.rendered}} by {database_id},
|
||||
'linear',
|
||||
1,
|
||||
interval='${var.memory_forecast_interval}',
|
||||
history='${var.memory_forecast_history}',
|
||||
model='default'
|
||||
)
|
||||
> ${var.memory_forecast_threshold_critical}
|
||||
EOF
|
||||
|
||||
thresholds {
|
||||
warning = "${var.memory_forecast_threshold_warning}"
|
||||
critical = "${var.memory_forecast_threshold_critical}"
|
||||
}
|
||||
|
||||
include_tags = true
|
||||
notify_no_data = true
|
||||
require_full_window = false
|
||||
renotify_interval = 0
|
||||
notify_audit = false
|
||||
timeout_h = 0
|
||||
include_tags = true
|
||||
locked = false
|
||||
evaluation_delay = "${var.delay}"
|
||||
new_host_delay = "${var.delay}"
|
||||
silenced = "${var.memory_forecast_silenced}"
|
||||
|
||||
tags = [
|
||||
"team:gcp",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user